glide-mq 0.11.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/CHANGELOG.md +105 -1
  2. package/README.md +162 -178
  3. package/dist/base-worker.d.ts +25 -4
  4. package/dist/base-worker.d.ts.map +1 -1
  5. package/dist/base-worker.js +293 -28
  6. package/dist/base-worker.js.map +1 -1
  7. package/dist/broadcast-worker.d.ts.map +1 -1
  8. package/dist/broadcast-worker.js +3 -12
  9. package/dist/broadcast-worker.js.map +1 -1
  10. package/dist/connection.d.ts.map +1 -1
  11. package/dist/connection.js +3 -0
  12. package/dist/connection.js.map +1 -1
  13. package/dist/errors.d.ts +25 -0
  14. package/dist/errors.d.ts.map +1 -1
  15. package/dist/errors.js +34 -1
  16. package/dist/errors.js.map +1 -1
  17. package/dist/flow-producer.d.ts +18 -2
  18. package/dist/flow-producer.d.ts.map +1 -1
  19. package/dist/flow-producer.js +64 -5
  20. package/dist/flow-producer.js.map +1 -1
  21. package/dist/functions/index.d.ts +44 -6
  22. package/dist/functions/index.d.ts.map +1 -1
  23. package/dist/functions/index.js +728 -156
  24. package/dist/functions/index.js.map +1 -1
  25. package/dist/index.d.ts +4 -2
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.js +3 -1
  28. package/dist/index.js.map +1 -1
  29. package/dist/job.d.ts +103 -1
  30. package/dist/job.d.ts.map +1 -1
  31. package/dist/job.js +228 -0
  32. package/dist/job.js.map +1 -1
  33. package/dist/producer.d.ts +3 -0
  34. package/dist/producer.d.ts.map +1 -1
  35. package/dist/producer.js +14 -7
  36. package/dist/producer.js.map +1 -1
  37. package/dist/proxy/routes.d.ts.map +1 -1
  38. package/dist/proxy/routes.js +67 -0
  39. package/dist/proxy/routes.js.map +1 -1
  40. package/dist/queue-events.d.ts.map +1 -1
  41. package/dist/queue-events.js +1 -4
  42. package/dist/queue-events.js.map +1 -1
  43. package/dist/queue.d.ts +98 -1
  44. package/dist/queue.d.ts.map +1 -1
  45. package/dist/queue.js +478 -21
  46. package/dist/queue.js.map +1 -1
  47. package/dist/scheduler.d.ts +5 -0
  48. package/dist/scheduler.d.ts.map +1 -1
  49. package/dist/scheduler.js +15 -1
  50. package/dist/scheduler.js.map +1 -1
  51. package/dist/telemetry.d.ts +5 -0
  52. package/dist/telemetry.d.ts.map +1 -1
  53. package/dist/telemetry.js +9 -9
  54. package/dist/telemetry.js.map +1 -1
  55. package/dist/testing.d.ts +178 -3
  56. package/dist/testing.d.ts.map +1 -1
  57. package/dist/testing.js +472 -3
  58. package/dist/testing.js.map +1 -1
  59. package/dist/types.d.ts +221 -1
  60. package/dist/types.d.ts.map +1 -1
  61. package/dist/types.js.map +1 -1
  62. package/dist/utils.d.ts +18 -1
  63. package/dist/utils.d.ts.map +1 -1
  64. package/dist/utils.js +76 -4
  65. package/dist/utils.js.map +1 -1
  66. package/dist/worker.d.ts.map +1 -1
  67. package/dist/worker.js +3 -12
  68. package/dist/worker.js.map +1 -1
  69. package/package.json +24 -5
package/CHANGELOG.md CHANGED
@@ -6,6 +6,110 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
6
 
7
7
  ---
8
8
 
9
+ ## [0.14.0] - 2026-03-28
10
+
11
+ ### Breaking Changes
12
+
13
+ - **JobUsage redesigned**: `inputTokens`/`outputTokens` replaced with `tokens: Record<string, number>` for extensible category tracking (input, output, reasoning, cachedInput, etc.)
14
+ - **Cost tracking redesigned**: `costUsd` replaced with `costs: Record<string, number>` + `costUnit` for currency-agnostic per-category cost tracking
15
+ - **BudgetOptions expanded**: `maxCostUsd` replaced with `maxTotalCost`. Added `maxTokens` (per-category caps), `tokenWeights` (weighted totals), `maxCosts` (per-category cost caps), `costUnit`
16
+ - **getFlowUsage return type changed**: `totalInputTokens`/`totalOutputTokens`/`totalCostUsd` replaced with `tokens`/`costs` maps + `totalTokens`/`totalCost`
17
+
18
+ ### Added
19
+
20
+ - `job.streamChunk(type, content?)` - typed streaming convenience for reasoning vs content chunks
21
+ - Per-category budget enforcement with independent limits per token/cost category
22
+ - Weighted token budgets - reasoning tokens can count 4x toward budget
23
+ - `ConnectionOptions.requestTimeout` - configurable command timeout (was hardcoded 500ms)
24
+ - 9 new examples: thinking-model, cost-breakdown, budget-weighted, reasoning-stream, agent-budget-loop, multi-model-cost, fallback-usage, streaming-sse, batch-embed-tpm
25
+ - Upgraded to valkey-search 1.2 in test infrastructure (compose.yaml)
26
+ - Bumped speedkey to 0.3.0-rc1
27
+
28
+ ### Fixed
29
+
30
+ - Budget bypass when only `totalTokens` reported without `tokens` breakdown
31
+ - `JSON.parse` null safety in budget and usage parsing
32
+ - Prototype pollution prevention with `Object.create(null)` in aggregation maps
33
+ - Flaky timeouts in the DAG cluster tests (test timeout raised from 15s to 30s)
34
+ - `TestJobRecord` missing `usage` field causing empty `getFlowUsage()` in testing mode
35
+
36
+ ---
37
+
38
+ ## [0.13.0] - 2026-03-27
39
+
40
+ ### Added
41
+
42
+ - **Structured AI metadata** (#168): `job.reportUsage({ model, tokens: { input, output }, costs: { total } })` records LLM usage on any job. `queue.getFlowUsage(flowId)` aggregates token counts and cost across an entire flow.
43
+ - **Per-job streaming channel** (#169): `job.stream(chunk)` publishes incremental data (LLM tokens, progress events) to a dedicated channel. `queue.readStream(jobId, opts?)` consumes chunks in real time. Blocking reads via XREAD BLOCK.
44
+ - **Suspend/resume with signals** (#170): `job.suspend(opts?)` pauses a job mid-processor; `queue.signal(jobId, name, data?)` resumes it with an external event. Enables human-in-the-loop approval gates, webhook callbacks, and any pattern requiring external input before a job can continue.
45
+ - `SuspendOptions`: `reason` (label), `timeout` (auto-fail after N ms)
46
+ - `onResume` callback: best-effort same-worker continuation called with `signals[]` on resume
47
+ - `queue.getSuspendInfo(jobId)`: returns suspension metadata and signals delivered so far
48
+ - `glidemq_suspend` FCALL: moves active job to suspended sorted set, releases group slot
49
+ - `glidemq_signal` FCALL: appends signal, re-queues job to stream
50
+ - `glidemq_sweepSuspended` FCALL: fails timed-out suspended jobs on each stalled recovery tick
51
+ - Proxy: `POST /queues/:name/jobs/:id/signal` endpoint
52
+ - Testing: `TestJob.suspend()` and `TestQueue.signal()` with full parity (no Valkey)
53
+ - **Per-job lockDuration override** (#172): set `lockDuration` per job to control heartbeat interval and stall detection timeout independently of the worker default.
54
+ - **Fallback chains** (#173): ordered list of model/provider alternatives via `opts.fallbacks`. On processor failure, the job automatically retries with the next fallback entry. Each fallback can override `data` and `metadata`.
55
+ - **Budget middleware** (#174): flow-level token and cost caps. Set `budget: { maxTokens, maxCost }` on a flow; jobs that would exceed the budget are failed before execution.
56
+ - **Dual-axis rate limiting (RPM + TPM)** (#175): enforce both requests-per-minute and tokens-per-minute limits on a queue. Designed for LLM API compliance where providers impose simultaneous request-per-minute and token-per-minute ceilings.
57
+ - **18 real-world AI examples** (#176): framework integrations covering LangChain, Vercel AI SDK, OpenAI, Anthropic, multi-model routing, RAG pipelines, and more.
58
+ - **Valkey Search integration** (#177): vector search over jobs using Valkey Search module. `queue.createIndex(schema, opts?)` defines indexes; `queue.search(query, opts?)` runs hybrid vector + filter queries. `IndexCreateOptions` and `SearchQueryOptions` types decoupled from speedkey.
59
+ - `SuspendError`, `SuspendOptions`, `SignalEntry` exported from public API.
60
+ - Stress tests: 38 tests for correctness under concurrent load and edge-case pressure.
61
+ - Docker: `compose.yaml` uses `valkey-bundle` image (search + json + bloom modules).
62
+ - CI: `test-search` job with `valkey-bundle` for search integration tests.
63
+
64
+ ### Fixed
65
+
66
+ - OTel `SpanStatusCode` values corrected (OK=1, ERROR=2) - previously swapped.
67
+ - Signal data auto-deserialization: signals received via `onResume` are now parsed from JSON automatically.
68
+ - Fallback type uses explicit `metadata` field instead of index signature.
69
+ - `glidemq_clean` and `glidemq_drain` now delete `signals:{id}` LIST keys when removing jobs, preventing a key leak when suspended jobs time out or are cleaned after failure.
70
+
71
+ ---
72
+
73
+ ## [0.12.0] - 2026-03-20
74
+
75
+ ### Added
76
+
77
+ - **Runtime per-group rate limiting** (#148): three complementary APIs for pausing individual ordering groups at runtime.
78
+ - `job.rateLimitGroup(duration, opts?)` - pause from inside the processor (e.g., on 429 response)
79
+ - `throw new GroupRateLimitError(duration, opts?)` - throw-style sugar
80
+ - `queue.rateLimitGroup(groupKey, duration, opts?)` - pause from outside (webhooks, health checks)
81
+ - Options: `currentJob` (requeue|fail), `requeuePosition` (front|back), `extend` (max|replace)
82
+ - **Ordering path unification** (#158): all `ordering.key` jobs now route through the group path with implicit `concurrency: 1`. Enables group features (runtime rate limiting, token bucket) for all ordering-key users.
83
+ - ZSET groupq for ordered promotion (score = orderingSeq)
84
+ - `nextSeq` counter on group hash gates all 6 activation paths
85
+ - Step-jobs hold ordering slot until full completion
86
+ - Returning step-jobs bypass concurrency/rate gates
87
+ - `GroupRateLimitError` and `GroupRateLimitOptions` exported from public API.
88
+ - `BroadcastWorker.waitUntilReady()` method (#149).
89
+ - Queue/Producer option `events: false` to skip XADD 'added' event emission on job add.
90
+
91
+ ### Performance
92
+
93
+ - **HMGET consolidation in `completeAndFetchNext`**: merge 4 separate hash lookups into 1 HMGET. Reduces redis.call()s from 13 to 10 on hot path.
94
+ - **Remove auto-ID EXISTS check**: monotonic INCR cannot collide. Saves 1 redis.call() per add.
95
+ - **Parallel resource cleanup** in test fixtures (#151).
96
+ - **Multi-key DEL** for queue obliteration (#154).
97
+ - TS-side micro-optimizations: `withSpan` lazy attributes, `Buffer.byteLength` skip, cached retention objects.
98
+
99
+ ### Fixed
100
+
101
+ - `Broadcast.publish()` signature documented correctly - subject is first arg (#152).
102
+ - DLQ configuration location clarified in docs (#153).
103
+ - `addBulk` dedup batch paths correctly pass `skipEvents`.
104
+ - `advanceIdCounter` avoids Lua float precision loss on large IDs.
105
+ - `flatted` dependency bumped to resolve prototype pollution vulnerability.
106
+
107
+ ### Breaking
108
+
109
+ - `groupq` key type changed from LIST to ZSET. Existing groups with queued jobs need migration (drain before upgrade). This breakage is considered acceptable while the library is pre-stable.
110
+
111
+ ---
112
+
9
113
  ## [0.11.0] - 2026-03-10
10
114
 
11
115
  ### Added
@@ -70,7 +174,7 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
70
174
  - DAG workflows - `FlowProducer.addDAG()` and `dag()` helper for arbitrary DAG topologies (#86).
71
175
  - Serverless usage guide (`docs/SERVERLESS.md`) - Lambda, Cloudflare Workers, Vercel Edge examples.
72
176
  - List-active counter self-healing via `glidemq_healListActive` Lua function. Automatically corrects counter drift caused by worker crashes during scheduler promotion ticks (#124).
73
- - Proxy endpoints: `GET /queues/:name/jobs` (list/filter), `DELETE /queues/:name/jobs/:id` (#124).
177
+ - Proxy endpoint: `GET /queues/:name/jobs/:id` (fetch single job by ID) (#124). Note: `GET /queues/:name/jobs` (list/filter) and `DELETE /queues/:name/jobs/:id` (remove) were planned but not implemented.
74
178
  - CI: `npm audit` security scanning, `timeout-minutes` on all jobs, `npm ci` with cache in publish workflow (#124).
75
179
 
76
180
  ### Fixed
package/README.md CHANGED
@@ -3,227 +3,211 @@
3
3
  [![npm version](https://img.shields.io/npm/v/glide-mq)](https://www.npmjs.com/package/glide-mq)
4
4
  [![license](https://img.shields.io/npm/l/glide-mq)](https://github.com/avifenesh/glide-mq/blob/main/LICENSE)
5
5
  [![CI](https://github.com/avifenesh/glide-mq/actions/workflows/ci.yml/badge.svg)](https://github.com/avifenesh/glide-mq/actions/workflows/ci.yml)
6
- [![node](https://img.shields.io/node/v/glide-mq)](https://nodejs.org/)
7
- [![changelog](https://img.shields.io/badge/changelog-CHANGELOG.md-blue)](CHANGELOG.md)
8
6
 
9
- High-performance message queue for Node.js built on Valkey/Redis Streams with 1-RTT job operations and cluster-native design.
7
+ High-performance message queue for Node.js with first-class AI orchestration. Built on Valkey/Redis Streams with a Rust NAPI core.
10
8
 
11
- glide-mq is for anyone building background jobs, task queues, or workflow orchestration in Node.js. It connects through a Rust-native NAPI client ([valkey-glide](https://github.com/valkey-io/valkey-glide)), executes all queue logic in a single Valkey Server Function call per operation (FCALL, not EVAL), and hash-tags every key for automatic cluster slot alignment. The result is fewer round trips, no Lua cache misses, and zero cluster configuration.
12
-
13
- > If glide-mq is useful to you, consider giving it a star on [GitHub](https://github.com/avifenesh/glide-mq). It helps others discover the project.
14
-
15
- ## Why glide-mq
16
-
17
- - Use this when you need **throughput**: 25,000+ jobs/s single-node with 1 RTT per job via Valkey Server Functions.
18
- - Use this when you run **Valkey/Redis clusters**: all keys hash-tagged out of the box, no `{braces}` workarounds.
19
- - Use this when you need **workflows**: parent-child trees, DAGs with fan-in, step jobs, batch processing, and cron scheduling in one library.
20
- - Use this when you deploy to **serverless**: lightweight `Producer` and `ServerlessPool` cache connections across warm invocations.
21
- - Use this when you want **pub/sub with durability**: `Broadcast` delivers to all subscribers with retries, backpressure, and NATS-style subject filtering.
22
-
23
- ## Install
9
+ Completes and fetches the next job in a single server-side function call (1 RTT per job), hash-tags every key for zero-config clustering, and ships seven built-in primitives for LLM orchestration - cost tracking, token streaming, human-in-the-loop suspend/resume, model failover, TPM rate limiting, budget caps, and per-job lock tuning - plus vector search over job data.
24
10
 
25
11
  ```bash
26
12
  npm install glide-mq
27
13
  ```
28
14
 
29
- Requires Node.js 20+ and a running [Valkey](https://valkey.io) 7.0+ or Redis 7.0+ instance.
30
-
31
- ## Quick start
15
+ ### General Usage
32
16
 
33
17
  ```typescript
34
18
  import { Queue, Worker } from 'glide-mq';
35
19
 
36
20
  const connection = { addresses: [{ host: 'localhost', port: 6379 }] };
37
-
38
21
  const queue = new Queue('tasks', { connection });
39
- await queue.add('send-email', { to: 'user@example.com', subject: 'Hello' });
40
22
 
41
- const worker = new Worker('tasks', async (job) => {
42
- console.log(`Processing ${job.name}:`, job.data);
43
- return { sent: true };
44
- }, { connection, concurrency: 10 });
23
+ await queue.add('send-email', { to: 'user@example.com', subject: 'Welcome' });
45
24
 
46
- worker.on('completed', (job) => console.log(`Job ${job.id} done`));
47
- worker.on('failed', (job, err) => console.error(`Job ${job.id} failed:`, err.message));
25
+ const worker = new Worker(
26
+ 'tasks',
27
+ async (job) => {
28
+ await sendEmail(job.data.to, job.data.subject);
29
+ return { sent: true };
30
+ },
31
+ { connection, concurrency: 10 },
32
+ );
48
33
  ```
49
34
 
50
- ## Performance
51
-
52
- - **1 RTT per job** -- `completeAndFetchNext` completes the current job and fetches the next in a single FCALL
53
- - **25,000+ jobs/s** single-node floor (bare-metal Linux, localhost); scales higher with network pipelining
54
- - **addBulk**: 10,000 jobs in 350 ms
55
- - **Gzip compression**: 98% payload reduction on 15 KB payloads
56
-
57
- Throughput scales with concurrency up to the Valkey single-thread FCALL execution ceiling. Deployments with network latency between app and Valkey benefit from glide's auto-pipelining -- higher concurrency batches more commands per wire write. Run `npm run bench` to measure your environment.
58
-
59
- ## How it's different
60
-
61
- | Aspect | glide-mq approach |
62
- |--------|-------------------|
63
- | **Network per job** | 1 RTT -- complete current job + fetch next in a single FCALL |
64
- | **Client** | Rust NAPI bindings ([valkey-glide](https://github.com/valkey-io/valkey-glide)) -- no JS protocol parsing |
65
- | **Server logic** | 1 persistent Valkey Function library (FUNCTION LOAD + FCALL) -- no per-call EVAL recompilation |
66
- | **Cluster** | Hash-tagged keys (`glide:{queueName}:*`) -- all queue data routes to the same slot automatically |
67
- | **Workflows** | FlowProducer trees, DAGs with fan-in, chain/group/chord, step jobs, dynamic children |
68
- | **Pub/sub** | Broadcast with NATS-style subject filtering, independent subscriber retries |
69
- | **Serverless** | Lightweight `Producer` + `ServerlessPool` for Lambda/Edge with connection reuse |
70
-
71
- ## Core concepts
72
-
73
- - **Queue** -- stores jobs in Valkey Streams. Handles enqueue, delay, priority, pause, drain, and bulk operations.
74
- - **Worker** -- processes jobs with configurable concurrency, prefetch, lock duration, and stalled-job recovery.
75
- - **Job** -- a unit of work with name, data, options (retries, backoff, priority, TTL), and lifecycle events.
76
- - **FlowProducer** -- creates parent-child job trees and DAGs. A parent waits for all children before processing.
77
- - **Producer** -- lightweight enqueue-only client. No EventEmitter, no Job instances, returns plain string IDs. Built for serverless.
78
- - **Broadcast** -- fan-out pub/sub. Each message is delivered to every subscriber group with independent retries and backpressure.
79
- - **QueueEvents** -- real-time stream of job lifecycle events (completed, failed, delayed, waiting, etc.).
80
-
81
- ## Features
82
-
83
- ### Core
84
-
85
- - **Queues and workers** with configurable concurrency, prefetch, and lock duration ([Usage](docs/USAGE.md))
86
- - **Delayed, priority, and bulk enqueue** for scheduling and high-throughput ingestion ([Usage](docs/USAGE.md))
87
- - **Batch processing** -- process multiple jobs at once via `batch: { size, timeout? }` ([Usage](docs/USAGE.md#batch-processing))
88
- - **Request-reply** -- `queue.addAndWait(name, data, { waitTimeout })` for synchronous RPC ([Usage](docs/USAGE.md#request-reply-with-addandwait))
89
- - **LIFO mode** -- `lifo: true` processes newest jobs first ([Advanced](docs/ADVANCED.md#lifo-mode))
90
- - **Job TTL** -- auto-expire jobs after a time-to-live window ([Advanced](docs/ADVANCED.md#job-ttl))
91
- - **Custom job IDs** -- deterministic, idempotent enqueue; duplicates return `null` ([Advanced](docs/ADVANCED.md#custom-job-ids))
92
- - **Pluggable serializers** -- swap JSON for any `{ serialize, deserialize }` implementation ([Advanced](docs/ADVANCED.md#pluggable-serializers))
93
- - **Transparent compression** -- gzip payloads at the queue level ([Advanced](docs/ADVANCED.md#transparent-compression))
94
-
95
- ### Reliability
96
-
97
- - **Retries with exponential, fixed, or custom backoff** and dead-letter queues ([Advanced](docs/ADVANCED.md#retries-and-backoff))
98
- - **UnrecoverableError** -- skip all retries and fail permanently ([Usage](docs/USAGE.md#unrecoverableerror))
99
- - **Stalled recovery** -- auto-reclaim stuck jobs via consumer group PEL and `XAUTOCLAIM` ([Usage](docs/USAGE.md#worker))
100
- - **Job revocation** -- cooperative cancellation with `AbortSignal` ([Advanced](docs/ADVANCED.md#job-revocation))
101
- - **Deduplication** -- simple, throttle, and debounce modes with configurable TTL ([Advanced](docs/ADVANCED.md#deduplication))
102
- - **Per-key ordering** -- sequential processing per ordering key with configurable group concurrency ([Advanced](docs/ADVANCED.md#ordering-and-group-concurrency))
103
- - **Rate limiting** -- per-group sliding window, token bucket, and global queue-wide limits ([Advanced](docs/ADVANCED.md#global-rate-limiting))
104
- - **Sandboxed processors** -- run processors in worker threads or child processes ([Architecture](docs/ARCHITECTURE.md))
105
-
106
- ### Orchestration
35
+ ### AI Usage
107
36
 
108
- - **FlowProducer** -- parent-child job trees with `chain`, `group`, and `chord` helpers ([Workflows](docs/WORKFLOWS.md))
109
- - **DAG workflows** -- arbitrary dependency graphs with `FlowProducer.addDAG()` and `dag()` helper; multi-parent fan-in, diamond patterns, cycle detection ([Workflows](docs/WORKFLOWS.md))
110
- - **Step jobs** -- `job.moveToDelayed(timestamp, nextStep)` suspends a job mid-processor and resumes later ([Usage](docs/USAGE.md#pause-and-resume-a-job-later-step-jobs))
111
- - **Dynamic children** -- `job.moveToWaitingChildren()` pauses a parent to add children mid-execution ([Workflows](docs/WORKFLOWS.md))
112
- - **Batch processing** -- process multiple jobs at once for bulk I/O ([Usage](docs/USAGE.md#batch-processing))
113
-
114
- ### Scheduling
115
-
116
- - **Cron and interval schedulers** -- 5-field cron with timezone, fixed intervals, and `repeatAfterComplete` mode ([Advanced](docs/ADVANCED.md#job-schedulers))
117
- - **Bounded schedulers** -- `limit`, `startDate`, and `endDate` for finite schedules ([Advanced](docs/ADVANCED.md#bounded-schedulers))
118
-
119
- ### Pub/Sub
120
-
121
- - **Broadcast** -- fan-out delivery to all subscriber groups ([Usage](docs/USAGE.md#broadcast--broadcastworker))
122
- - **BroadcastWorker** -- independent consumer groups with own retries, concurrency, and backpressure ([Usage](docs/USAGE.md#broadcast--broadcastworker))
123
- - **Subject filtering** -- NATS-style patterns (`*` one segment, `>` trailing wildcard) for topic-based routing ([Usage](docs/USAGE.md#broadcast--broadcastworker))
124
-
125
- ### Serverless
126
-
127
- - **Producer** -- enqueue without EventEmitter overhead, returns plain string IDs ([Usage](docs/USAGE.md))
128
- - **ServerlessPool** -- connection caching across warm Lambda/Edge invocations ([Serverless](docs/SERVERLESS.md))
129
-
130
- ### Observability
37
+ ```typescript
38
+ import { Queue, Worker } from 'glide-mq';
131
39
 
132
- - **QueueEvents** -- real-time stream-based lifecycle events ([Observability](docs/OBSERVABILITY.md))
133
- - **Time-series metrics** -- per-minute throughput and latency retained 24h, recorded server-side ([Observability](docs/OBSERVABILITY.md))
134
- - **OpenTelemetry** -- automatic span emission; bring your own tracer or auto-detect `@opentelemetry/api` ([Observability](docs/OBSERVABILITY.md))
135
- - **Job logs** -- append structured log entries per job with pagination ([Observability](docs/OBSERVABILITY.md))
136
- - **Job mutations** -- `changePriority()`, `changeDelay()`, `promote()` after enqueue; `retryJobs()` and `clean()` in bulk ([Usage](docs/USAGE.md))
137
- - **Graceful shutdown** -- `gracefulShutdown()` helper registers SIGTERM/SIGINT handlers ([Usage](docs/USAGE.md#graceful-shutdown))
138
- - **In-memory testing** -- `TestQueue` and `TestWorker` with zero Valkey dependency ([Testing](docs/TESTING.md))
40
+ const queue = new Queue('ai', { connection });
41
+
42
+ await queue.add(
43
+ 'inference',
44
+ { prompt: 'Explain message queues' },
45
+ {
46
+ fallbacks: [{ model: 'gpt-5.4-nano', provider: 'openai' }],
47
+ lockDuration: 120000,
48
+ },
49
+ );
50
+
51
+ const worker = new Worker(
52
+ 'ai',
53
+ async (job) => {
54
+ const result = await callLLM(job.data.prompt);
55
+ await job.reportUsage({
56
+ model: 'gpt-5.4',
57
+ tokens: { input: 50, output: 200 },
58
+ costs: { total: 0.003 },
59
+ });
60
+ await job.stream({ type: 'token', content: result });
61
+ return result;
62
+ },
63
+ { connection, tokenLimiter: { maxTokens: 100000, duration: 60000 } },
64
+ );
65
+ ```
139
66
 
140
- ### Cloud
67
+ ## When to use glide-mq
141
68
 
142
- - **Cluster-native** -- hash-tagged keys `glide:{queueName}:*` route all queue data to the same slot ([Usage](docs/USAGE.md#cluster-mode))
143
- - **IAM authentication** -- native SigV4 auth for AWS ElastiCache and MemoryDB ([Usage](docs/USAGE.md#cluster-mode))
144
- - **AZ-affinity routing** -- `readFrom: 'AZAffinity'` routes reads to same-AZ replicas ([Usage](docs/USAGE.md#cluster-mode))
69
+ - **Background jobs and task processing** - email, image processing, data pipelines, webhooks, any async work.
70
+ - **Scheduled and recurring work** - cron jobs, interval tasks, bounded schedulers.
71
+ - **Distributed workflows** - parent-child trees, DAGs, fan-in/fan-out, step jobs, dynamic children.
72
+ - **High-throughput queues over real networks** - 1 RTT per job via Valkey Server Functions, up to 38% faster than alternatives.
73
+ - **LLM pipelines and model orchestration** - cost tracking, token streaming, model failover, budget caps without external middleware.
74
+ - **Valkey/Redis clusters** - hash-tagged keys out of the box with zero configuration.
145
75
 
146
- ## Framework integrations
76
+ ## How it's different
147
77
 
148
- | Package | Install | Setup |
149
- |---------|---------|-------|
150
- | [`@glidemq/hono`](https://github.com/avifenesh/glidemq-hono) | `npm i @glidemq/hono` | `app.use(glideMQ({ connection, queues: { ... } }))` |
151
- | [`@glidemq/fastify`](https://github.com/avifenesh/glidemq-fastify) | `npm i @glidemq/fastify` | `app.register(glideMQPlugin, { connection, queues: { ... } })` |
152
- | [`@glidemq/nestjs`](https://github.com/avifenesh/glidemq-nestjs) | `npm i @glidemq/nestjs` | `GlideMQModule.forRoot({ connection, queues: { ... } })` |
153
- | [`@glidemq/dashboard`](https://github.com/avifenesh/glidemq-dashboard) | `npm i @glidemq/dashboard` | `app.use('/dashboard', createDashboard([queue1, queue2]))` |
154
- | [`@glidemq/hapi`](https://github.com/avifenesh/glidemq-hapi) | `npm i @glidemq/hapi` | `await server.register({ plugin: glideMQPlugin, options: { connection, queues } })` |
78
+ | Aspect | glide-mq |
79
+ | ------------------- | --------------------------------------------------------------------------------------------------------- |
80
+ | **Network per job** | 1 RTT - complete + fetch next in a single FCALL |
81
+ | **Client** | Rust NAPI bindings via [valkey-glide](https://github.com/valkey-io/valkey-glide) - no JS protocol parsing |
82
+ | **Server logic** | Persistent Valkey Function library (FUNCTION LOAD + FCALL) - no per-call EVAL |
83
+ | **Cluster** | Hash-tagged keys (`glide:{queueName}:*`) route to the same slot automatically |
84
+ | **AI-native** | Cost tracking, token streaming, suspend/resume, fallback chains, TPM limits, budget caps |
85
+ | **Vector search** | KNN similarity queries over job data via Valkey Search |
155
86
 
156
- All framework packages provide REST endpoints, SSE events, and serverless Producer support. See each package's README for full documentation.
87
+ ## AI-native primitives
157
88
 
158
- ## Cross-language
89
+ Seven primitives for LLM and agent workflows, built into the core API.
159
90
 
160
- Non-Node.js services can enqueue jobs into glide-mq queues using the HTTP proxy or direct FCALL:
91
+ - **Cost tracking** - `job.reportUsage()` records model, tokens, cost, latency per job. `queue.getFlowUsage()` aggregates across flows.
92
+ - **Token streaming** - `job.stream(chunk)` pushes LLM output tokens in real time. `queue.readStream(jobId)` consumes them with optional long-polling.
93
+ - **Suspend/resume** - `job.suspend()` pauses mid-processor for human approval or webhook callback. `queue.signal(jobId, name, data)` resumes with external input.
94
+ - **Fallback chains** - ordered `fallbacks` array on job options. On failure, the next retry reads `job.currentFallback` for the alternate model/provider.
95
+ - **TPM rate limiting** - `tokenLimiter` on worker options enforces tokens-per-minute caps. Combine with RPM `limiter` for dual-axis rate control.
96
+ - **Budget caps** - `FlowProducer.add(flow, { budget })` sets `maxTotalTokens` and `maxTotalCost` across all jobs in a flow. Jobs fail or pause when exceeded.
97
+ - **Per-job lock duration** - override `lockDuration` per job for adaptive stall detection. Short for classifiers, long for multi-minute LLM calls.
161
98
 
162
- ```typescript
163
- import { createProxyServer } from 'glide-mq/proxy';
99
+ See [Usage - AI-native primitives](docs/USAGE.md#ai-native-primitives) for full examples.
164
100
 
165
- const proxy = createProxyServer({
166
- connection: { addresses: [{ host: 'localhost', port: 6379 }] },
167
- queues: ['emails', 'reports'],
168
- });
169
- proxy.app.listen(3000);
170
- ```
101
+ ## Features
171
102
 
172
- ```bash
173
- curl -X POST http://localhost:3000/queues/emails/jobs \
174
- -H 'Content-Type: application/json' \
175
- -d '{"name": "send-email", "data": {"to": "user@example.com"}}'
176
- ```
103
+ - **1 RTT per job** - complete current + fetch next in a single server-side function call
104
+ - **Cluster-native** - hash-tagged keys, zero cluster configuration
105
+ - **Workflows** - FlowProducer trees, DAGs with fan-in, chain/group/chord, step jobs, dynamic children
106
+ - **Scheduling** - 5-field cron with timezone, fixed intervals, bounded schedulers
107
+ - **Retries** - exponential, fixed, or custom backoff with dead-letter queues
108
+ - **Rate limiting** - per-group sliding window, token bucket, global queue-wide limits
109
+ - **Broadcast** - fan-out pub/sub with NATS-style subject filtering and independent subscriber retries
110
+ - **Batch processing** - process multiple jobs at once for bulk I/O
111
+ - **Request-reply** - `queue.addAndWait()` for synchronous RPC patterns
112
+ - **Deduplication** - simple, throttle, and debounce modes
113
+ - **Compression** - transparent gzip at the queue level
114
+ - **Serverless** - lightweight `Producer` and `ServerlessPool` for Lambda/Edge
115
+ - **OpenTelemetry** - automatic span emission with bring-your-own tracer
116
+ - **In-memory testing** - `TestQueue` and `TestWorker` with zero Valkey dependency
117
+ - **Cross-language** - HTTP proxy and wire protocol for non-Node.js services
177
118
 
178
- Endpoints: `POST /queues/:name/jobs`, `POST /queues/:name/jobs/bulk`, `GET /queues/:name/jobs/:id`, `POST /queues/:name/pause`, `POST /queues/:name/resume`, `GET /queues/:name/counts`, `GET /health`.
119
+ ## Performance
179
120
 
180
- For zero-overhead integration, call Valkey Server Functions directly from any language with a Valkey client. See [Wire Protocol](docs/WIRE_PROTOCOL.md) for FCALL signatures, key layout, and examples in Python and Go.
121
+ Benchmarked on AWS ElastiCache Valkey 8.2 (r7g.large) with TLS, EC2 client in the same region.
122
+
123
+ | Concurrency | glide-mq | BullMQ | Delta |
124
+ | :---------: | ---------: | ---------: | :---: |
125
+ | c=5 | 10,754 j/s | 9,866 j/s | +9% |
126
+ | c=10 | 18,218 j/s | 13,541 j/s | +35% |
127
+ | c=15 | 19,583 j/s | 14,162 j/s | +38% |
128
+ | c=20 | 19,408 j/s | 16,085 j/s | +21% |
129
+
130
+ The advantage comes from completing and fetching the next job in a single FCALL. The savings compound over real network latency - exactly the conditions in every production deployment. At high concurrency both libraries converge toward the Valkey single-thread ceiling.
131
+
132
+ Reproduce with `npm run bench` or `npx tsx benchmarks/elasticache-head-to-head.ts` against your own infrastructure.
133
+
134
+ ## Examples
135
+
136
+ 27 runnable examples in `examples/`. Run any with `npx tsx examples/<name>.ts`.
137
+
138
+ | Example | What it shows |
139
+ | ----------------------- | ----------------------------------------------- |
140
+ | `usage-tracking.ts` | Token and cost tracking across multi-step flows |
141
+ | `token-streaming.ts` | Real-time LLM token streaming to clients |
142
+ | `human-approval.ts` | Suspend/resume with editorial review gate |
143
+ | `model-failover.ts` | Fallback chains across providers |
144
+ | `tpm-throttle.ts` | Dual-axis RPM + TPM rate limiting |
145
+ | `budget-cap.ts` | Flow-level token and cost caps |
146
+ | `vector-search.ts` | KNN similarity search with pre-filters |
147
+ | `with-langchain.ts` | LangChain integration with token tracking |
148
+ | `with-vercel-ai-sdk.ts` | Vercel AI SDK integration with streaming |
149
+ | `rag-pipeline.ts` | RAG with embedding, indexing, retrieval |
150
+ | `ai-agent-loop.ts` | Autonomous agent loop with budget enforcement |
151
+ | `testing-mode.ts` | In-memory testing without Valkey |
152
+ | `agent-budget-loop.ts` | Agent loop with per-step budget tracking |
153
+ | `multi-model-cost.ts` | Cost breakdown across multiple models |
154
+ | `fallback-usage.ts` | Usage tracking through fallback chains |
155
+ | `streaming-sse.ts` | Server-sent events with token streaming |
156
+ | `batch-embed-tpm.ts` | Batch embeddings with TPM rate limiting |
157
+ | `thinking-model.ts` | Thinking/reasoning model token tracking |
158
+ | `cost-breakdown.ts` | Detailed per-category cost breakdown |
159
+ | `budget-weighted.ts` | Weighted budget allocation across flow steps |
160
+ | `reasoning-stream.ts` | Streaming reasoning/chain-of-thought tokens |
161
+ | `adaptive-timeout.ts` | Adaptive lock duration based on model complexity |
162
+ | `broadcast-events.ts` | Fan-out event publishing with subject filtering |
163
+ | `agent-memory.ts` | Multi-turn agent with persistent memory |
164
+ | `search-dashboard.ts` | Job search and monitoring dashboard |
165
+ | `embedding-pipeline.ts` | Batch document embedding with rate limiting |
166
+ | `content-pipeline.ts` | Content moderation with streaming and approval |
167
+
168
+ ## When NOT to use glide-mq
169
+
170
+ - **You need a log-based event streaming platform.** glide-mq is a job/task queue, not a partitioned event log. It does not provide Kafka-style topic partitions, consumer offset management, or event replay.
171
+ - **You need browser support.** The Rust NAPI client requires a server-side runtime (Node.js 20+, Bun, or Deno with NAPI support).
172
+ - **You need exactly-once semantics.** glide-mq provides at-least-once delivery. Duplicate processing is rare but possible — design processors to be idempotent.
173
+ - **You need to run without Valkey or Redis.** Production use requires Valkey 7.0+ or Redis 7.0+. For dev/testing, `TestQueue`/`TestWorker` run fully in-memory.
181
174
 
182
175
  ## Documentation
183
176
 
184
- | Guide | Topics |
185
- |-------|--------|
186
- | [Usage](docs/USAGE.md) | Queue, Worker, Producer, batch, request-reply, graceful shutdown, cluster mode |
187
- | [Broadcast](docs/BROADCAST.md) | Pub/sub fan-out, BroadcastWorker, subject filtering |
188
- | [Step Jobs](docs/STEP_JOBS.md) | `moveToDelayed`, `moveToWaitingChildren`, multi-step processors |
189
- | [Advanced](docs/ADVANCED.md) | Schedulers, rate limiting, dedup, compression, retries, DLQ, custom IDs, LIFO, TTL, serializers |
190
- | [Workflows](docs/WORKFLOWS.md) | FlowProducer, DAG, `chain`, `group`, `chord`, dynamic children |
191
- | [Observability](docs/OBSERVABILITY.md) | OpenTelemetry, time-series metrics, job logs, dashboard |
192
- | [Serverless](docs/SERVERLESS.md) | Producer, ServerlessPool, Lambda and Edge deployment |
193
- | [Testing](docs/TESTING.md) | In-memory `TestQueue` and `TestWorker` -- no Valkey needed |
194
- | [Wire Protocol](docs/WIRE_PROTOCOL.md) | Cross-language FCALL specs, key layout, Python and Go examples |
195
- | [Architecture](docs/ARCHITECTURE.md) | Key design, Valkey functions, LIFO, Broadcast, DAG internals |
196
- | [Durability](docs/DURABILITY.md) | Persistence modes, crash windows, feature-specific durability |
197
- | [Migration](docs/MIGRATION.md) | Coming from BullMQ? API mapping and step-by-step guide |
198
-
199
- ## Limitations
200
-
201
- - Requires a running Valkey 7.0+ or Redis 7.0+ instance. There is no embedded mode.
202
- - Node.js only. The Rust-native NAPI client (`@valkey/valkey-glide`) does not run in browsers or Deno.
203
- - At-least-once delivery semantics. Jobs may be processed more than once after crashes or stalled recovery.
204
- - Not a streaming platform. glide-mq is a job/task queue, not a replacement for Kafka or NATS JetStream.
205
- - Single dependency on `@glidemq/speedkey` (which wraps `@valkey/valkey-glide`). Native addon compilation is required on install.
177
+ | Guide | Topics |
178
+ | -------------------------------------- | ----------------------------------------------------------- |
179
+ | [Usage](docs/USAGE.md) | Queue, Worker, Producer, batch, request-reply, cluster mode |
180
+ | [Workflows](docs/WORKFLOWS.md) | FlowProducer, DAG, chain/group/chord, dynamic children |
181
+ | [Advanced](docs/ADVANCED.md) | Schedulers, rate limiting, dedup, compression, retries, DLQ |
182
+ | [Broadcast](docs/BROADCAST.md) | Pub/sub fan-out, subject filtering |
183
+ | [Observability](docs/OBSERVABILITY.md) | OpenTelemetry, metrics, job logs, dashboard |
184
+ | [Serverless](docs/SERVERLESS.md) | Producer, ServerlessPool, Lambda/Edge |
185
+ | [Testing](docs/TESTING.md) | In-memory TestQueue and TestWorker |
186
+ | [Wire Protocol](docs/WIRE_PROTOCOL.md) | Cross-language FCALL specs, Python/Go examples |
187
+ | [Step Jobs](docs/STEP_JOBS.md) | Step-job workflows with moveToDelayed |
188
+ | [Durability](docs/DURABILITY.md) | Durability guarantees, persistence, delivery semantics |
189
+ | [Architecture](docs/ARCHITECTURE.md) | Internal architecture and design reference |
190
+ | [Migration](docs/MIGRATION.md)          | Coming from BullMQ? API mapping guide                        |
206
191
 
207
192
  ## Ecosystem
208
193
 
209
- | Package | Description | Links |
210
- |---------|-------------|-------|
211
- | [glide-mq](https://github.com/avifenesh/glide-mq) | Core queue library | [npm](https://www.npmjs.com/package/glide-mq) |
212
- | [@glidemq/hono](https://github.com/avifenesh/glidemq-hono) | Hono middleware -- REST endpoints, SSE, serverless Producer | [npm](https://www.npmjs.com/package/@glidemq/hono) |
213
- | [@glidemq/fastify](https://github.com/avifenesh/glidemq-fastify) | Fastify plugin -- REST endpoints, SSE, serverless Producer | [npm](https://www.npmjs.com/package/@glidemq/fastify) |
214
- | [@glidemq/nestjs](https://github.com/avifenesh/glidemq-nestjs) | NestJS module -- decorators, DI, lifecycle management | [npm](https://www.npmjs.com/package/@glidemq/nestjs) |
215
- | [@glidemq/dashboard](https://github.com/avifenesh/glidemq-dashboard) | Web UI -- metrics charts, scheduler management, job mutations | [npm](https://www.npmjs.com/package/@glidemq/dashboard) |
216
- | [@glidemq/speedkey](https://github.com/avifenesh/speedkey) | Valkey GLIDE client with native NAPI bindings | [npm](https://www.npmjs.com/package/@glidemq/speedkey) |
217
- | [glidemq-examples](https://github.com/avifenesh/glidemq-examples) | 34 runnable examples across frameworks and use cases | [GitHub](https://github.com/avifenesh/glidemq-examples) |
218
-
219
- > If glide-mq is useful to you, consider [starring the repo](https://github.com/avifenesh/glide-mq). It helps others find the project.
194
+ | Package | Description |
195
+ | -------------------------------------------------------------------- | --------------------------------------------- |
196
+ | [@glidemq/speedkey](https://github.com/avifenesh/speedkey) | Valkey GLIDE client with native NAPI bindings |
197
+ | [@glidemq/dashboard](https://github.com/avifenesh/glidemq-dashboard) | Web UI for metrics, schedulers, job mutations |
198
+ | [@glidemq/hono](https://github.com/avifenesh/glidemq-hono) | Hono middleware |
199
+ | [@glidemq/fastify](https://github.com/avifenesh/glidemq-fastify) | Fastify plugin |
200
+ | [@glidemq/nestjs](https://github.com/avifenesh/glidemq-nestjs) | NestJS module |
201
+ | [@glidemq/hapi](https://github.com/avifenesh/glidemq-hapi) | Hapi plugin |
202
+ | [glide-mq.dev](https://avifenesh.github.io/glide-mq.dev/) | Full documentation site |
220
203
 
221
204
  ## Contributing
222
205
 
223
- Bug reports, feature requests, and pull requests are welcome. See [CHANGELOG.md](CHANGELOG.md) for release history.
206
+ Bug reports, feature requests, and pull requests are welcome.
224
207
 
225
208
  - [Open an issue](https://github.com/avifenesh/glide-mq/issues)
226
209
  - [Discussions](https://github.com/avifenesh/glide-mq/discussions)
210
+ - [Changelog](CHANGELOG.md)
227
211
 
228
212
  ## License
229
213
 
@@ -1,10 +1,10 @@
1
1
  import { EventEmitter } from 'events';
2
- import type { WorkerOptions, Processor, BatchProcessor, Client, Serializer } from './types';
2
+ import type { WorkerOptions, Processor, BatchProcessor, Client, Serializer, SignalEntry } from './types';
3
3
  import { Job } from './job';
4
4
  import { buildKeys } from './utils';
5
5
  import type { QueueKeys } from './functions/index';
6
6
  import { Scheduler } from './scheduler';
7
- export type WorkerEvent = 'completed' | 'failed' | 'error' | 'stalled' | 'closing' | 'closed' | 'active' | 'drained';
7
+ export type WorkerEvent = 'completed' | 'failed' | 'error' | 'stalled' | 'closing' | 'closed' | 'active' | 'drained' | 'budget-exceeded';
8
8
  /**
9
9
  * Configuration that differs between Worker and BroadcastWorker.
10
10
  * Passed from the subclass constructor to BaseWorker.
@@ -59,9 +59,12 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
59
59
  protected globalRateLimitEnabled: boolean;
60
60
  protected cachedRateLimitMax: number;
61
61
  protected cachedRateLimitDuration: number;
62
+ protected tpmLocalCounter: number;
63
+ protected tpmWindowStart: number;
62
64
  protected sandboxClose?: (force?: boolean) => Promise<void>;
63
65
  protected workerHeartbeatTimer: ReturnType<typeof setInterval> | null;
64
66
  protected pollLoopPromise: Promise<void> | null;
67
+ protected suspendContinuations: Map<string, (signals: SignalEntry[]) => Promise<any>>;
65
68
  protected readonly startedAt: number;
66
69
  protected readonly hostname: string;
67
70
  protected serializer: Serializer;
@@ -74,6 +77,9 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
74
77
  protected readonly startFrom: string;
75
78
  protected readonly skipEvents: boolean;
76
79
  protected readonly skipMetrics: boolean;
80
+ private hasCompletedListeners;
81
+ private hasActiveListeners;
82
+ private hasFailedListeners;
77
83
  protected constructor(name: string, processor: Processor<D, R> | BatchProcessor<D, R> | string, opts: WorkerOptions, config: BaseWorkerConfig);
78
84
  /**
79
85
  * Wait for the worker to be fully initialized and connected.
@@ -132,7 +138,7 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
132
138
  * Handle a moveToActive result that is not a valid hash (null or REVOKED).
133
139
  * Returns true if the result was handled (caller should return), false if the hash is valid.
134
140
  */
135
- protected handleMoveToActiveEdgeCase(moveResult: Record<string, string> | 'REVOKED' | 'EXPIRED' | 'GROUP_FULL' | 'GROUP_RATE_LIMITED' | 'GROUP_TOKEN_LIMITED' | 'ERR:COST_EXCEEDS_CAPACITY' | null, jobId: string, entryId: string): Promise<boolean>;
141
+ protected handleMoveToActiveEdgeCase(moveResult: Record<string, string> | 'REVOKED' | 'EXPIRED' | 'GROUP_FULL' | 'GROUP_RATE_LIMITED' | 'GROUP_TOKEN_LIMITED' | 'GROUP_ORDERED' | 'ERR:COST_EXCEEDS_CAPACITY' | null, jobId: string, entryId: string): Promise<boolean>;
136
142
  /**
137
143
  * Run the processor with optional timeout, AbortController, and heartbeat.
138
144
  * Returns { result, error } - exactly one will be set.
@@ -214,7 +220,7 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
214
220
  * Returns true if the job was found and aborted, false if not currently active.
215
221
  */
216
222
  abortJob(jobId: string): boolean;
217
- protected startHeartbeat(jobId: string): void;
223
+ protected startHeartbeat(jobId: string, jobLockDuration?: number): void;
218
224
  protected stopHeartbeat(jobId: string): void;
219
225
  protected moveToDLQ(job: Job<D, R>, error: Error): Promise<void>;
220
226
  /**
@@ -222,8 +228,23 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
222
228
  * Also respects any manual rate limit set via rateLimit(ms).
223
229
  */
224
230
  protected waitForRateLimit(): Promise<void>;
231
+ /**
232
+ * Check the TPM (tokens-per-minute) rate limit and wait if either the local or
233
+ * per-queue counter exceeds the configured maxTokens for the current window.
234
+ */
235
+ protected waitForTokenLimit(): Promise<void>;
236
+ /**
237
+ * Increment the TPM counter after a job completes (or reports tokens).
238
+ * Called from the completion path when tokenLimiter is configured.
239
+ */
240
+ protected incrementTpmCounter(tokens: number): Promise<void>;
225
241
  /** Refresh cached meta flags from Valkey. Called on init and each scheduler tick. */
226
242
  private refreshMetaFlags;
243
+ /**
244
+ * Read the onExceeded policy from a budget hash.
245
+ * Returns 'fail' (default) or 'pause'.
246
+ */
247
+ private getBudgetOnExceeded;
227
248
  /**
228
249
  * Register this worker in Valkey with a TTL-based heartbeat key.
229
250
  * The key expires after stalledInterval ms; a periodic timer refreshes it at half that interval.