glide-mq 0.11.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/CHANGELOG.md +105 -1
  2. package/README.md +162 -178
  3. package/dist/base-worker.d.ts +25 -4
  4. package/dist/base-worker.d.ts.map +1 -1
  5. package/dist/base-worker.js +293 -28
  6. package/dist/base-worker.js.map +1 -1
  7. package/dist/broadcast-worker.d.ts.map +1 -1
  8. package/dist/broadcast-worker.js +3 -12
  9. package/dist/broadcast-worker.js.map +1 -1
  10. package/dist/connection.d.ts.map +1 -1
  11. package/dist/connection.js +3 -0
  12. package/dist/connection.js.map +1 -1
  13. package/dist/errors.d.ts +25 -0
  14. package/dist/errors.d.ts.map +1 -1
  15. package/dist/errors.js +34 -1
  16. package/dist/errors.js.map +1 -1
  17. package/dist/flow-producer.d.ts +18 -2
  18. package/dist/flow-producer.d.ts.map +1 -1
  19. package/dist/flow-producer.js +64 -5
  20. package/dist/flow-producer.js.map +1 -1
  21. package/dist/functions/index.d.ts +44 -6
  22. package/dist/functions/index.d.ts.map +1 -1
  23. package/dist/functions/index.js +728 -156
  24. package/dist/functions/index.js.map +1 -1
  25. package/dist/index.d.ts +4 -2
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.js +3 -1
  28. package/dist/index.js.map +1 -1
  29. package/dist/job.d.ts +103 -1
  30. package/dist/job.d.ts.map +1 -1
  31. package/dist/job.js +228 -0
  32. package/dist/job.js.map +1 -1
  33. package/dist/producer.d.ts +3 -0
  34. package/dist/producer.d.ts.map +1 -1
  35. package/dist/producer.js +14 -7
  36. package/dist/producer.js.map +1 -1
  37. package/dist/proxy/routes.d.ts.map +1 -1
  38. package/dist/proxy/routes.js +67 -0
  39. package/dist/proxy/routes.js.map +1 -1
  40. package/dist/queue-events.d.ts.map +1 -1
  41. package/dist/queue-events.js +1 -4
  42. package/dist/queue-events.js.map +1 -1
  43. package/dist/queue.d.ts +98 -1
  44. package/dist/queue.d.ts.map +1 -1
  45. package/dist/queue.js +478 -21
  46. package/dist/queue.js.map +1 -1
  47. package/dist/scheduler.d.ts +5 -0
  48. package/dist/scheduler.d.ts.map +1 -1
  49. package/dist/scheduler.js +15 -1
  50. package/dist/scheduler.js.map +1 -1
  51. package/dist/telemetry.d.ts +5 -0
  52. package/dist/telemetry.d.ts.map +1 -1
  53. package/dist/telemetry.js +9 -9
  54. package/dist/telemetry.js.map +1 -1
  55. package/dist/testing.d.ts +178 -3
  56. package/dist/testing.d.ts.map +1 -1
  57. package/dist/testing.js +472 -3
  58. package/dist/testing.js.map +1 -1
  59. package/dist/types.d.ts +221 -1
  60. package/dist/types.d.ts.map +1 -1
  61. package/dist/types.js.map +1 -1
  62. package/dist/utils.d.ts +18 -1
  63. package/dist/utils.d.ts.map +1 -1
  64. package/dist/utils.js +76 -4
  65. package/dist/utils.js.map +1 -1
  66. package/dist/worker.d.ts.map +1 -1
  67. package/dist/worker.js +3 -12
  68. package/dist/worker.js.map +1 -1
  69. package/package.json +24 -5
package/CHANGELOG.md CHANGED
@@ -6,6 +6,110 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
6
 
7
7
  ---
8
8
 
9
+ ## [0.14.0] - 2026-03-28
10
+
11
+ ### Breaking Changes
12
+
13
+ - **JobUsage redesigned**: `inputTokens`/`outputTokens` replaced with `tokens: Record<string, number>` for extensible category tracking (input, output, reasoning, cachedInput, etc.)
14
+ - **Cost tracking redesigned**: `costUsd` replaced with `costs: Record<string, number>` + `costUnit` for currency-agnostic per-category cost tracking
15
+ - **BudgetOptions expanded**: `maxCostUsd` replaced with `maxTotalCost`. Added `maxTokens` (per-category caps), `tokenWeights` (weighted totals), `maxCosts` (per-category cost caps), `costUnit`
16
+ - **getFlowUsage return type changed**: `totalInputTokens`/`totalOutputTokens`/`totalCostUsd` replaced with `tokens`/`costs` maps + `totalTokens`/`totalCost`
17
+
18
+ ### Added
19
+
20
+ - `job.streamChunk(type, content?)` - typed streaming convenience for reasoning vs content chunks
21
+ - Per-category budget enforcement with independent limits per token/cost category
22
+ - Weighted token budgets - reasoning tokens can count 4x toward budget
23
+ - `ConnectionOptions.requestTimeout` - configurable command timeout (was hardcoded 500ms)
24
+ - 9 new examples: thinking-model, cost-breakdown, budget-weighted, reasoning-stream, agent-budget-loop, multi-model-cost, fallback-usage, streaming-sse, batch-embed-tpm
25
+ - Upgraded to valkey-search 1.2 in test infrastructure (compose.yaml)
26
+ - Bumped speedkey to 0.3.0-rc1
27
+
28
+ ### Fixed
29
+
30
+ - Budget bypass when only `totalTokens` reported without `tokens` breakdown
31
+ - `JSON.parse` null safety in budget and usage parsing
32
+ - Prototype pollution prevention with `Object.create(null)` in aggregation maps
33
+ - Flaky timeouts in the DAG cluster tests (test timeout raised from 15s to 30s)
34
+ - `TestJobRecord` missing `usage` field causing empty `getFlowUsage()` in testing mode
35
+
36
+ ---
37
+
38
+ ## [0.13.0] - 2026-03-27
39
+
40
+ ### Added
41
+
42
+ - **Structured AI metadata** (#168): `job.reportUsage({ model, tokens: { input, output }, costs: { total } })` records LLM usage on any job. `queue.getFlowUsage(flowId)` aggregates token counts and cost across an entire flow.
43
+ - **Per-job streaming channel** (#169): `job.stream(chunk)` publishes incremental data (LLM tokens, progress events) to a dedicated channel. `queue.readStream(jobId, opts?)` consumes chunks in real time. Blocking reads via XREAD BLOCK.
44
+ - **Suspend/resume with signals** (#170): `job.suspend(opts?)` pauses a job mid-processor; `queue.signal(jobId, name, data?)` resumes it with an external event. Enables human-in-the-loop approval gates, webhook callbacks, and any pattern requiring external input before a job can continue.
45
+ - `SuspendOptions`: `reason` (label), `timeout` (auto-fail after N ms)
46
+ - `onResume` callback: best-effort same-worker continuation called with `signals[]` on resume
47
+ - `queue.getSuspendInfo(jobId)`: returns suspension metadata and signals delivered so far
48
+ - `glidemq_suspend` FCALL: moves active job to suspended sorted set, releases group slot
49
+ - `glidemq_signal` FCALL: appends signal, re-queues job to stream
50
+ - `glidemq_sweepSuspended` FCALL: fails timed-out suspended jobs on each stalled recovery tick
51
+ - Proxy: `POST /queues/:name/jobs/:id/signal` endpoint
52
+ - Testing: `TestJob.suspend()` and `TestQueue.signal()` with full parity (no Valkey)
53
+ - **Per-job lockDuration override** (#172): set `lockDuration` per job to control heartbeat interval and stall detection timeout independently of the worker default.
54
+ - **Fallback chains** (#173): ordered list of model/provider alternatives via `opts.fallbacks`. On processor failure, the job automatically retries with the next fallback entry. Each fallback can override `data` and `metadata`.
55
+ - **Budget middleware** (#174): flow-level token and cost caps. Set `budget: { maxTokens, maxCost }` on a flow; jobs that would exceed the budget are failed before execution.
56
+ - **Dual-axis rate limiting (RPM + TPM)** (#175): enforce both requests-per-minute and tokens-per-minute limits on a queue. Designed for LLM API compliance where providers impose simultaneous request-per-minute and token-per-minute ceilings.
57
+ - **18 real-world AI examples** (#176): framework integrations covering LangChain, Vercel AI SDK, OpenAI, Anthropic, multi-model routing, RAG pipelines, and more.
58
+ - **Valkey Search integration** (#177): vector search over jobs using Valkey Search module. `queue.createIndex(schema, opts?)` defines indexes; `queue.search(query, opts?)` runs hybrid vector + filter queries. `IndexCreateOptions` and `SearchQueryOptions` types decoupled from speedkey.
59
+ - `SuspendError`, `SuspendOptions`, `SignalEntry` exported from public API.
60
+ - Stress tests: 38 tests for correctness under concurrent load and edge-case pressure.
61
+ - Docker: `compose.yaml` uses `valkey-bundle` image (search + json + bloom modules).
62
+ - CI: `test-search` job with `valkey-bundle` for search integration tests.
63
+
64
+ ### Fixed
65
+
66
+ - OTel `SpanStatusCode` values corrected (OK=1, ERROR=2) - previously swapped.
67
+ - Signal data auto-deserialization: signals received via `onResume` are now parsed from JSON automatically.
68
+ - Fallback type uses explicit `metadata` field instead of index signature.
69
+ - `glidemq_clean` and `glidemq_drain` now delete `signals:{id}` LIST keys when removing jobs, preventing a key leak when suspended jobs time out or are cleaned after failure.
70
+
71
+ ---
72
+
73
+ ## [0.12.0] - 2026-03-20
74
+
75
+ ### Added
76
+
77
+ - **Runtime per-group rate limiting** (#148): three complementary APIs for pausing individual ordering groups at runtime.
78
+ - `job.rateLimitGroup(duration, opts?)` - pause from inside the processor (e.g., on 429 response)
79
+ - `throw new GroupRateLimitError(duration, opts?)` - throw-style sugar
80
+ - `queue.rateLimitGroup(groupKey, duration, opts?)` - pause from outside (webhooks, health checks)
81
+ - Options: `currentJob` (requeue|fail), `requeuePosition` (front|back), `extend` (max|replace)
82
+ - **Ordering path unification** (#158): all `ordering.key` jobs now route through the group path with implicit `concurrency: 1`. Enables group features (runtime rate limiting, token bucket) for all ordering-key users.
83
+ - ZSET groupq for ordered promotion (score = orderingSeq)
84
+ - `nextSeq` counter on group hash gates all 6 activation paths
85
+ - Step-jobs hold ordering slot until full completion
86
+ - Returning step-jobs bypass concurrency/rate gates
87
+ - `GroupRateLimitError` and `GroupRateLimitOptions` exported from public API.
88
+ - `BroadcastWorker.waitUntilReady()` method (#149).
89
+ - Queue/Producer option `events: false` to skip XADD 'added' event emission on job add.
90
+
91
+ ### Performance
92
+
93
+ - **HMGET consolidation in `completeAndFetchNext`**: merge 4 separate hash lookups into 1 HMGET. Reduces redis.call()s from 13 to 10 on hot path.
94
+ - **Remove auto-ID EXISTS check**: monotonic INCR cannot collide. Saves 1 redis.call() per add.
95
+ - **Parallel resource cleanup** in test fixtures (#151).
96
+ - **Multi-key DEL** for queue obliteration (#154).
97
+ - TS-side micro-optimizations: `withSpan` lazy attributes, `Buffer.byteLength` skip, cached retention objects.
98
+
99
+ ### Fixed
100
+
101
+ - `Broadcast.publish()` signature documented correctly - subject is first arg (#152).
102
+ - DLQ configuration location clarified in docs (#153).
103
+ - `addBulk` dedup batch paths correctly pass `skipEvents`.
104
+ - `advanceIdCounter` avoids Lua float precision loss on large IDs.
105
+ - `flatted` dependency bumped to resolve prototype pollution vulnerability.
106
+
107
+ ### Breaking
108
+
109
+ - `groupq` key type changed from LIST to ZSET. Existing groups with queued jobs need migration (drain before upgrade). This breakage is considered acceptable while the library is pre-stable.
110
+
111
+ ---
112
+
9
113
  ## [0.11.0] - 2026-03-10
10
114
 
11
115
  ### Added
@@ -70,7 +174,7 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
70
174
  - DAG workflows - `FlowProducer.addDAG()` and `dag()` helper for arbitrary DAG topologies (#86).
71
175
  - Serverless usage guide (`docs/SERVERLESS.md`) - Lambda, Cloudflare Workers, Vercel Edge examples.
72
176
  - List-active counter self-healing via `glidemq_healListActive` Lua function. Automatically corrects counter drift caused by worker crashes during scheduler promotion ticks (#124).
73
- - Proxy endpoints: `GET /queues/:name/jobs` (list/filter), `DELETE /queues/:name/jobs/:id` (#124).
177
+ - Proxy endpoint: `GET /queues/:name/jobs/:id` (fetch single job by ID) (#124). Note: `GET /queues/:name/jobs` (list/filter) and `DELETE /queues/:name/jobs/:id` (remove) were planned but not implemented.
74
178
  - CI: `npm audit` security scanning, `timeout-minutes` on all jobs, `npm ci` with cache in publish workflow (#124).
75
179
 
76
180
  ### Fixed
package/README.md CHANGED
@@ -3,227 +3,211 @@
3
3
  [![npm version](https://img.shields.io/npm/v/glide-mq)](https://www.npmjs.com/package/glide-mq)
4
4
  [![license](https://img.shields.io/npm/l/glide-mq)](https://github.com/avifenesh/glide-mq/blob/main/LICENSE)
5
5
  [![CI](https://github.com/avifenesh/glide-mq/actions/workflows/ci.yml/badge.svg)](https://github.com/avifenesh/glide-mq/actions/workflows/ci.yml)
6
- [![node](https://img.shields.io/node/v/glide-mq)](https://nodejs.org/)
7
- [![changelog](https://img.shields.io/badge/changelog-CHANGELOG.md-blue)](CHANGELOG.md)
8
6
 
9
- High-performance message queue for Node.js built on Valkey/Redis Streams with 1-RTT job operations and cluster-native design.
7
+ High-performance message queue for Node.js with first-class AI orchestration. Built on Valkey/Redis Streams with a Rust NAPI core.
10
8
 
11
- glide-mq is for anyone building background jobs, task queues, or workflow orchestration in Node.js. It connects through a Rust-native NAPI client ([valkey-glide](https://github.com/valkey-io/valkey-glide)), executes all queue logic in a single Valkey Server Function call per operation (FCALL, not EVAL), and hash-tags every key for automatic cluster slot alignment. The result is fewer round trips, no Lua cache misses, and zero cluster configuration.
12
-
13
- > If glide-mq is useful to you, consider giving it a star on [GitHub](https://github.com/avifenesh/glide-mq). It helps others discover the project.
14
-
15
- ## Why glide-mq
16
-
17
- - Use this when you need **throughput**: 25,000+ jobs/s single-node with 1 RTT per job via Valkey Server Functions.
18
- - Use this when you run **Valkey/Redis clusters**: all keys hash-tagged out of the box, no `{braces}` workarounds.
19
- - Use this when you need **workflows**: parent-child trees, DAGs with fan-in, step jobs, batch processing, and cron scheduling in one library.
20
- - Use this when you deploy to **serverless**: lightweight `Producer` and `ServerlessPool` cache connections across warm invocations.
21
- - Use this when you want **pub/sub with durability**: `Broadcast` delivers to all subscribers with retries, backpressure, and NATS-style subject filtering.
22
-
23
- ## Install
9
+ Completes and fetches the next job in a single server-side function call (1 RTT per job), hash-tags every key for zero-config clustering, and ships seven built-in primitives for LLM orchestration - cost tracking, token streaming, human-in-the-loop suspend/resume, model failover, TPM rate limiting, budget caps, and per-job lock tuning - plus vector search over job data.
24
10
 
25
11
  ```bash
26
12
  npm install glide-mq
27
13
  ```
28
14
 
29
- Requires Node.js 20+ and a running [Valkey](https://valkey.io) 7.0+ or Redis 7.0+ instance.
30
-
31
- ## Quick start
15
+ ### General Usage
32
16
 
33
17
  ```typescript
34
18
  import { Queue, Worker } from 'glide-mq';
35
19
 
36
20
  const connection = { addresses: [{ host: 'localhost', port: 6379 }] };
37
-
38
21
  const queue = new Queue('tasks', { connection });
39
- await queue.add('send-email', { to: 'user@example.com', subject: 'Hello' });
40
22
 
41
- const worker = new Worker('tasks', async (job) => {
42
- console.log(`Processing ${job.name}:`, job.data);
43
- return { sent: true };
44
- }, { connection, concurrency: 10 });
23
+ await queue.add('send-email', { to: 'user@example.com', subject: 'Welcome' });
45
24
 
46
- worker.on('completed', (job) => console.log(`Job ${job.id} done`));
47
- worker.on('failed', (job, err) => console.error(`Job ${job.id} failed:`, err.message));
25
+ const worker = new Worker(
26
+ 'tasks',
27
+ async (job) => {
28
+ await sendEmail(job.data.to, job.data.subject);
29
+ return { sent: true };
30
+ },
31
+ { connection, concurrency: 10 },
32
+ );
48
33
  ```
49
34
 
50
- ## Performance
51
-
52
- - **1 RTT per job** -- `completeAndFetchNext` completes the current job and fetches the next in a single FCALL
53
- - **25,000+ jobs/s** single-node floor (bare-metal Linux, localhost); scales higher with network pipelining
54
- - **addBulk**: 10,000 jobs in 350 ms
55
- - **Gzip compression**: 98% payload reduction on 15 KB payloads
56
-
57
- Throughput scales with concurrency up to the Valkey single-thread FCALL execution ceiling. Deployments with network latency between app and Valkey benefit from glide's auto-pipelining -- higher concurrency batches more commands per wire write. Run `npm run bench` to measure your environment.
58
-
59
- ## How it's different
60
-
61
- | Aspect | glide-mq approach |
62
- |--------|-------------------|
63
- | **Network per job** | 1 RTT -- complete current job + fetch next in a single FCALL |
64
- | **Client** | Rust NAPI bindings ([valkey-glide](https://github.com/valkey-io/valkey-glide)) -- no JS protocol parsing |
65
- | **Server logic** | 1 persistent Valkey Function library (FUNCTION LOAD + FCALL) -- no per-call EVAL recompilation |
66
- | **Cluster** | Hash-tagged keys (`glide:{queueName}:*`) -- all queue data routes to the same slot automatically |
67
- | **Workflows** | FlowProducer trees, DAGs with fan-in, chain/group/chord, step jobs, dynamic children |
68
- | **Pub/sub** | Broadcast with NATS-style subject filtering, independent subscriber retries |
69
- | **Serverless** | Lightweight `Producer` + `ServerlessPool` for Lambda/Edge with connection reuse |
70
-
71
- ## Core concepts
72
-
73
- - **Queue** -- stores jobs in Valkey Streams. Handles enqueue, delay, priority, pause, drain, and bulk operations.
74
- - **Worker** -- processes jobs with configurable concurrency, prefetch, lock duration, and stalled-job recovery.
75
- - **Job** -- a unit of work with name, data, options (retries, backoff, priority, TTL), and lifecycle events.
76
- - **FlowProducer** -- creates parent-child job trees and DAGs. A parent waits for all children before processing.
77
- - **Producer** -- lightweight enqueue-only client. No EventEmitter, no Job instances, returns plain string IDs. Built for serverless.
78
- - **Broadcast** -- fan-out pub/sub. Each message is delivered to every subscriber group with independent retries and backpressure.
79
- - **QueueEvents** -- real-time stream of job lifecycle events (completed, failed, delayed, waiting, etc.).
80
-
81
- ## Features
82
-
83
- ### Core
84
-
85
- - **Queues and workers** with configurable concurrency, prefetch, and lock duration ([Usage](docs/USAGE.md))
86
- - **Delayed, priority, and bulk enqueue** for scheduling and high-throughput ingestion ([Usage](docs/USAGE.md))
87
- - **Batch processing** -- process multiple jobs at once via `batch: { size, timeout? }` ([Usage](docs/USAGE.md#batch-processing))
88
- - **Request-reply** -- `queue.addAndWait(name, data, { waitTimeout })` for synchronous RPC ([Usage](docs/USAGE.md#request-reply-with-addandwait))
89
- - **LIFO mode** -- `lifo: true` processes newest jobs first ([Advanced](docs/ADVANCED.md#lifo-mode))
90
- - **Job TTL** -- auto-expire jobs after a time-to-live window ([Advanced](docs/ADVANCED.md#job-ttl))
91
- - **Custom job IDs** -- deterministic, idempotent enqueue; duplicates return `null` ([Advanced](docs/ADVANCED.md#custom-job-ids))
92
- - **Pluggable serializers** -- swap JSON for any `{ serialize, deserialize }` implementation ([Advanced](docs/ADVANCED.md#pluggable-serializers))
93
- - **Transparent compression** -- gzip payloads at the queue level ([Advanced](docs/ADVANCED.md#transparent-compression))
94
-
95
- ### Reliability
96
-
97
- - **Retries with exponential, fixed, or custom backoff** and dead-letter queues ([Advanced](docs/ADVANCED.md#retries-and-backoff))
98
- - **UnrecoverableError** -- skip all retries and fail permanently ([Usage](docs/USAGE.md#unrecoverableerror))
99
- - **Stalled recovery** -- auto-reclaim stuck jobs via consumer group PEL and `XAUTOCLAIM` ([Usage](docs/USAGE.md#worker))
100
- - **Job revocation** -- cooperative cancellation with `AbortSignal` ([Advanced](docs/ADVANCED.md#job-revocation))
101
- - **Deduplication** -- simple, throttle, and debounce modes with configurable TTL ([Advanced](docs/ADVANCED.md#deduplication))
102
- - **Per-key ordering** -- sequential processing per ordering key with configurable group concurrency ([Advanced](docs/ADVANCED.md#ordering-and-group-concurrency))
103
- - **Rate limiting** -- per-group sliding window, token bucket, and global queue-wide limits ([Advanced](docs/ADVANCED.md#global-rate-limiting))
104
- - **Sandboxed processors** -- run processors in worker threads or child processes ([Architecture](docs/ARCHITECTURE.md))
105
-
106
- ### Orchestration
35
+ ### AI Usage
107
36
 
108
- - **FlowProducer** -- parent-child job trees with `chain`, `group`, and `chord` helpers ([Workflows](docs/WORKFLOWS.md))
109
- - **DAG workflows** -- arbitrary dependency graphs with `FlowProducer.addDAG()` and `dag()` helper; multi-parent fan-in, diamond patterns, cycle detection ([Workflows](docs/WORKFLOWS.md))
110
- - **Step jobs** -- `job.moveToDelayed(timestamp, nextStep)` suspends a job mid-processor and resumes later ([Usage](docs/USAGE.md#pause-and-resume-a-job-later-step-jobs))
111
- - **Dynamic children** -- `job.moveToWaitingChildren()` pauses a parent to add children mid-execution ([Workflows](docs/WORKFLOWS.md))
112
- - **Batch processing** -- process multiple jobs at once for bulk I/O ([Usage](docs/USAGE.md#batch-processing))
113
-
114
- ### Scheduling
115
-
116
- - **Cron and interval schedulers** -- 5-field cron with timezone, fixed intervals, and `repeatAfterComplete` mode ([Advanced](docs/ADVANCED.md#job-schedulers))
117
- - **Bounded schedulers** -- `limit`, `startDate`, and `endDate` for finite schedules ([Advanced](docs/ADVANCED.md#bounded-schedulers))
118
-
119
- ### Pub/Sub
120
-
121
- - **Broadcast** -- fan-out delivery to all subscriber groups ([Usage](docs/USAGE.md#broadcast--broadcastworker))
122
- - **BroadcastWorker** -- independent consumer groups with own retries, concurrency, and backpressure ([Usage](docs/USAGE.md#broadcast--broadcastworker))
123
- - **Subject filtering** -- NATS-style patterns (`*` one segment, `>` trailing wildcard) for topic-based routing ([Usage](docs/USAGE.md#broadcast--broadcastworker))
124
-
125
- ### Serverless
126
-
127
- - **Producer** -- enqueue without EventEmitter overhead, returns plain string IDs ([Usage](docs/USAGE.md))
128
- - **ServerlessPool** -- connection caching across warm Lambda/Edge invocations ([Serverless](docs/SERVERLESS.md))
129
-
130
- ### Observability
37
+ ```typescript
38
+ import { Queue, Worker } from 'glide-mq';
131
39
 
132
- - **QueueEvents** -- real-time stream-based lifecycle events ([Observability](docs/OBSERVABILITY.md))
133
- - **Time-series metrics** -- per-minute throughput and latency retained 24h, recorded server-side ([Observability](docs/OBSERVABILITY.md))
134
- - **OpenTelemetry** -- automatic span emission; bring your own tracer or auto-detect `@opentelemetry/api` ([Observability](docs/OBSERVABILITY.md))
135
- - **Job logs** -- append structured log entries per job with pagination ([Observability](docs/OBSERVABILITY.md))
136
- - **Job mutations** -- `changePriority()`, `changeDelay()`, `promote()` after enqueue; `retryJobs()` and `clean()` in bulk ([Usage](docs/USAGE.md))
137
- - **Graceful shutdown** -- `gracefulShutdown()` helper registers SIGTERM/SIGINT handlers ([Usage](docs/USAGE.md#graceful-shutdown))
138
- - **In-memory testing** -- `TestQueue` and `TestWorker` with zero Valkey dependency ([Testing](docs/TESTING.md))
40
+ const queue = new Queue('ai', { connection });
41
+
42
+ await queue.add(
43
+ 'inference',
44
+ { prompt: 'Explain message queues' },
45
+ {
46
+ fallbacks: [{ model: 'gpt-5.4-nano', provider: 'openai' }],
47
+ lockDuration: 120000,
48
+ },
49
+ );
50
+
51
+ const worker = new Worker(
52
+ 'ai',
53
+ async (job) => {
54
+ const result = await callLLM(job.data.prompt);
55
+ await job.reportUsage({
56
+ model: 'gpt-5.4',
57
+ tokens: { input: 50, output: 200 },
58
+ costs: { total: 0.003 },
59
+ });
60
+ await job.stream({ type: 'token', content: result });
61
+ return result;
62
+ },
63
+ { connection, tokenLimiter: { maxTokens: 100000, duration: 60000 } },
64
+ );
65
+ ```
139
66
 
140
- ### Cloud
67
+ ## When to use glide-mq
141
68
 
142
- - **Cluster-native** -- hash-tagged keys `glide:{queueName}:*` route all queue data to the same slot ([Usage](docs/USAGE.md#cluster-mode))
143
- - **IAM authentication** -- native SigV4 auth for AWS ElastiCache and MemoryDB ([Usage](docs/USAGE.md#cluster-mode))
144
- - **AZ-affinity routing** -- `readFrom: 'AZAffinity'` routes reads to same-AZ replicas ([Usage](docs/USAGE.md#cluster-mode))
69
+ - **Background jobs and task processing** - email, image processing, data pipelines, webhooks, any async work.
70
+ - **Scheduled and recurring work** - cron jobs, interval tasks, bounded schedulers.
71
+ - **Distributed workflows** - parent-child trees, DAGs, fan-in/fan-out, step jobs, dynamic children.
72
+ - **High-throughput queues over real networks** - 1 RTT per job via Valkey Server Functions, up to 38% faster than alternatives.
73
+ - **LLM pipelines and model orchestration** - cost tracking, token streaming, model failover, budget caps without external middleware.
74
+ - **Valkey/Redis clusters** - hash-tagged keys out of the box with zero configuration.
145
75
 
146
- ## Framework integrations
76
+ ## How it's different
147
77
 
148
- | Package | Install | Setup |
149
- |---------|---------|-------|
150
- | [`@glidemq/hono`](https://github.com/avifenesh/glidemq-hono) | `npm i @glidemq/hono` | `app.use(glideMQ({ connection, queues: { ... } }))` |
151
- | [`@glidemq/fastify`](https://github.com/avifenesh/glidemq-fastify) | `npm i @glidemq/fastify` | `app.register(glideMQPlugin, { connection, queues: { ... } })` |
152
- | [`@glidemq/nestjs`](https://github.com/avifenesh/glidemq-nestjs) | `npm i @glidemq/nestjs` | `GlideMQModule.forRoot({ connection, queues: { ... } })` |
153
- | [`@glidemq/dashboard`](https://github.com/avifenesh/glidemq-dashboard) | `npm i @glidemq/dashboard` | `app.use('/dashboard', createDashboard([queue1, queue2]))` |
154
- | [`@glidemq/hapi`](https://github.com/avifenesh/glidemq-hapi) | `npm i @glidemq/hapi` | `await server.register({ plugin: glideMQPlugin, options: { connection, queues } })` |
78
+ | Aspect | glide-mq |
79
+ | ------------------- | --------------------------------------------------------------------------------------------------------- |
80
+ | **Network per job** | 1 RTT - complete + fetch next in a single FCALL |
81
+ | **Client** | Rust NAPI bindings via [valkey-glide](https://github.com/valkey-io/valkey-glide) - no JS protocol parsing |
82
+ | **Server logic** | Persistent Valkey Function library (FUNCTION LOAD + FCALL) - no per-call EVAL |
83
+ | **Cluster** | Hash-tagged keys (`glide:{queueName}:*`) route to the same slot automatically |
84
+ | **AI-native** | Cost tracking, token streaming, suspend/resume, fallback chains, TPM limits, budget caps |
85
+ | **Vector search** | KNN similarity queries over job data via Valkey Search |
155
86
 
156
- All framework packages provide REST endpoints, SSE events, and serverless Producer support. See each package's README for full documentation.
87
+ ## AI-native primitives
157
88
 
158
- ## Cross-language
89
+ Seven primitives for LLM and agent workflows, built into the core API.
159
90
 
160
- Non-Node.js services can enqueue jobs into glide-mq queues using the HTTP proxy or direct FCALL:
91
+ - **Cost tracking** - `job.reportUsage()` records model, tokens, cost, latency per job. `queue.getFlowUsage()` aggregates across flows.
92
+ - **Token streaming** - `job.stream(chunk)` pushes LLM output tokens in real time. `queue.readStream(jobId)` consumes them with optional long-polling.
93
+ - **Suspend/resume** - `job.suspend()` pauses mid-processor for human approval or webhook callback. `queue.signal(jobId, name, data)` resumes with external input.
94
+ - **Fallback chains** - ordered `fallbacks` array on job options. On failure, the next retry reads `job.currentFallback` for the alternate model/provider.
95
+ - **TPM rate limiting** - `tokenLimiter` on worker options enforces tokens-per-minute caps. Combine with RPM `limiter` for dual-axis rate control.
96
+ - **Budget caps** - `FlowProducer.add(flow, { budget })` sets `maxTotalTokens` and `maxTotalCost` across all jobs in a flow. Jobs fail or pause when exceeded.
97
+ - **Per-job lock duration** - override `lockDuration` per job for adaptive stall detection. Short for classifiers, long for multi-minute LLM calls.
161
98
 
162
- ```typescript
163
- import { createProxyServer } from 'glide-mq/proxy';
99
+ See [Usage - AI-native primitives](docs/USAGE.md#ai-native-primitives) for full examples.
164
100
 
165
- const proxy = createProxyServer({
166
- connection: { addresses: [{ host: 'localhost', port: 6379 }] },
167
- queues: ['emails', 'reports'],
168
- });
169
- proxy.app.listen(3000);
170
- ```
101
+ ## Features
171
102
 
172
- ```bash
173
- curl -X POST http://localhost:3000/queues/emails/jobs \
174
- -H 'Content-Type: application/json' \
175
- -d '{"name": "send-email", "data": {"to": "user@example.com"}}'
176
- ```
103
+ - **1 RTT per job** - complete current + fetch next in a single server-side function call
104
+ - **Cluster-native** - hash-tagged keys, zero cluster configuration
105
+ - **Workflows** - FlowProducer trees, DAGs with fan-in, chain/group/chord, step jobs, dynamic children
106
+ - **Scheduling** - 5-field cron with timezone, fixed intervals, bounded schedulers
107
+ - **Retries** - exponential, fixed, or custom backoff with dead-letter queues
108
+ - **Rate limiting** - per-group sliding window, token bucket, global queue-wide limits
109
+ - **Broadcast** - fan-out pub/sub with NATS-style subject filtering and independent subscriber retries
110
+ - **Batch processing** - process multiple jobs at once for bulk I/O
111
+ - **Request-reply** - `queue.addAndWait()` for synchronous RPC patterns
112
+ - **Deduplication** - simple, throttle, and debounce modes
113
+ - **Compression** - transparent gzip at the queue level
114
+ - **Serverless** - lightweight `Producer` and `ServerlessPool` for Lambda/Edge
115
+ - **OpenTelemetry** - automatic span emission with bring-your-own tracer
116
+ - **In-memory testing** - `TestQueue` and `TestWorker` with zero Valkey dependency
117
+ - **Cross-language** - HTTP proxy and wire protocol for non-Node.js services
177
118
 
178
- Endpoints: `POST /queues/:name/jobs`, `POST /queues/:name/jobs/bulk`, `GET /queues/:name/jobs/:id`, `POST /queues/:name/pause`, `POST /queues/:name/resume`, `GET /queues/:name/counts`, `GET /health`.
119
+ ## Performance
179
120
 
180
- For zero-overhead integration, call Valkey Server Functions directly from any language with a Valkey client. See [Wire Protocol](docs/WIRE_PROTOCOL.md) for FCALL signatures, key layout, and examples in Python and Go.
121
+ Benchmarked on AWS ElastiCache Valkey 8.2 (r7g.large) with TLS, EC2 client in the same region.
122
+
123
+ | Concurrency | glide-mq | BullMQ | Delta |
124
+ | :---------: | ---------: | ---------: | :---: |
125
+ | c=5 | 10,754 j/s | 9,866 j/s | +9% |
126
+ | c=10 | 18,218 j/s | 13,541 j/s | +35% |
127
+ | c=15 | 19,583 j/s | 14,162 j/s | +38% |
128
+ | c=20 | 19,408 j/s | 16,085 j/s | +21% |
129
+
130
+ The advantage comes from completing and fetching the next job in a single FCALL. The savings compound over real network latency - exactly the conditions in every production deployment. At high concurrency both libraries converge toward the Valkey single-thread ceiling.
131
+
132
+ Reproduce with `npm run bench` or `npx tsx benchmarks/elasticache-head-to-head.ts` against your own infrastructure.
133
+
134
+ ## Examples
135
+
136
+ 27 runnable examples in `examples/`. Run any with `npx tsx examples/<name>.ts`.
137
+
138
+ | Example | What it shows |
139
+ | ----------------------- | ----------------------------------------------- |
140
+ | `usage-tracking.ts` | Token and cost tracking across multi-step flows |
141
+ | `token-streaming.ts` | Real-time LLM token streaming to clients |
142
+ | `human-approval.ts` | Suspend/resume with editorial review gate |
143
+ | `model-failover.ts` | Fallback chains across providers |
144
+ | `tpm-throttle.ts` | Dual-axis RPM + TPM rate limiting |
145
+ | `budget-cap.ts` | Flow-level token and cost caps |
146
+ | `vector-search.ts` | KNN similarity search with pre-filters |
147
+ | `with-langchain.ts` | LangChain integration with token tracking |
148
+ | `with-vercel-ai-sdk.ts` | Vercel AI SDK integration with streaming |
149
+ | `rag-pipeline.ts` | RAG with embedding, indexing, retrieval |
150
+ | `ai-agent-loop.ts` | Autonomous agent loop with budget enforcement |
151
+ | `testing-mode.ts` | In-memory testing without Valkey |
152
+ | `agent-budget-loop.ts` | Agent loop with per-step budget tracking |
153
+ | `multi-model-cost.ts` | Cost breakdown across multiple models |
154
+ | `fallback-usage.ts` | Usage tracking through fallback chains |
155
+ | `streaming-sse.ts` | Server-sent events with token streaming |
156
+ | `batch-embed-tpm.ts` | Batch embeddings with TPM rate limiting |
157
+ | `thinking-model.ts` | Thinking/reasoning model token tracking |
158
+ | `cost-breakdown.ts` | Detailed per-category cost breakdown |
159
+ | `budget-weighted.ts` | Weighted budget allocation across flow steps |
160
+ | `reasoning-stream.ts` | Streaming reasoning/chain-of-thought tokens |
161
+ | `adaptive-timeout.ts` | Adaptive lock duration based on model complexity |
162
+ | `broadcast-events.ts` | Fan-out event publishing with subject filtering |
163
+ | `agent-memory.ts` | Multi-turn agent with persistent memory |
164
+ | `search-dashboard.ts` | Job search and monitoring dashboard |
165
+ | `embedding-pipeline.ts` | Batch document embedding with rate limiting |
166
+ | `content-pipeline.ts` | Content moderation with streaming and approval |
167
+
168
+ ## When NOT to use glide-mq
169
+
170
+ - **You need a log-based event streaming platform.** glide-mq is a job/task queue, not a partitioned event log. It does not provide Kafka-style topic partitions, consumer offset management, or event replay.
171
+ - **You need browser support.** The Rust NAPI client requires a server-side runtime (Node.js 20+, Bun, or Deno with NAPI support).
172
+ - **You need exactly-once semantics.** glide-mq provides at-least-once delivery. Duplicate processing is rare but possible — design processors to be idempotent.
173
+ - **You need to run without Valkey or Redis.** Production use requires Valkey 7.0+ or Redis 7.0+. For dev/testing, `TestQueue`/`TestWorker` run fully in-memory.
181
174
 
182
175
  ## Documentation
183
176
 
184
- | Guide | Topics |
185
- |-------|--------|
186
- | [Usage](docs/USAGE.md) | Queue, Worker, Producer, batch, request-reply, graceful shutdown, cluster mode |
187
- | [Broadcast](docs/BROADCAST.md) | Pub/sub fan-out, BroadcastWorker, subject filtering |
188
- | [Step Jobs](docs/STEP_JOBS.md) | `moveToDelayed`, `moveToWaitingChildren`, multi-step processors |
189
- | [Advanced](docs/ADVANCED.md) | Schedulers, rate limiting, dedup, compression, retries, DLQ, custom IDs, LIFO, TTL, serializers |
190
- | [Workflows](docs/WORKFLOWS.md) | FlowProducer, DAG, `chain`, `group`, `chord`, dynamic children |
191
- | [Observability](docs/OBSERVABILITY.md) | OpenTelemetry, time-series metrics, job logs, dashboard |
192
- | [Serverless](docs/SERVERLESS.md) | Producer, ServerlessPool, Lambda and Edge deployment |
193
- | [Testing](docs/TESTING.md) | In-memory `TestQueue` and `TestWorker` -- no Valkey needed |
194
- | [Wire Protocol](docs/WIRE_PROTOCOL.md) | Cross-language FCALL specs, key layout, Python and Go examples |
195
- | [Architecture](docs/ARCHITECTURE.md) | Key design, Valkey functions, LIFO, Broadcast, DAG internals |
196
- | [Durability](docs/DURABILITY.md) | Persistence modes, crash windows, feature-specific durability |
197
- | [Migration](docs/MIGRATION.md) | Coming from BullMQ? API mapping and step-by-step guide |
198
-
199
- ## Limitations
200
-
201
- - Requires a running Valkey 7.0+ or Redis 7.0+ instance. There is no embedded mode.
202
- - Node.js only. The Rust-native NAPI client (`@valkey/valkey-glide`) does not run in browsers or Deno.
203
- - At-least-once delivery semantics. Jobs may be processed more than once after crashes or stalled recovery.
204
- - Not a streaming platform. glide-mq is a job/task queue, not a replacement for Kafka or NATS JetStream.
205
- - Single dependency on `@glidemq/speedkey` (which wraps `@valkey/valkey-glide`). Native addon compilation is required on install.
177
+ | Guide | Topics |
178
+ | -------------------------------------- | ----------------------------------------------------------- |
179
+ | [Usage](docs/USAGE.md) | Queue, Worker, Producer, batch, request-reply, cluster mode |
180
+ | [Workflows](docs/WORKFLOWS.md) | FlowProducer, DAG, chain/group/chord, dynamic children |
181
+ | [Advanced](docs/ADVANCED.md) | Schedulers, rate limiting, dedup, compression, retries, DLQ |
182
+ | [Broadcast](docs/BROADCAST.md) | Pub/sub fan-out, subject filtering |
183
+ | [Observability](docs/OBSERVABILITY.md) | OpenTelemetry, metrics, job logs, dashboard |
184
+ | [Serverless](docs/SERVERLESS.md) | Producer, ServerlessPool, Lambda/Edge |
185
+ | [Testing](docs/TESTING.md) | In-memory TestQueue and TestWorker |
186
+ | [Wire Protocol](docs/WIRE_PROTOCOL.md) | Cross-language FCALL specs, Python/Go examples |
187
+ | [Step Jobs](docs/STEP_JOBS.md) | Step-job workflows with moveToDelayed |
188
+ | [Durability](docs/DURABILITY.md) | Durability guarantees, persistence, delivery semantics |
189
+ | [Architecture](docs/ARCHITECTURE.md) | Internal architecture and design reference |
190
+ | [Migration](docs/MIGRATION.md)          | Coming from BullMQ? API mapping guide                        |
206
191
 
207
192
  ## Ecosystem
208
193
 
209
- | Package | Description | Links |
210
- |---------|-------------|-------|
211
- | [glide-mq](https://github.com/avifenesh/glide-mq) | Core queue library | [npm](https://www.npmjs.com/package/glide-mq) |
212
- | [@glidemq/hono](https://github.com/avifenesh/glidemq-hono) | Hono middleware -- REST endpoints, SSE, serverless Producer | [npm](https://www.npmjs.com/package/@glidemq/hono) |
213
- | [@glidemq/fastify](https://github.com/avifenesh/glidemq-fastify) | Fastify plugin -- REST endpoints, SSE, serverless Producer | [npm](https://www.npmjs.com/package/@glidemq/fastify) |
214
- | [@glidemq/nestjs](https://github.com/avifenesh/glidemq-nestjs) | NestJS module -- decorators, DI, lifecycle management | [npm](https://www.npmjs.com/package/@glidemq/nestjs) |
215
- | [@glidemq/dashboard](https://github.com/avifenesh/glidemq-dashboard) | Web UI -- metrics charts, scheduler management, job mutations | [npm](https://www.npmjs.com/package/@glidemq/dashboard) |
216
- | [@glidemq/speedkey](https://github.com/avifenesh/speedkey) | Valkey GLIDE client with native NAPI bindings | [npm](https://www.npmjs.com/package/@glidemq/speedkey) |
217
- | [glidemq-examples](https://github.com/avifenesh/glidemq-examples) | 34 runnable examples across frameworks and use cases | [GitHub](https://github.com/avifenesh/glidemq-examples) |
218
-
219
- > If glide-mq is useful to you, consider [starring the repo](https://github.com/avifenesh/glide-mq). It helps others find the project.
194
+ | Package | Description |
195
+ | -------------------------------------------------------------------- | --------------------------------------------- |
196
+ | [@glidemq/speedkey](https://github.com/avifenesh/speedkey) | Valkey GLIDE client with native NAPI bindings |
197
+ | [@glidemq/dashboard](https://github.com/avifenesh/glidemq-dashboard) | Web UI for metrics, schedulers, job mutations |
198
+ | [@glidemq/hono](https://github.com/avifenesh/glidemq-hono) | Hono middleware |
199
+ | [@glidemq/fastify](https://github.com/avifenesh/glidemq-fastify) | Fastify plugin |
200
+ | [@glidemq/nestjs](https://github.com/avifenesh/glidemq-nestjs) | NestJS module |
201
+ | [@glidemq/hapi](https://github.com/avifenesh/glidemq-hapi) | Hapi plugin |
202
+ | [glide-mq.dev](https://avifenesh.github.io/glide-mq.dev/) | Full documentation site |
220
203
 
221
204
  ## Contributing
222
205
 
223
- Bug reports, feature requests, and pull requests are welcome. See [CHANGELOG.md](CHANGELOG.md) for release history.
206
+ Bug reports, feature requests, and pull requests are welcome.
224
207
 
225
208
  - [Open an issue](https://github.com/avifenesh/glide-mq/issues)
226
209
  - [Discussions](https://github.com/avifenesh/glide-mq/discussions)
210
+ - [Changelog](CHANGELOG.md)
227
211
 
228
212
  ## License
229
213
 
@@ -1,10 +1,10 @@
1
1
  import { EventEmitter } from 'events';
2
- import type { WorkerOptions, Processor, BatchProcessor, Client, Serializer } from './types';
2
+ import type { WorkerOptions, Processor, BatchProcessor, Client, Serializer, SignalEntry } from './types';
3
3
  import { Job } from './job';
4
4
  import { buildKeys } from './utils';
5
5
  import type { QueueKeys } from './functions/index';
6
6
  import { Scheduler } from './scheduler';
7
- export type WorkerEvent = 'completed' | 'failed' | 'error' | 'stalled' | 'closing' | 'closed' | 'active' | 'drained';
7
+ export type WorkerEvent = 'completed' | 'failed' | 'error' | 'stalled' | 'closing' | 'closed' | 'active' | 'drained' | 'budget-exceeded';
8
8
  /**
9
9
  * Configuration that differs between Worker and BroadcastWorker.
10
10
  * Passed from the subclass constructor to BaseWorker.
@@ -59,9 +59,12 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
59
59
  protected globalRateLimitEnabled: boolean;
60
60
  protected cachedRateLimitMax: number;
61
61
  protected cachedRateLimitDuration: number;
62
+ protected tpmLocalCounter: number;
63
+ protected tpmWindowStart: number;
62
64
  protected sandboxClose?: (force?: boolean) => Promise<void>;
63
65
  protected workerHeartbeatTimer: ReturnType<typeof setInterval> | null;
64
66
  protected pollLoopPromise: Promise<void> | null;
67
+ protected suspendContinuations: Map<string, (signals: SignalEntry[]) => Promise<any>>;
65
68
  protected readonly startedAt: number;
66
69
  protected readonly hostname: string;
67
70
  protected serializer: Serializer;
@@ -74,6 +77,9 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
74
77
  protected readonly startFrom: string;
75
78
  protected readonly skipEvents: boolean;
76
79
  protected readonly skipMetrics: boolean;
80
+ private hasCompletedListeners;
81
+ private hasActiveListeners;
82
+ private hasFailedListeners;
77
83
  protected constructor(name: string, processor: Processor<D, R> | BatchProcessor<D, R> | string, opts: WorkerOptions, config: BaseWorkerConfig);
78
84
  /**
79
85
  * Wait for the worker to be fully initialized and connected.
@@ -132,7 +138,7 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
132
138
  * Handle a moveToActive result that is not a valid hash (null or REVOKED).
133
139
  * Returns true if the result was handled (caller should return), false if the hash is valid.
134
140
  */
135
- protected handleMoveToActiveEdgeCase(moveResult: Record<string, string> | 'REVOKED' | 'EXPIRED' | 'GROUP_FULL' | 'GROUP_RATE_LIMITED' | 'GROUP_TOKEN_LIMITED' | 'ERR:COST_EXCEEDS_CAPACITY' | null, jobId: string, entryId: string): Promise<boolean>;
141
+ protected handleMoveToActiveEdgeCase(moveResult: Record<string, string> | 'REVOKED' | 'EXPIRED' | 'GROUP_FULL' | 'GROUP_RATE_LIMITED' | 'GROUP_TOKEN_LIMITED' | 'GROUP_ORDERED' | 'ERR:COST_EXCEEDS_CAPACITY' | null, jobId: string, entryId: string): Promise<boolean>;
136
142
  /**
137
143
  * Run the processor with optional timeout, AbortController, and heartbeat.
138
144
  * Returns { result, error } - exactly one will be set.
@@ -214,7 +220,7 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
214
220
  * Returns true if the job was found and aborted, false if not currently active.
215
221
  */
216
222
  abortJob(jobId: string): boolean;
217
- protected startHeartbeat(jobId: string): void;
223
+ protected startHeartbeat(jobId: string, jobLockDuration?: number): void;
218
224
  protected stopHeartbeat(jobId: string): void;
219
225
  protected moveToDLQ(job: Job<D, R>, error: Error): Promise<void>;
220
226
  /**
@@ -222,8 +228,23 @@ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter
222
228
  * Also respects any manual rate limit set via rateLimit(ms).
223
229
  */
224
230
  protected waitForRateLimit(): Promise<void>;
231
+ /**
232
+ * Check the TPM (tokens-per-minute) rate limit and wait if either the local or
233
+ * per-queue counter exceeds the configured maxTokens for the current window.
234
+ */
235
+ protected waitForTokenLimit(): Promise<void>;
236
+ /**
237
+ * Increment the TPM counter after a job completes (or reports tokens).
238
+ * Called from the completion path when tokenLimiter is configured.
239
+ */
240
+ protected incrementTpmCounter(tokens: number): Promise<void>;
225
241
  /** Refresh cached meta flags from Valkey. Called on init and each scheduler tick. */
226
242
  private refreshMetaFlags;
243
+ /**
244
+ * Read the onExceeded policy from a budget hash.
245
+ * Returns 'fail' (default) or 'pause'.
246
+ */
247
+ private getBudgetOnExceeded;
227
248
  /**
228
249
  * Register this worker in Valkey with a TTL-based heartbeat key.
229
250
  * The key expires after stalledInterval ms; a periodic timer refreshes it at half that interval.