@qianxude/tem 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 qianxude
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,303 @@
1
+ # tem
2
+
3
+ A lightweight, embeddable task execution engine for IO-bound workloads (LLM calls, API requests) with SQLite persistence, automatic retry, and rate limiting.
4
+
5
+ Built for **single-process, IO-bound scenarios** where you need reliable task execution without the complexity of distributed systems.
6
+
7
+ ---
8
+
9
+ ## Features
10
+
11
+ - **SQLite Persistence** — Tasks survive process restarts using `bun:sqlite` with WAL mode
12
+ - **Claim-based Execution** — Atomic task claiming prevents duplicate execution, safe for concurrent async operations
13
+ - **Batch Management** — Group tasks into batches with custom `code` tags for easy identification and recovery
14
+ - **Automatic Retry** — Configurable max attempts with automatic retry for failed tasks
15
+ - **Resume & Recover** — Resume interrupted batches (crash recovery) or retry all failed tasks after fixing issues
16
+ - **Built-in Concurrency Control** — Native semaphore-based concurrency, no need for p-limit/p-queue
17
+ - **Rate Limiting** — Token bucket rate limiter for per-minute/per-second API limits (essential for LLM providers)
18
+ - **Zero External Dependencies** — No Redis, no message queues, no complex infrastructure
19
+
20
+ ---
21
+
22
+ ## When to Use tem
23
+
24
+ Use tem when you:
25
+
26
+ - Run IO-bound tasks (LLM calls, API requests) from a single process
27
+ - Need persistence across restarts without external databases
28
+ - Want built-in retry and rate limiting without complex setup
29
+ - Process tasks in batches and need checkpoint/resume capabilities
30
+ - Don't need multi-process clusters or DAG dependencies (yet)
31
+
32
+ Don't use tem when you need:
33
+
34
+ - Multi-process worker clusters (CPU-bound tasks)
35
+ - Complex task dependencies (DAG)
36
+ - Sub-millisecond latency requirements
37
+ - Distributed execution across machines
38
+
39
+ ---
40
+
41
+ ## Quick Start
42
+
43
+ ```typescript
44
+ import { TEM } from "@qianxude/tem";
45
+
46
+ // Initialize
47
+ const tem = new TEM({
48
+   databasePath: "./tem.db", // SQLite file path
49
+   concurrency: 5, // Max 5 concurrent tasks
50
+   pollIntervalMs: 1000, // Check for new tasks every 1s
51
+   defaultMaxAttempts: 3, // Retry each task up to 3 times
52
+   // Respect LLM provider limits:
53
+   // 60 requests per 60-second window
54
+   rateLimit: { requests: 60, windowMs: 60_000 }
55
+ });
56
+
57
+ // Create a batch
58
+ const batch = await tem.batch.create({
59
+ code: "2026-02-15-llm-fix", // Your custom tag
60
+ type: "rewrite-docs"
61
+ });
62
+
63
+ // Enqueue tasks
64
+ await tem.task.enqueueMany([
65
+ { batchId: batch.id, type: "rewrite", payload: { docId: 1 } },
66
+ { batchId: batch.id, type: "rewrite", payload: { docId: 2 } },
67
+ { batchId: batch.id, type: "rewrite", payload: { docId: 3 } }
68
+ ]);
69
+
70
+ // Register handler
71
+ tem.worker.register("rewrite", async (payload, ctx) => {
72
+   const result = await callLLM(payload);
73
+   return result; // Stored in task.result
74
+ });
75
+
76
+ // Start processing
77
+ tem.worker.start();
78
+ ```
79
+
80
+ ---
81
+
82
+ ## Task Lifecycle
83
+
84
+ ```
85
+ pending
86
+ ↓ claim (atomic)
87
+ running
88
+ ↓ success
89
+ completed
90
+
91
+ running
92
+ ↓ error + attempt < max_attempt
93
+ pending (auto-retry)
94
+
95
+ running
96
+ ↓ error + attempt >= max_attempt
97
+ failed
98
+ ```
99
+
100
+ ---
101
+
102
+ ## Recovery Patterns
103
+
104
+ ### Resume After Crash
105
+
106
+ If the process crashes while tasks are `running`, resume them on restart:
107
+
108
+ ```typescript
109
+ // Reset all 'running' tasks back to 'pending'
110
+ await tem.batch.resume(batchId);
111
+ tem.worker.start(); // Continue processing
112
+ ```
113
+
114
+ ### Retry Failed Tasks
115
+
116
+ After fixing the root cause (e.g., API key issue), retry all failed tasks:
117
+
118
+ ```typescript
119
+ // Reset failed tasks to pending, attempt counter reset to 0
120
+ await tem.batch.retryFailed(batchId);
121
+ tem.worker.start();
122
+ ```
123
+
124
+ ---
125
+
126
+ ## Architecture
127
+
128
+ ```
129
+ TEM
130
+ ├── DatabaseLayer # bun:sqlite with WAL mode
131
+ ├── BatchService # Batch CRUD + recovery
132
+ ├── TaskService # Task enqueue + claim + state updates
133
+ ├── Worker # Execution loop with concurrency/rate limiting
134
+ ├── ConcurrencyController # Semaphore for local concurrency
135
+ ├── RateLimiter # Token bucket for API rate limits
136
+ └── RetryStrategy # Configurable retry logic
137
+ ```
138
+
139
+ ### Why Claim-Based?
140
+
141
+ Instead of:
142
+ ```typescript
143
+ // WRONG: Race conditions in concurrent scenarios
144
+ const task = await db.query("SELECT * FROM task WHERE status='pending'");
145
+ await db.run("UPDATE task SET status='running' WHERE id=?", task.id);
146
+ ```
147
+
148
+ tem uses atomic claim:
149
+ ```typescript
150
+ // CORRECT: Atomic state transition with optimistic locking
151
+ UPDATE task
152
+ SET status='running', claimed_at=?, version=version+1
153
+ WHERE id=? AND status='pending' AND version=?
154
+ ```
155
+
156
+ This ensures:
157
+ - No duplicate execution even with concurrent async operations
158
+ - Safe for future multi-worker extensions
159
+ - Clear ownership of running tasks
160
+
161
+ ---
162
+
163
+ ## Database Schema
164
+
165
+ ### batch
166
+
167
+ | Column | Type | Description |
168
+ |--------|------|-------------|
169
+ | id | TEXT PK | UUID |
170
+ | code | TEXT | User-provided batch tag (e.g., "2026-02-15-run") |
171
+ | type | TEXT | Batch type for categorization |
172
+ | created_at | INTEGER | Timestamp |
173
+ | completed_at | INTEGER | Timestamp when all tasks done |
174
+ | metadata | TEXT | JSON metadata |
175
+
176
+ ### task
177
+
178
+ | Column | Type | Description |
179
+ |--------|------|-------------|
180
+ | id | TEXT PK | UUID |
181
+ | batch_id | TEXT FK | Parent batch |
182
+ | type | TEXT | Task type for handler routing |
183
+ | status | TEXT | pending/running/completed/failed |
184
+ | payload | TEXT | JSON input data (opaque to framework) |
185
+ | result | TEXT | JSON output from handler |
186
+ | error | TEXT | Error message on failure |
187
+ | attempt | INTEGER | Current attempt count |
188
+ | max_attempt | INTEGER | Max retry attempts |
189
+ | claimed_at | INTEGER | When task was claimed |
190
+ | completed_at | INTEGER | When task finished |
191
+ | version | INTEGER | Optimistic lock version |
192
+ | created_at | INTEGER | Timestamp |
193
+
194
+ ---
195
+
196
+ ## API Reference
197
+
198
+ ### TEM Configuration
199
+
200
+ ```typescript
201
+ interface TEMConfig {
202
+   databasePath: string; // SQLite file path
203
+   concurrency: number; // Max concurrent tasks
204
+   pollIntervalMs: number; // Poll interval in milliseconds
205
+   defaultMaxAttempts: number; // Default retry limit per task
206
+
207
+   // Token-bucket limit: `requests` per `windowMs` milliseconds
208
+   rateLimit?: { requests: number; windowMs: number };
209
+ }
210
+ ```
211
+
212
+ ### Batch Operations
213
+
214
+ ```typescript
215
+ // Create batch
216
+ const batch = await tem.batch.create({
217
+ code: "unique-batch-code",
218
+ type: "batch-type",
219
+ metadata?: { ... }
220
+ });
221
+
222
+ // Get batch info
223
+ const batch = await tem.batch.get(batchId);
224
+
225
+ // List batches
226
+ const batches = await tem.batch.list({ type?: "..." });
227
+
228
+ // Get statistics
229
+ const stats = await tem.batch.getStats(batchId);
230
+ // { pending: 5, running: 2, completed: 10, failed: 3 }
231
+
232
+ // Resume after crash (running → pending)
233
+ await tem.batch.resume(batchId);
234
+
235
+ // Retry all failed (failed → pending, attempt=0)
236
+ await tem.batch.retryFailed(batchId);
237
+ ```
238
+
239
+ ### Task Operations
240
+
241
+ ```typescript
242
+ // Enqueue single task
243
+ await tem.task.enqueue({
244
+ batchId: string,
245
+ type: string,
246
+ payload: object,
247
+ maxAttempt?: number // Default: 3
248
+ });
249
+
250
+ // Bulk enqueue (transaction)
251
+ await tem.task.enqueueMany([
252
+ { batchId, type, payload },
253
+ ...
254
+ ]);
255
+ ```
256
+
257
+ ### Worker
258
+
259
+ ```typescript
260
+ // Register handler
261
+ tem.worker.register("task-type", async (payload, ctx) => {
262
+   // ctx.taskId, ctx.batchId, ctx.attempt, ctx.signal
263
+   const result = await doWork(payload);
264
+   return result; // Will be JSON-serialized to task.result
265
+ });
266
+
267
+ // Control execution
268
+ tem.worker.start();
269
+ await tem.worker.stop();
270
+ ```
271
+
272
+ ---
273
+
274
+ ## Design Principles
275
+
276
+ 1. **Single Process First** — No multi-process complexity until you actually need it
277
+ 2. **Database as Source of Truth** — SQLite with WAL mode, atomic updates only
278
+ 3. **Claim Model** — Never assume you own a task until you atomically claim it
279
+ 4. **Opaque Payload** — Framework doesn't parse payload; handlers decide business logic
280
+ 5. **Batch as Unit** — All operations (resume, retry) work at batch level for convenience
281
+
282
+ ---
283
+
284
+ ## Roadmap
285
+
286
+ - [x] Core execution engine
287
+ - [x] SQLite persistence
288
+ - [x] Claim-based task acquisition
289
+ - [x] Concurrency control
290
+ - [x] Rate limiting
291
+ - [x] Retry mechanism
292
+ - [x] Batch resume/retry
293
+ - [ ] Priority queue
294
+ - [ ] Delayed/scheduled tasks
295
+ - [ ] Task timeout handling
296
+ - [ ] Metrics and observability
297
+ - [ ] Multi-process worker cluster (future)
298
+
299
+ ---
300
+
301
+ ## License
302
+
303
+ MIT
package/package.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "name": "@qianxude/tem",
3
+ "version": "0.2.0",
4
+ "description": "A lightweight task execution engine for IO-bound workloads with SQLite persistence, retry, and rate limiting",
5
+ "module": "src/index.ts",
6
+ "type": "module",
7
+ "exports": {
8
+ ".": "./src/index.ts"
9
+ },
10
+ "files": [
11
+ "src/**/*",
12
+ "README.md",
13
+ "LICENSE"
14
+ ],
15
+ "publishConfig": {
16
+ "access": "public",
17
+ "registry": "https://registry.npmjs.org/"
18
+ },
19
+ "scripts": {
20
+ "typecheck": "tsc --noEmit",
21
+ "test": "bun test",
22
+ "test:integration": "bun test tests/integration/*.test.ts",
23
+ "test:mock-server": "bun test tests/integration/mock-server.test.ts",
24
+ "test:simple-tasks": "bun test tests/integration/tem-with-mock-server.test.ts",
25
+ "test:complex-tasks": "bun test tests/integration/llm-provider-simulation.test.ts",
26
+ "test:auto-detect": "bun test tests/integration/auto-detect.test.ts",
27
+ "lint": "oxlint",
28
+ "lint:file": "oxlint",
29
+ "dev": "bun --watch src/index.ts",
30
+ "publish:pkg": "bun publish --access public",
31
+ "version:patch": "./scripts/version.sh patch",
32
+ "version:minor": "./scripts/version.sh minor",
33
+ "version:major": "./scripts/version.sh major"
34
+ },
35
+ "devDependencies": {
36
+ "@types/bun": "latest",
37
+ "oxlint": "latest"
38
+ },
39
+ "peerDependencies": {
40
+ "typescript": "^5.0.0"
41
+ }
42
+ }
package/src/index.ts ADDED
@@ -0,0 +1,4 @@
1
+ export { Worker, NonRetryableError } from './worker.js';
2
+ export type { WorkerConfig } from './worker.js';
3
+ export { TEM } from './tem.js';
4
+ export type { TEMConfig, DetectOptions, DetectedConfig } from './tem.js';
package/src/tem.ts ADDED
@@ -0,0 +1,100 @@
1
+ import { Database, type DatabaseOptions } from '../database/index.js';
2
+ import { BatchService, TaskService } from '../services/index.js';
3
+ import { Worker, type WorkerConfig } from './worker.js';
4
+ import {
5
+ detectConstraints,
6
+ type DetectOptions,
7
+ type DetectedConfig,
8
+ } from '../utils/auto-detect.js';
9
+
10
+ export type { DetectOptions, DetectedConfig };
11
+
12
+ export interface TEMConfig {
13
+ // Database
14
+ databasePath: string;
15
+
16
+ // Concurrency
17
+ concurrency: number;
18
+
19
+ // Rate limiting
20
+ rateLimit?: {
21
+ requests: number;
22
+ windowMs: number;
23
+ };
24
+
25
+ // Retry
26
+ defaultMaxAttempts: number;
27
+
28
+ // Polling
29
+ pollIntervalMs: number;
30
+ }
31
+
32
+ export class TEM {
33
+ readonly batch: BatchService;
34
+ readonly task: TaskService;
35
+ readonly worker: Worker;
36
+
37
+ private database: Database;
38
+
39
+ /**
40
+ * Auto-detect API constraints including maximum concurrency and rate limits.
41
+ * Uses binary search for concurrency detection and burst testing for rate limits.
42
+ *
43
+ * @example
44
+ * ```typescript
45
+ * const config = await TEM.detectConstraints({
46
+ * url: 'https://api.openai.com/v1/chat/completions',
47
+ * method: 'POST',
48
+ * headers: {
49
+ * 'Authorization': 'Bearer ' + process.env.OPENAI_API_KEY,
50
+ * 'Content-Type': 'application/json'
51
+ * },
52
+ * body: {
53
+ * model: 'gpt-4o-mini',
54
+ * messages: [{ role: 'user', content: 'Hi' }],
55
+ * max_tokens: 10
56
+ * }
57
+ * });
58
+ *
59
+ * const tem = new TEM({
60
+ * databasePath: './tasks.db',
61
+ * concurrency: config.concurrency,
62
+ * rateLimit: config.rateLimit,
63
+ * defaultMaxAttempts: 3,
64
+ * pollIntervalMs: 100
65
+ * });
66
+ * ```
67
+ */
68
+ static async detectConstraints(options: DetectOptions): Promise<DetectedConfig> {
69
+ return detectConstraints(options);
70
+ }
71
+
72
+ constructor(config: TEMConfig) {
73
+ // Initialize database
74
+ const dbOptions: DatabaseOptions = {
75
+ path: config.databasePath,
76
+ };
77
+ this.database = new Database(dbOptions);
78
+
79
+ // Initialize services
80
+ this.batch = new BatchService(this.database);
81
+ this.task = new TaskService(this.database);
82
+
83
+ // Initialize worker with config
84
+ const workerConfig: WorkerConfig = {
85
+ concurrency: config.concurrency,
86
+ pollIntervalMs: config.pollIntervalMs,
87
+ rateLimit: config.rateLimit,
88
+ };
89
+ this.worker = new Worker(this.task, workerConfig);
90
+ }
91
+
92
+ /**
93
+ * Stop the TEM engine.
94
+ * Stops the worker and closes the database connection.
95
+ */
96
+ async stop(): Promise<void> {
97
+ await this.worker.stop();
98
+ this.database.close();
99
+ }
100
+ }
package/src/worker.ts ADDED
@@ -0,0 +1,168 @@
1
+ import * as i from '../interfaces/index.js';
2
+ import { TaskService } from '../services/task.js';
3
+ import { ConcurrencyController, RateLimiter, type RateLimitConfig } from '../utils/index.js';
4
+
5
+ /**
6
+ * Error class to mark errors as non-retryable.
7
+ * When thrown from a task handler, the task will fail immediately
8
+ * without retry attempts.
9
+ */
10
+ export class NonRetryableError extends Error {
11
+ constructor(message: string) {
12
+ super(message);
13
+ this.name = 'NonRetryableError';
14
+ }
15
+ }
16
+
17
+ export interface WorkerConfig {
18
+ concurrency: number;
19
+ pollIntervalMs: number;
20
+ rateLimit?: RateLimitConfig;
21
+ }
22
+
23
+ export class Worker {
24
+ private handlers = new Map<string, i.TaskHandler>();
25
+ private concurrency: ConcurrencyController;
26
+ private rateLimiter?: RateLimiter;
27
+ private running = false;
28
+ private pollIntervalMs: number;
29
+ private abortController: AbortController;
30
+ private inFlightTasks: Set<Promise<void>> = new Set();
31
+
32
+ constructor(
33
+ private taskService: TaskService,
34
+ config: WorkerConfig
35
+ ) {
36
+ this.concurrency = new ConcurrencyController(config.concurrency);
37
+ this.pollIntervalMs = config.pollIntervalMs;
38
+ this.abortController = new AbortController();
39
+
40
+ if (config.rateLimit) {
41
+ this.rateLimiter = new RateLimiter(config.rateLimit);
42
+ }
43
+ }
44
+
45
+ /**
46
+ * Register a handler for a specific task type.
47
+ */
48
+ register<TInput = unknown, TOutput = unknown>(
49
+ type: string,
50
+ handler: i.TaskHandler<TInput, TOutput>
51
+ ): void {
52
+ this.handlers.set(type, handler as i.TaskHandler);
53
+ }
54
+
55
+ /**
56
+ * Start the worker. Begins polling for and executing tasks.
57
+ */
58
+ start(): void {
59
+ if (this.running) return;
60
+
61
+ this.running = true;
62
+ this.abortController = new AbortController();
63
+ this.runLoop();
64
+ }
65
+
66
+ /**
67
+ * Stop the worker. Waits for in-flight tasks to complete.
68
+ */
69
+ async stop(): Promise<void> {
70
+ if (!this.running) return;
71
+
72
+ this.running = false;
73
+ this.abortController.abort();
74
+
75
+ // Wait for all in-flight tasks to complete
76
+ if (this.inFlightTasks.size > 0) {
77
+ await Promise.all(this.inFlightTasks);
78
+ }
79
+ }
80
+
81
+ /**
82
+ * Main execution loop.
83
+ */
84
+ private async runLoop(): Promise<void> {
85
+ while (this.running) {
86
+ // Acquire a slot first (may wait if at concurrency limit)
87
+ await this.concurrency.acquire();
88
+
89
+ try {
90
+ // Check if we're still running after acquiring
91
+ if (!this.running) {
92
+ this.concurrency.release();
93
+ break;
94
+ }
95
+
96
+ // Claim a task while holding the concurrency slot
97
+ const task = await this.taskService.claim();
98
+
99
+ if (!task) {
100
+ // No task available, release the slot and sleep
101
+ this.concurrency.release();
102
+ if (this.running) {
103
+ await Bun.sleep(this.pollIntervalMs);
104
+ }
105
+ continue;
106
+ }
107
+
108
+ // Execute task without awaiting to allow parallel execution
109
+ const taskPromise = this.execute(task);
110
+ this.inFlightTasks.add(taskPromise);
111
+ taskPromise.then(() => {
112
+ this.inFlightTasks.delete(taskPromise);
113
+ });
114
+ } catch {
115
+ // Release slot on error and continue
116
+ this.concurrency.release();
117
+ }
118
+ }
119
+ }
120
+
121
+ /**
122
+ * Execute a single task.
123
+ * Note: Assumes concurrency slot has already been acquired.
124
+ */
125
+ private async execute(task: i.Task): Promise<void> {
126
+ try {
127
+ if (this.rateLimiter) {
128
+ await this.rateLimiter.acquire();
129
+ }
130
+
131
+ const handler = this.handlers.get(task.type);
132
+ if (!handler) {
133
+ throw new NonRetryableError(`No handler registered for type: ${task.type}`);
134
+ }
135
+
136
+ const payload = JSON.parse(task.payload);
137
+ const context: i.TaskContext = {
138
+ taskId: task.id,
139
+ batchId: task.batchId,
140
+ attempt: task.attempt,
141
+ signal: this.abortController.signal,
142
+ };
143
+
144
+ const result = await handler(payload, context);
145
+ await this.taskService.complete(task.id, result);
146
+ } catch (error) {
147
+ await this.handleError(task, error);
148
+ } finally {
149
+ this.concurrency.release();
150
+ }
151
+ }
152
+
153
+ /**
154
+ * Handle task execution errors.
155
+ */
156
+ private async handleError(task: i.Task, error: unknown): Promise<void> {
157
+ const isRetryable = !(error instanceof NonRetryableError);
158
+ const shouldRetry = isRetryable && task.attempt < task.maxAttempt;
159
+
160
+ if (shouldRetry) {
161
+ // Reset to pending for automatic retry (attempt already incremented by claim)
162
+ await this.taskService.retry(task.id);
163
+ } else {
164
+ const message = error instanceof Error ? error.message : String(error);
165
+ await this.taskService.fail(task.id, message);
166
+ }
167
+ }
168
+ }