@qianxude/tem 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/core/tem.ts +29 -1
- package/src/core/worker.ts +78 -6
- package/src/database/index.ts +47 -7
- package/src/index.ts +1 -1
- package/src/interfaces/index.ts +60 -0
- package/src/services/batch-interruption.ts +192 -0
- package/src/services/batch.ts +32 -4
- package/src/services/index.ts +1 -0
- package/src/utils/auto-detect.ts +5 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@qianxude/tem",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "A lightweight task execution engine for IO-bound workloads with SQLite persistence, retry, and rate limiting",
|
|
5
5
|
"module": "src/index.ts",
|
|
6
6
|
"type": "module",
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
"test:auto-detect": "bun test tests/integration/auto-detect.test.ts",
|
|
30
30
|
"lint": "oxlint",
|
|
31
31
|
"lint:file": "oxlint",
|
|
32
|
+
"example:llm-detect": "bun examples/llm-detect.ts",
|
|
32
33
|
"dev": "bun --watch src/index.ts",
|
|
33
34
|
"cli": "bun ./src/cli/index.ts",
|
|
34
35
|
"publish:pkg": "bun publish --access public",
|
package/src/core/tem.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Database, type DatabaseOptions } from '../database/index.js';
|
|
2
|
-
import { BatchService, TaskService } from '../services/index.js';
|
|
2
|
+
import { BatchService, TaskService, BatchInterruptionService } from '../services/index.js';
|
|
3
3
|
import { Worker, type WorkerConfig } from './worker.js';
|
|
4
4
|
import {
|
|
5
5
|
detectConstraints,
|
|
@@ -27,12 +27,16 @@ export interface TEMConfig {
|
|
|
27
27
|
|
|
28
28
|
// Polling
|
|
29
29
|
pollIntervalMs: number;
|
|
30
|
+
|
|
31
|
+
// Optional: Specific batch ID to process (if set, only processes this batch)
|
|
32
|
+
batchId?: string;
|
|
30
33
|
}
|
|
31
34
|
|
|
32
35
|
export class TEM {
|
|
33
36
|
readonly batch: BatchService;
|
|
34
37
|
readonly task: TaskService;
|
|
35
38
|
readonly worker: Worker;
|
|
39
|
+
readonly interruption: BatchInterruptionService;
|
|
36
40
|
|
|
37
41
|
private database: Database;
|
|
38
42
|
|
|
@@ -70,6 +74,7 @@ export class TEM {
|
|
|
70
74
|
}
|
|
71
75
|
|
|
72
76
|
constructor(config: TEMConfig) {
|
|
77
|
+
|
|
73
78
|
// Initialize database
|
|
74
79
|
const dbOptions: DatabaseOptions = {
|
|
75
80
|
path: config.databasePath,
|
|
@@ -79,12 +84,15 @@ export class TEM {
|
|
|
79
84
|
// Initialize services
|
|
80
85
|
this.batch = new BatchService(this.database);
|
|
81
86
|
this.task = new TaskService(this.database);
|
|
87
|
+
this.interruption = new BatchInterruptionService(this.database, this.batch);
|
|
82
88
|
|
|
83
89
|
// Initialize worker with config
|
|
84
90
|
const workerConfig: WorkerConfig = {
|
|
85
91
|
concurrency: config.concurrency,
|
|
86
92
|
pollIntervalMs: config.pollIntervalMs,
|
|
87
93
|
rateLimit: config.rateLimit,
|
|
94
|
+
batchId: config.batchId,
|
|
95
|
+
interruptionService: this.interruption,
|
|
88
96
|
};
|
|
89
97
|
this.worker = new Worker(this.task, workerConfig);
|
|
90
98
|
}
|
|
@@ -97,4 +105,24 @@ export class TEM {
|
|
|
97
105
|
await this.worker.stop();
|
|
98
106
|
this.database.close();
|
|
99
107
|
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Manually interrupt a batch with a specified reason.
|
|
111
|
+
* This will stop the worker if processing this batch and prevent further tasks from being claimed.
|
|
112
|
+
*
|
|
113
|
+
* @param batchId - The ID of the batch to interrupt
|
|
114
|
+
* @param reason - The reason for interruption (default: 'manual')
|
|
115
|
+
* @param message - Optional custom message explaining the interruption
|
|
116
|
+
*/
|
|
117
|
+
async interruptBatch(
|
|
118
|
+
batchId: string,
|
|
119
|
+
reason?: import('../interfaces/index.js').BatchInterruptionReason,
|
|
120
|
+
message?: string
|
|
121
|
+
): Promise<void> {
|
|
122
|
+
await this.interruption.interrupt(
|
|
123
|
+
batchId,
|
|
124
|
+
reason ?? 'manual',
|
|
125
|
+
message ?? 'Batch manually interrupted'
|
|
126
|
+
);
|
|
127
|
+
}
|
|
100
128
|
}
|
package/src/core/worker.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as i from '../interfaces/index.js';
|
|
2
|
-
import { TaskService } from '../services/
|
|
2
|
+
import { TaskService, BatchInterruptionService } from '../services/index.js';
|
|
3
3
|
import { ConcurrencyController, RateLimiter, type RateLimitConfig } from '../utils/index.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -18,6 +18,10 @@ export interface WorkerConfig {
|
|
|
18
18
|
concurrency: number;
|
|
19
19
|
pollIntervalMs: number;
|
|
20
20
|
rateLimit?: RateLimitConfig;
|
|
21
|
+
/** Specific batch ID to process (optional - if set, only processes this batch) */
|
|
22
|
+
batchId?: string;
|
|
23
|
+
/** Interruption service for checking batch status */
|
|
24
|
+
interruptionService?: BatchInterruptionService;
|
|
21
25
|
}
|
|
22
26
|
|
|
23
27
|
export class Worker {
|
|
@@ -28,6 +32,13 @@ export class Worker {
|
|
|
28
32
|
private pollIntervalMs: number;
|
|
29
33
|
private abortController: AbortController;
|
|
30
34
|
private inFlightTasks: Set<Promise<void>> = new Set();
|
|
35
|
+
private batchId?: string;
|
|
36
|
+
private interruptionService?: BatchInterruptionService;
|
|
37
|
+
|
|
38
|
+
// Track failure context for interruption decisions
|
|
39
|
+
private consecutiveFailures = 0;
|
|
40
|
+
private rateLimitHits = 0;
|
|
41
|
+
private concurrencyErrors = 0;
|
|
31
42
|
|
|
32
43
|
constructor(
|
|
33
44
|
private taskService: TaskService,
|
|
@@ -36,6 +47,8 @@ export class Worker {
|
|
|
36
47
|
this.concurrency = new ConcurrencyController(config.concurrency);
|
|
37
48
|
this.pollIntervalMs = config.pollIntervalMs;
|
|
38
49
|
this.abortController = new AbortController();
|
|
50
|
+
this.batchId = config.batchId;
|
|
51
|
+
this.interruptionService = config.interruptionService;
|
|
39
52
|
|
|
40
53
|
if (config.rateLimit) {
|
|
41
54
|
this.rateLimiter = new RateLimiter(config.rateLimit);
|
|
@@ -93,8 +106,18 @@ export class Worker {
|
|
|
93
106
|
break;
|
|
94
107
|
}
|
|
95
108
|
|
|
109
|
+
// For batch-specific workers: check batch is still active
|
|
110
|
+
if (this.batchId && this.interruptionService) {
|
|
111
|
+
const isActive = await this.interruptionService.isBatchActive(this.batchId);
|
|
112
|
+
if (!isActive) {
|
|
113
|
+
this.concurrency.release();
|
|
114
|
+
this.stop();
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
96
119
|
// Claim a task while holding the concurrency slot
|
|
97
|
-
const task = await this.taskService.claim();
|
|
120
|
+
const task = await this.taskService.claim(this.batchId);
|
|
98
121
|
|
|
99
122
|
if (!task) {
|
|
100
123
|
// No task available, release the slot and sleep
|
|
@@ -123,6 +146,8 @@ export class Worker {
|
|
|
123
146
|
* Note: Assumes concurrency slot has already been acquired.
|
|
124
147
|
*/
|
|
125
148
|
private async execute(task: i.Task): Promise<void> {
|
|
149
|
+
const taskStartTime = Date.now();
|
|
150
|
+
|
|
126
151
|
try {
|
|
127
152
|
if (this.rateLimiter) {
|
|
128
153
|
await this.rateLimiter.acquire();
|
|
@@ -134,6 +159,8 @@ export class Worker {
|
|
|
134
159
|
}
|
|
135
160
|
|
|
136
161
|
const payload = JSON.parse(task.payload);
|
|
162
|
+
|
|
163
|
+
// Build context with optional deadline
|
|
137
164
|
const context: i.TaskContext = {
|
|
138
165
|
taskId: task.id,
|
|
139
166
|
batchId: task.batchId,
|
|
@@ -141,10 +168,22 @@ export class Worker {
|
|
|
141
168
|
signal: this.abortController.signal,
|
|
142
169
|
};
|
|
143
170
|
|
|
171
|
+
// If we have interruption service and batchId, set deadline from criteria
|
|
172
|
+
if (this.interruptionService && task.batchId) {
|
|
173
|
+
const { criteria } = await this.interruptionService['batchService'].getWithCriteria(task.batchId);
|
|
174
|
+
if (criteria?.taskTimeoutMs) {
|
|
175
|
+
context.deadline = new Date(taskStartTime + criteria.taskTimeoutMs);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
144
179
|
const result = await handler(payload, context);
|
|
145
180
|
await this.taskService.complete(task.id, result);
|
|
181
|
+
|
|
182
|
+
// Reset consecutive failures on success
|
|
183
|
+
this.consecutiveFailures = 0;
|
|
146
184
|
} catch (error) {
|
|
147
|
-
|
|
185
|
+
const taskRuntimeMs = Date.now() - taskStartTime;
|
|
186
|
+
await this.handleError(task, error, taskRuntimeMs);
|
|
148
187
|
} finally {
|
|
149
188
|
this.concurrency.release();
|
|
150
189
|
}
|
|
@@ -153,16 +192,49 @@ export class Worker {
|
|
|
153
192
|
/**
|
|
154
193
|
* Handle task execution errors.
|
|
155
194
|
*/
|
|
156
|
-
private async handleError(task: i.Task, error: unknown): Promise<void> {
|
|
195
|
+
private async handleError(task: i.Task, error: unknown, taskRuntimeMs?: number): Promise<void> {
|
|
157
196
|
const isRetryable = !(error instanceof NonRetryableError);
|
|
158
197
|
const shouldRetry = isRetryable && task.attempt < task.maxAttempt;
|
|
159
198
|
|
|
199
|
+
// Track failure type for interruption decisions
|
|
200
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
201
|
+
if (this.isRateLimitError(errorMessage)) {
|
|
202
|
+
this.rateLimitHits++;
|
|
203
|
+
} else if (this.isConcurrencyError(errorMessage)) {
|
|
204
|
+
this.concurrencyErrors++;
|
|
205
|
+
}
|
|
206
|
+
|
|
160
207
|
if (shouldRetry) {
|
|
161
208
|
// Reset to pending for automatic retry (attempt already incremented by claim)
|
|
162
209
|
await this.taskService.retry(task.id);
|
|
210
|
+
this.consecutiveFailures++;
|
|
163
211
|
} else {
|
|
164
|
-
|
|
165
|
-
|
|
212
|
+
await this.taskService.fail(task.id, errorMessage);
|
|
213
|
+
this.consecutiveFailures++;
|
|
214
|
+
|
|
215
|
+
// Check if batch should be interrupted
|
|
216
|
+
if (task.batchId && this.interruptionService) {
|
|
217
|
+
const interrupted = await this.interruptionService.checkAndInterruptIfNeeded(
|
|
218
|
+
task.batchId,
|
|
219
|
+
{
|
|
220
|
+
consecutiveFailures: this.consecutiveFailures,
|
|
221
|
+
rateLimitHits: this.rateLimitHits,
|
|
222
|
+
concurrencyErrors: this.concurrencyErrors,
|
|
223
|
+
currentTaskRuntimeMs: taskRuntimeMs,
|
|
224
|
+
}
|
|
225
|
+
);
|
|
226
|
+
if (interrupted) {
|
|
227
|
+
this.stop();
|
|
228
|
+
}
|
|
229
|
+
}
|
|
166
230
|
}
|
|
167
231
|
}
|
|
232
|
+
|
|
233
|
+
private isRateLimitError(message: string): boolean {
|
|
234
|
+
return message.includes('429') || message.toLowerCase().includes('rate limit');
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
private isConcurrencyError(message: string): boolean {
|
|
238
|
+
return message.includes('502') || message.includes('503') || message.toLowerCase().includes('bad gateway') || message.toLowerCase().includes('service unavailable');
|
|
239
|
+
}
|
|
168
240
|
}
|
package/src/database/index.ts
CHANGED
|
@@ -33,13 +33,20 @@ export class Database implements i.DatabaseConnection {
|
|
|
33
33
|
)
|
|
34
34
|
`);
|
|
35
35
|
|
|
36
|
-
// Check
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
36
|
+
// Check and apply migrations in order
|
|
37
|
+
const migrations = [
|
|
38
|
+
{ name: '001_initial_schema', apply: () => this.applyInitialSchema() },
|
|
39
|
+
{ name: '002_batch_interruption', apply: () => this.applyBatchInterruptionMigration() },
|
|
40
|
+
];
|
|
41
|
+
|
|
42
|
+
for (const migration of migrations) {
|
|
43
|
+
const migrationCount = this.db
|
|
44
|
+
.query('SELECT COUNT(*) as count FROM _migration WHERE name = $name')
|
|
45
|
+
.get({ $name: migration.name }) as { count: number };
|
|
46
|
+
|
|
47
|
+
if (migrationCount.count === 0) {
|
|
48
|
+
migration.apply();
|
|
49
|
+
}
|
|
43
50
|
}
|
|
44
51
|
}
|
|
45
52
|
|
|
@@ -90,6 +97,39 @@ export class Database implements i.DatabaseConnection {
|
|
|
90
97
|
});
|
|
91
98
|
}
|
|
92
99
|
|
|
100
|
+
private applyBatchInterruptionMigration(): void {
|
|
101
|
+
const migration = `
|
|
102
|
+
-- Add status to batch table
|
|
103
|
+
ALTER TABLE batch ADD COLUMN status TEXT NOT NULL DEFAULT 'active'
|
|
104
|
+
CHECK(status IN ('active', 'interrupted', 'completed'));
|
|
105
|
+
|
|
106
|
+
-- Add interruption criteria storage (JSON)
|
|
107
|
+
ALTER TABLE batch ADD COLUMN interruption_criteria TEXT;
|
|
108
|
+
|
|
109
|
+
-- Index for quickly finding active batches
|
|
110
|
+
CREATE INDEX IF NOT EXISTS idx_batch_status ON batch(status);
|
|
111
|
+
|
|
112
|
+
-- New table: interruption log
|
|
113
|
+
CREATE TABLE IF NOT EXISTS batch_interrupt_log (
|
|
114
|
+
id TEXT PRIMARY KEY,
|
|
115
|
+
batch_id TEXT NOT NULL REFERENCES batch(id) ON DELETE CASCADE,
|
|
116
|
+
reason TEXT NOT NULL,
|
|
117
|
+
message TEXT NOT NULL,
|
|
118
|
+
stats_snapshot TEXT NOT NULL, -- JSON of BatchStats
|
|
119
|
+
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
120
|
+
);
|
|
121
|
+
|
|
122
|
+
CREATE INDEX IF NOT EXISTS idx_interrupt_log_batch_id ON batch_interrupt_log(batch_id);
|
|
123
|
+
`;
|
|
124
|
+
|
|
125
|
+
this.transaction(() => {
|
|
126
|
+
this.db.exec(migration);
|
|
127
|
+
this.db
|
|
128
|
+
.query('INSERT INTO _migration (name) VALUES ($name)')
|
|
129
|
+
.run({ $name: '002_batch_interruption' });
|
|
130
|
+
});
|
|
131
|
+
}
|
|
132
|
+
|
|
93
133
|
query<T = unknown>(sql: string, params?: SQLQueryBindings[]): T[] {
|
|
94
134
|
const stmt = this.db.prepare(sql);
|
|
95
135
|
const results = stmt.all(...(params ?? []));
|
package/src/index.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// Main exports for TEM framework
|
|
2
2
|
export * as interfaces from './interfaces/index.js';
|
|
3
3
|
export { Database, type DatabaseOptions } from './database/index.js';
|
|
4
|
-
export { BatchService, TaskService } from './services/index.js';
|
|
4
|
+
export { BatchService, TaskService, BatchInterruptionService } from './services/index.js';
|
|
5
5
|
export {
|
|
6
6
|
ConcurrencyController,
|
|
7
7
|
RateLimiter,
|
package/src/interfaces/index.ts
CHANGED
|
@@ -6,18 +6,48 @@
|
|
|
6
6
|
// ============================================================================
|
|
7
7
|
|
|
8
8
|
export type TaskStatus = 'pending' | 'running' | 'completed' | 'failed';
|
|
9
|
+
export type BatchStatus = 'active' | 'interrupted' | 'completed';
|
|
10
|
+
|
|
11
|
+
export type BatchInterruptionReason =
|
|
12
|
+
| 'error_rate_exceeded'
|
|
13
|
+
| 'failed_tasks_exceeded'
|
|
14
|
+
| 'consecutive_failures_exceeded'
|
|
15
|
+
| 'rate_limit_hits_exceeded'
|
|
16
|
+
| 'concurrency_errors_exceeded'
|
|
17
|
+
| 'task_timeout'
|
|
18
|
+
| 'batch_runtime_exceeded'
|
|
19
|
+
| 'manual';
|
|
9
20
|
|
|
10
21
|
// ============================================================================
|
|
11
22
|
// Entity Types
|
|
12
23
|
// ============================================================================
|
|
13
24
|
|
|
25
|
+
export interface BatchInterruptionCriteria {
|
|
26
|
+
/** Max error rate (0-1, e.g., 0.1 = 10%) */
|
|
27
|
+
maxErrorRate?: number;
|
|
28
|
+
/** Max absolute number of failed tasks */
|
|
29
|
+
maxFailedTasks?: number;
|
|
30
|
+
/** Max consecutive failures before interruption */
|
|
31
|
+
maxConsecutiveFailures?: number;
|
|
32
|
+
/** Max rate limit (429) hits before interruption */
|
|
33
|
+
maxRateLimitHits?: number;
|
|
34
|
+
/** Max concurrency errors (502/503) before interruption - indicates too aggressive concurrency */
|
|
35
|
+
maxConcurrencyErrors?: number;
|
|
36
|
+
/** Max runtime for a single task in ms */
|
|
37
|
+
taskTimeoutMs?: number;
|
|
38
|
+
/** Max total batch runtime in ms */
|
|
39
|
+
maxBatchRuntimeMs?: number;
|
|
40
|
+
}
|
|
41
|
+
|
|
14
42
|
export interface Batch {
|
|
15
43
|
id: string;
|
|
16
44
|
code: string;
|
|
17
45
|
type: string;
|
|
46
|
+
status: BatchStatus;
|
|
18
47
|
createdAt: Date;
|
|
19
48
|
completedAt: Date | null;
|
|
20
49
|
metadata: Record<string, unknown> | null;
|
|
50
|
+
interruptionCriteria: BatchInterruptionCriteria | null;
|
|
21
51
|
}
|
|
22
52
|
|
|
23
53
|
export interface BatchStats {
|
|
@@ -29,6 +59,14 @@ export interface BatchStats {
|
|
|
29
59
|
failed: number;
|
|
30
60
|
}
|
|
31
61
|
|
|
62
|
+
export interface BatchInterruption {
|
|
63
|
+
batchId: string;
|
|
64
|
+
reason: BatchInterruptionReason;
|
|
65
|
+
message: string;
|
|
66
|
+
statsAtInterruption: BatchStats;
|
|
67
|
+
createdAt: Date;
|
|
68
|
+
}
|
|
69
|
+
|
|
32
70
|
export interface Task {
|
|
33
71
|
id: string;
|
|
34
72
|
batchId: string | null;
|
|
@@ -88,6 +126,8 @@ export interface DetectOptions {
|
|
|
88
126
|
maxConcurrencyToTest?: number;
|
|
89
127
|
/** Duration to run rate limit tests (default: 10000) */
|
|
90
128
|
rateLimitTestDurationMs?: number;
|
|
129
|
+
/** Maximum number of requests to send during rate limit detection (default: 200) */
|
|
130
|
+
maxRateLimitTestRequests?: number;
|
|
91
131
|
}
|
|
92
132
|
|
|
93
133
|
export interface DetectedConfig {
|
|
@@ -118,6 +158,8 @@ export interface TaskContext {
|
|
|
118
158
|
batchId: string | null;
|
|
119
159
|
attempt: number;
|
|
120
160
|
signal: AbortSignal;
|
|
161
|
+
/** Deadline for task execution (for timeout enforcement) */
|
|
162
|
+
deadline?: Date;
|
|
121
163
|
}
|
|
122
164
|
|
|
123
165
|
// ============================================================================
|
|
@@ -144,6 +186,7 @@ export interface CreateBatchInput {
|
|
|
144
186
|
code: string;
|
|
145
187
|
type: string;
|
|
146
188
|
metadata?: Record<string, unknown>;
|
|
189
|
+
interruptionCriteria?: BatchInterruptionCriteria;
|
|
147
190
|
}
|
|
148
191
|
|
|
149
192
|
export interface CreateTaskInput {
|
|
@@ -162,6 +205,23 @@ export interface BatchService {
|
|
|
162
205
|
complete(id: string): Promise<void>;
|
|
163
206
|
resume(id: string): Promise<number>;
|
|
164
207
|
retryFailed(id: string): Promise<number>;
|
|
208
|
+
updateStatus(id: string, status: BatchStatus): Promise<void>;
|
|
209
|
+
getWithCriteria(id: string): Promise<{ batch: Batch; criteria: BatchInterruptionCriteria | null }>;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
export interface BatchInterruptionService {
|
|
213
|
+
checkAndInterruptIfNeeded(
|
|
214
|
+
batchId: string,
|
|
215
|
+
context: {
|
|
216
|
+
consecutiveFailures?: number;
|
|
217
|
+
rateLimitHits?: number;
|
|
218
|
+
concurrencyErrors?: number;
|
|
219
|
+
currentTaskRuntimeMs?: number;
|
|
220
|
+
}
|
|
221
|
+
): Promise<boolean>;
|
|
222
|
+
interrupt(batchId: string, reason: BatchInterruptionReason, message: string): Promise<void>;
|
|
223
|
+
isBatchActive(batchId: string): Promise<boolean>;
|
|
224
|
+
getInterruptionLog(batchId: string): Promise<BatchInterruption[]>;
|
|
165
225
|
}
|
|
166
226
|
|
|
167
227
|
export interface TaskService {
|
package/src/services/batch-interruption.ts
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import * as i from '../interfaces/index.js';
|
|
2
|
+
import type { Database } from '../database/index.js';
|
|
3
|
+
import type { BatchService } from './batch.js';
|
|
4
|
+
|
|
5
|
+
export interface BatchInterruptionRow {
|
|
6
|
+
batch_id: string;
|
|
7
|
+
reason: i.BatchInterruptionReason;
|
|
8
|
+
message: string;
|
|
9
|
+
stats_snapshot: string;
|
|
10
|
+
created_at: string;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export class BatchInterruptionService implements i.BatchInterruptionService {
|
|
14
|
+
constructor(
|
|
15
|
+
private db: Database,
|
|
16
|
+
private batchService: BatchService
|
|
17
|
+
) {}
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Check if batch should be interrupted based on current stats.
|
|
21
|
+
* Called after each task failure or periodically.
|
|
22
|
+
*/
|
|
23
|
+
async checkAndInterruptIfNeeded(
|
|
24
|
+
batchId: string,
|
|
25
|
+
context: {
|
|
26
|
+
consecutiveFailures?: number;
|
|
27
|
+
rateLimitHits?: number;
|
|
28
|
+
concurrencyErrors?: number;
|
|
29
|
+
currentTaskRuntimeMs?: number;
|
|
30
|
+
}
|
|
31
|
+
): Promise<boolean> {
|
|
32
|
+
// Fetch batch with its interruption criteria
|
|
33
|
+
const { batch, criteria } = await this.batchService.getWithCriteria(batchId);
|
|
34
|
+
|
|
35
|
+
// If already interrupted or completed, no need to check
|
|
36
|
+
if (batch.status !== 'active') {
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// If no criteria set, never interrupt
|
|
41
|
+
if (!criteria) {
|
|
42
|
+
return false;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Get current stats
|
|
46
|
+
const stats = await this.batchService.getStats(batchId);
|
|
47
|
+
|
|
48
|
+
// Check each criterion in order of severity
|
|
49
|
+
|
|
50
|
+
// 1. Check maxBatchRuntimeMs - total batch runtime
|
|
51
|
+
if (criteria.maxBatchRuntimeMs) {
|
|
52
|
+
const batchRuntimeMs = Date.now() - batch.createdAt.getTime();
|
|
53
|
+
if (batchRuntimeMs > criteria.maxBatchRuntimeMs) {
|
|
54
|
+
await this.interrupt(
|
|
55
|
+
batchId,
|
|
56
|
+
'batch_runtime_exceeded',
|
|
57
|
+
`Batch runtime (${batchRuntimeMs}ms) exceeded maximum (${criteria.maxBatchRuntimeMs}ms)`
|
|
58
|
+
);
|
|
59
|
+
return true;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// 2. Check taskTimeoutMs - single task runtime
|
|
64
|
+
if (criteria.taskTimeoutMs && context.currentTaskRuntimeMs) {
|
|
65
|
+
if (context.currentTaskRuntimeMs > criteria.taskTimeoutMs) {
|
|
66
|
+
await this.interrupt(
|
|
67
|
+
batchId,
|
|
68
|
+
'task_timeout',
|
|
69
|
+
`Task runtime (${context.currentTaskRuntimeMs}ms) exceeded maximum (${criteria.taskTimeoutMs}ms)`
|
|
70
|
+
);
|
|
71
|
+
return true;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// 3. Check maxConsecutiveFailures
|
|
76
|
+
if (criteria.maxConsecutiveFailures && context.consecutiveFailures) {
|
|
77
|
+
if (context.consecutiveFailures >= criteria.maxConsecutiveFailures) {
|
|
78
|
+
await this.interrupt(
|
|
79
|
+
batchId,
|
|
80
|
+
'consecutive_failures_exceeded',
|
|
81
|
+
`Consecutive failures (${context.consecutiveFailures}) exceeded maximum (${criteria.maxConsecutiveFailures})`
|
|
82
|
+
);
|
|
83
|
+
return true;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// 4. Check maxRateLimitHits
|
|
88
|
+
if (criteria.maxRateLimitHits && context.rateLimitHits) {
|
|
89
|
+
if (context.rateLimitHits >= criteria.maxRateLimitHits) {
|
|
90
|
+
await this.interrupt(
|
|
91
|
+
batchId,
|
|
92
|
+
'rate_limit_hits_exceeded',
|
|
93
|
+
`Rate limit hits (${context.rateLimitHits}) exceeded maximum (${criteria.maxRateLimitHits})`
|
|
94
|
+
);
|
|
95
|
+
return true;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// 5. Check maxConcurrencyErrors (502/503 errors)
|
|
100
|
+
if (criteria.maxConcurrencyErrors && context.concurrencyErrors) {
|
|
101
|
+
if (context.concurrencyErrors >= criteria.maxConcurrencyErrors) {
|
|
102
|
+
await this.interrupt(
|
|
103
|
+
batchId,
|
|
104
|
+
'concurrency_errors_exceeded',
|
|
105
|
+
`Concurrency errors (${context.concurrencyErrors}) exceeded maximum (${criteria.maxConcurrencyErrors})`
|
|
106
|
+
);
|
|
107
|
+
return true;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// 6. Check maxFailedTasks (absolute count)
|
|
112
|
+
if (criteria.maxFailedTasks) {
|
|
113
|
+
if (stats.failed >= criteria.maxFailedTasks) {
|
|
114
|
+
await this.interrupt(
|
|
115
|
+
batchId,
|
|
116
|
+
'failed_tasks_exceeded',
|
|
117
|
+
`Failed tasks (${stats.failed}) exceeded maximum (${criteria.maxFailedTasks})`
|
|
118
|
+
);
|
|
119
|
+
return true;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// 7. Check maxErrorRate (percentage)
|
|
124
|
+
if (criteria.maxErrorRate && stats.total > 0) {
|
|
125
|
+
const errorRate = stats.failed / stats.total;
|
|
126
|
+
if (errorRate > criteria.maxErrorRate) {
|
|
127
|
+
await this.interrupt(
|
|
128
|
+
batchId,
|
|
129
|
+
'error_rate_exceeded',
|
|
130
|
+
`Error rate (${(errorRate * 100).toFixed(1)}%) exceeded maximum (${(criteria.maxErrorRate * 100).toFixed(1)}%)`
|
|
131
|
+
);
|
|
132
|
+
return true;
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
return false;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Interrupt a batch atomically.
|
|
141
|
+
*/
|
|
142
|
+
async interrupt(
|
|
143
|
+
batchId: string,
|
|
144
|
+
reason: i.BatchInterruptionReason,
|
|
145
|
+
message: string
|
|
146
|
+
): Promise<void> {
|
|
147
|
+
// Get current stats for the log
|
|
148
|
+
const stats = await this.batchService.getStats(batchId);
|
|
149
|
+
|
|
150
|
+
// Update batch status to 'interrupted'
|
|
151
|
+
await this.batchService.updateStatus(batchId, 'interrupted');
|
|
152
|
+
|
|
153
|
+
// Log the interruption event
|
|
154
|
+
const id = crypto.randomUUID();
|
|
155
|
+
const now = new Date().toISOString();
|
|
156
|
+
|
|
157
|
+
this.db.run(
|
|
158
|
+
`INSERT INTO batch_interrupt_log (id, batch_id, reason, message, stats_snapshot, created_at)
|
|
159
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
160
|
+
[id, batchId, reason, message, JSON.stringify(stats), now]
|
|
161
|
+
);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* Check if a batch is active (can claim tasks from it).
|
|
166
|
+
*/
|
|
167
|
+
async isBatchActive(batchId: string): Promise<boolean> {
|
|
168
|
+
const batch = await this.batchService.getById(batchId);
|
|
169
|
+
return batch?.status === 'active';
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Get interruption history for a batch.
|
|
174
|
+
*/
|
|
175
|
+
async getInterruptionLog(batchId: string): Promise<i.BatchInterruption[]> {
|
|
176
|
+
const rows = this.db.query<BatchInterruptionRow>(
|
|
177
|
+
`SELECT batch_id, reason, message, stats_snapshot, created_at
|
|
178
|
+
FROM batch_interrupt_log
|
|
179
|
+
WHERE batch_id = ?
|
|
180
|
+
ORDER BY created_at DESC`,
|
|
181
|
+
[batchId]
|
|
182
|
+
);
|
|
183
|
+
|
|
184
|
+
return rows.map((row) => ({
|
|
185
|
+
batchId: row.batch_id,
|
|
186
|
+
reason: row.reason,
|
|
187
|
+
message: row.message,
|
|
188
|
+
statsAtInterruption: JSON.parse(row.stats_snapshot) as i.BatchStats,
|
|
189
|
+
createdAt: new Date(row.created_at),
|
|
190
|
+
}));
|
|
191
|
+
}
|
|
192
|
+
}
|
package/src/services/batch.ts
CHANGED
|
@@ -5,9 +5,11 @@ export interface BatchRow {
|
|
|
5
5
|
id: string;
|
|
6
6
|
code: string;
|
|
7
7
|
type: string;
|
|
8
|
+
status: i.BatchStatus;
|
|
8
9
|
created_at: string;
|
|
9
10
|
completed_at: string | null;
|
|
10
11
|
metadata: string | null;
|
|
12
|
+
interruption_criteria: string | null;
|
|
11
13
|
}
|
|
12
14
|
|
|
13
15
|
function rowToBatch(row: BatchRow): i.Batch {
|
|
@@ -15,9 +17,13 @@ function rowToBatch(row: BatchRow): i.Batch {
|
|
|
15
17
|
id: row.id,
|
|
16
18
|
code: row.code,
|
|
17
19
|
type: row.type,
|
|
20
|
+
status: row.status ?? 'active',
|
|
18
21
|
createdAt: new Date(row.created_at),
|
|
19
22
|
completedAt: row.completed_at ? new Date(row.completed_at) : null,
|
|
20
23
|
metadata: row.metadata ? (JSON.parse(row.metadata) as Record<string, unknown>) : null,
|
|
24
|
+
interruptionCriteria: row.interruption_criteria
|
|
25
|
+
? (JSON.parse(row.interruption_criteria) as i.BatchInterruptionCriteria)
|
|
26
|
+
: null,
|
|
21
27
|
};
|
|
22
28
|
}
|
|
23
29
|
|
|
@@ -29,9 +35,16 @@ export class BatchService implements i.BatchService {
|
|
|
29
35
|
const now = new Date().toISOString();
|
|
30
36
|
|
|
31
37
|
this.db.run(
|
|
32
|
-
`INSERT INTO batch (id, code, type, created_at, metadata)
|
|
33
|
-
VALUES (?, ?, ?, ?, ?)`,
|
|
34
|
-
[
|
|
38
|
+
`INSERT INTO batch (id, code, type, created_at, metadata, interruption_criteria)
|
|
39
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
40
|
+
[
|
|
41
|
+
id,
|
|
42
|
+
input.code,
|
|
43
|
+
input.type,
|
|
44
|
+
now,
|
|
45
|
+
input.metadata ? JSON.stringify(input.metadata) : null,
|
|
46
|
+
input.interruptionCriteria ? JSON.stringify(input.interruptionCriteria) : null,
|
|
47
|
+
]
|
|
35
48
|
);
|
|
36
49
|
|
|
37
50
|
const rows = this.db.query<BatchRow>('SELECT * FROM batch WHERE id = ?', [id]);
|
|
@@ -60,7 +73,7 @@ export class BatchService implements i.BatchService {
|
|
|
60
73
|
async complete(id: string): Promise<void> {
|
|
61
74
|
const now = new Date().toISOString();
|
|
62
75
|
this.db.run(
|
|
63
|
-
`UPDATE batch SET completed_at = ? WHERE id = ?`,
|
|
76
|
+
`UPDATE batch SET completed_at = ?, status = 'completed' WHERE id = ?`,
|
|
64
77
|
[now, id]
|
|
65
78
|
);
|
|
66
79
|
}
|
|
@@ -118,4 +131,19 @@ export class BatchService implements i.BatchService {
|
|
|
118
131
|
);
|
|
119
132
|
return result.changes ?? 0;
|
|
120
133
|
}
|
|
134
|
+
|
|
135
|
+
async updateStatus(id: string, status: i.BatchStatus): Promise<void> {
|
|
136
|
+
this.db.run(
|
|
137
|
+
`UPDATE batch SET status = ? WHERE id = ?`,
|
|
138
|
+
[status, id]
|
|
139
|
+
);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
async getWithCriteria(id: string): Promise<{ batch: i.Batch; criteria: i.BatchInterruptionCriteria | null }> {
|
|
143
|
+
const batch = await this.getById(id);
|
|
144
|
+
if (!batch) {
|
|
145
|
+
throw new Error(`Batch not found: ${id}`);
|
|
146
|
+
}
|
|
147
|
+
return { batch, criteria: batch.interruptionCriteria };
|
|
148
|
+
}
|
|
121
149
|
}
|
package/src/services/index.ts
CHANGED
package/src/utils/auto-detect.ts
CHANGED
|
@@ -6,6 +6,7 @@ export interface DetectOptions {
|
|
|
6
6
|
timeoutMs?: number;
|
|
7
7
|
maxConcurrencyToTest?: number;
|
|
8
8
|
rateLimitTestDurationMs?: number;
|
|
9
|
+
maxRateLimitTestRequests?: number;
|
|
9
10
|
}
|
|
10
11
|
|
|
11
12
|
export interface DetectedConfig {
|
|
@@ -48,6 +49,7 @@ export async function detectConstraints(options: DetectOptions): Promise<Detecte
|
|
|
48
49
|
const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
49
50
|
const maxConcurrency = options.maxConcurrencyToTest ?? DEFAULT_MAX_CONCURRENCY;
|
|
50
51
|
const rateLimitDurationMs = options.rateLimitTestDurationMs ?? DEFAULT_RATE_LIMIT_TEST_DURATION_MS;
|
|
52
|
+
const maxRateLimitTestRequests = options.maxRateLimitTestRequests ?? MAX_RATE_LIMIT_TEST_REQUESTS;
|
|
51
53
|
|
|
52
54
|
const requestOptions: RequestOptions = {
|
|
53
55
|
url: options.url,
|
|
@@ -64,7 +66,7 @@ export async function detectConstraints(options: DetectOptions): Promise<Detecte
|
|
|
64
66
|
|
|
65
67
|
// Phase 2: Detect rate limit using safe concurrency (80% of detected)
|
|
66
68
|
const safeConcurrency = Math.max(1, Math.floor(detectedConcurrency * 0.8));
|
|
67
|
-
const rateLimitResult = await detectRateLimit(requestOptions, safeConcurrency, rateLimitDurationMs, notes);
|
|
69
|
+
const rateLimitResult = await detectRateLimit(requestOptions, safeConcurrency, rateLimitDurationMs, maxRateLimitTestRequests, notes);
|
|
68
70
|
|
|
69
71
|
// Calculate confidence
|
|
70
72
|
const confidence = calculateConfidence(detectedConcurrency, rateLimitResult, notes);
|
|
@@ -206,6 +208,7 @@ async function detectRateLimit(
|
|
|
206
208
|
options: RequestOptions,
|
|
207
209
|
safeConcurrency: number,
|
|
208
210
|
durationMs: number,
|
|
211
|
+
maxRequests: number,
|
|
209
212
|
notes: string[]
|
|
210
213
|
): Promise<{ requests: number; windowMs: number }> {
|
|
211
214
|
const startTime = Date.now();
|
|
@@ -214,7 +217,7 @@ async function detectRateLimit(
|
|
|
214
217
|
let totalRequests = 0;
|
|
215
218
|
|
|
216
219
|
// Send requests as fast as possible at safe concurrency
|
|
217
|
-
while (Date.now() - startTime < durationMs && totalRequests < MAX_RATE_LIMIT_TEST_REQUESTS) {
|
|
220
|
+
while (Date.now() - startTime < durationMs && totalRequests < maxRequests) {
|
|
218
221
|
const batchStart = Date.now();
|
|
219
222
|
const results = await sendBatch(options, safeConcurrency);
|
|
220
223
|
|