@semiont/jobs 0.5.6 → 0.5.8
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +14 -13
- package/dist/index.d.ts +78 -31
- package/dist/index.js +217 -90
- package/dist/index.js.map +1 -1
- package/dist/worker-main.js +28 -19
- package/dist/worker-main.js.map +1 -1
- package/package.json +8 -8
package/README.md
CHANGED
|
@@ -20,10 +20,11 @@ npm install @semiont/jobs
|
|
|
20
20
|
|
|
21
21
|
**Dependencies:**
|
|
22
22
|
- `@semiont/core` — Core types, `SemiontProject`, EventBus
|
|
23
|
-
- `@semiont/sdk` — `SemiontSession`, `
|
|
24
|
-
- `@semiont/
|
|
23
|
+
- `@semiont/sdk` — `SemiontSession`, `WorkerBus` (worker process)
|
|
24
|
+
- `@semiont/http-transport` — HTTP transport, OpenAPI types
|
|
25
25
|
- `@semiont/inference` — InferenceClient for AI operations
|
|
26
26
|
- `@semiont/content` — Content storage URI derivation
|
|
27
|
+
- `@semiont/event-sourcing` — Annotation id generation
|
|
27
28
|
- `@semiont/observability` — Spans and job-outcome metrics
|
|
28
29
|
|
|
29
30
|
## Quick Start
|
|
@@ -88,16 +89,16 @@ interface JobMetadata {
|
|
|
88
89
|
id: JobId;
|
|
89
90
|
type: JobType;
|
|
90
91
|
userId: UserId;
|
|
91
|
-
userName: string; //
|
|
92
|
-
userEmail: string; //
|
|
93
|
-
userDomain: string; //
|
|
92
|
+
userName: string; // Audit-only snapshot of the requesting user
|
|
93
|
+
userEmail: string; // Audit-only snapshot of the requesting user
|
|
94
|
+
userDomain: string; // Audit-only snapshot of the requesting user
|
|
94
95
|
created: string;
|
|
95
96
|
retryCount: number;
|
|
96
97
|
maxRetries: number;
|
|
97
98
|
}
|
|
98
99
|
```
|
|
99
100
|
|
|
100
|
-
The `userName`, `userEmail`, and `userDomain` fields are
|
|
101
|
+
The `userName`, `userEmail`, and `userDomain` fields are an audit-only snapshot of the requesting user, persisted in the on-disk job file. Workers derive annotation `creator` attribution from `userId` via `didToAgent()`.
|
|
101
102
|
|
|
102
103
|
## Annotation Workers
|
|
103
104
|
|
|
@@ -112,9 +113,9 @@ The worker process (`worker-main.ts` → `startWorkerProcess` in `worker-process
|
|
|
112
113
|
| `comment-annotation` | `processCommentJob` |
|
|
113
114
|
| `tag-annotation` | `processTagJob` |
|
|
114
115
|
|
|
115
|
-
Detection logic lives in the `AnnotationDetection` class (`src/workers/annotation-detection.ts`); generation synthesis in `generateResourceFromTopic()` (`src/workers/generation/resource-generation.ts`).
|
|
116
|
+
Detection logic lives in the `AnnotationDetection` class (`src/workers/annotation-detection.ts`); generation synthesis in `generateResourceFromTopic()` (`src/workers/generation/resource-generation.ts`). Processors never fetch content themselves — the worker process fetches it via `session.client.browse.resourceContent(resourceId)` and passes it in.
|
|
116
117
|
|
|
117
|
-
Workers emit bus events via `session.client.transport.emit('mark:create' | 'job:start' | 'job:report-progress' | 'job:complete' | 'job:fail', payload)` — the Stower actor in @semiont/make-meaning handles persistence.
|
|
118
|
+
Workers emit bus events via `session.client.transport.emit('mark:create' | 'job:start' | 'job:report-progress' | 'job:complete' | 'job:fail', payload)` — the Stower actor in @semiont/make-meaning handles persistence to the event log, and the job command handlers mirror the same events into the queue files (completion, retry-on-failure with `maxRetries`, progress-as-heartbeat).
|
|
118
119
|
|
|
119
120
|
## Adding a Job Type
|
|
120
121
|
|
|
@@ -133,12 +134,12 @@ Jobs use TypeScript discriminated unions for type safety:
|
|
|
133
134
|
```typescript
|
|
134
135
|
function handleJob(job: AnyJob) {
|
|
135
136
|
if (job.status === 'running') {
|
|
136
|
-
console.log(job
|
|
137
|
+
console.log(job.progress); // Available
|
|
137
138
|
// console.log(job.result); // Compile error
|
|
138
139
|
}
|
|
139
140
|
if (job.status === 'complete') {
|
|
140
141
|
console.log(job.result); // Available
|
|
141
|
-
// console.log(job
|
|
142
|
+
// console.log(job.progress); // Compile error
|
|
142
143
|
}
|
|
143
144
|
}
|
|
144
145
|
```
|
|
@@ -148,7 +149,7 @@ function handleJob(job: AnyJob) {
|
|
|
148
149
|
Jobs are stored as individual JSON files organized by status:
|
|
149
150
|
|
|
150
151
|
```
|
|
151
|
-
|
|
152
|
+
{project.jobsDir}/
|
|
152
153
|
pending/job-abc123.json
|
|
153
154
|
running/job-def456.json
|
|
154
155
|
complete/job-ghi789.json
|
|
@@ -172,7 +173,7 @@ Apache-2.0
|
|
|
172
173
|
## Related Packages
|
|
173
174
|
|
|
174
175
|
- [`@semiont/core`](../core/) — Domain types, `SemiontProject`, EventBus
|
|
175
|
-
- [`@semiont/sdk`](../sdk/) — `SemiontSession`, `
|
|
176
|
-
- [`@semiont/
|
|
176
|
+
- [`@semiont/sdk`](../sdk/) — `SemiontSession`, `WorkerBus`
|
|
177
|
+
- [`@semiont/http-transport`](../http-transport/) — HTTP transport, OpenAPI types
|
|
177
178
|
- [`@semiont/inference`](../inference/) — AI inference client
|
|
178
179
|
- [`@semiont/make-meaning`](../make-meaning/) — Actor model, Knowledge Base, service orchestration
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, TagSchema, Logger, EventBus, components } from '@semiont/core';
|
|
1
|
+
import { JobId, UserId, ResourceId, EntityType, AnnotationId, Annotation, GatheredContext, TagSchema, Logger, EventBus, components, SupportedMediaType } from '@semiont/core';
|
|
3
2
|
import { SemiontProject } from '@semiont/core/node';
|
|
4
3
|
import { InferenceClient } from '@semiont/inference';
|
|
5
4
|
|
|
@@ -16,12 +15,6 @@ import { InferenceClient } from '@semiont/inference';
|
|
|
16
15
|
* - State machine is explicit and type-safe
|
|
17
16
|
*/
|
|
18
17
|
|
|
19
|
-
/**
|
|
20
|
-
* Content fetcher - turns a ResourceId into a readable stream.
|
|
21
|
-
* Workers use this to access resource content on demand.
|
|
22
|
-
* The implementation is provided by the backend at startup.
|
|
23
|
-
*/
|
|
24
|
-
type ContentFetcher = (resourceId: ResourceId) => Promise<Readable | null>;
|
|
25
18
|
type JobType = 'reference-annotation' | 'generation' | 'highlight-annotation' | 'assessment-annotation' | 'comment-annotation' | 'tag-annotation';
|
|
26
19
|
type JobStatus = 'pending' | 'running' | 'complete' | 'failed' | 'cancelled';
|
|
27
20
|
/**
|
|
@@ -31,6 +24,13 @@ interface JobMetadata {
|
|
|
31
24
|
id: JobId;
|
|
32
25
|
type: JobType;
|
|
33
26
|
userId: UserId;
|
|
27
|
+
/**
|
|
28
|
+
* Audit-only snapshot of the requesting user (with `userEmail` and
|
|
29
|
+
* `userDomain` below), stamped at job creation and persisted in the
|
|
30
|
+
* on-disk job file. No code path reads these back — annotation
|
|
31
|
+
* `creator` attribution is derived from `userId` via `didToAgent()`.
|
|
32
|
+
* Kept intentionally so job files are self-describing to a human.
|
|
33
|
+
*/
|
|
34
34
|
userName: string;
|
|
35
35
|
userEmail: string;
|
|
36
36
|
userDomain: string;
|
|
@@ -327,7 +327,22 @@ interface JobQueue {
|
|
|
327
327
|
createJob(job: AnyJob): Promise<void>;
|
|
328
328
|
getJob(jobId: JobId): Promise<AnyJob | null>;
|
|
329
329
|
updateJob(job: AnyJob, oldStatus?: JobStatus): Promise<void>;
|
|
330
|
-
|
|
330
|
+
/** Move a running job to `complete`. Returns false if the job isn't running. */
|
|
331
|
+
completeJob(jobId: JobId, result: Record<string, unknown>): Promise<boolean>;
|
|
332
|
+
/**
|
|
333
|
+
* Move a running job back to `pending` (retry, re-announced) while
|
|
334
|
+
* `retryCount < maxRetries`, else to `failed`. Returns what happened,
|
|
335
|
+
* or null if the job isn't running.
|
|
336
|
+
*/
|
|
337
|
+
failJob(jobId: JobId, error: string): Promise<'retried' | 'failed' | null>;
|
|
338
|
+
/** Write progress into a running job's file (throttled, best-effort). */
|
|
339
|
+
recordProgress(jobId: JobId, progress: Record<string, unknown>): Promise<void>;
|
|
340
|
+
/**
|
|
341
|
+
* Cancel all pending jobs in a category — 'generation' is the
|
|
342
|
+
* `generation` type; 'annotation' is every `*-annotation` type.
|
|
343
|
+
* Running jobs are left to finish. Returns the number cancelled.
|
|
344
|
+
*/
|
|
345
|
+
cancelPendingJobs(category: 'annotation' | 'generation'): Promise<number>;
|
|
331
346
|
cancelJob(jobId: JobId): Promise<boolean>;
|
|
332
347
|
getStats(): Promise<{
|
|
333
348
|
pending: number;
|
|
@@ -342,33 +357,40 @@ interface JobQueue {
|
|
|
342
357
|
* Job Queue Manager
|
|
343
358
|
*
|
|
344
359
|
* Filesystem-based job queue with atomic operations.
|
|
345
|
-
* Jobs are stored in directories by status
|
|
360
|
+
* Jobs are stored in directories by status; status transitions are
|
|
361
|
+
* atomic delete + write across directories.
|
|
346
362
|
*/
|
|
347
363
|
|
|
348
364
|
declare class FsJobQueue implements JobQueue {
|
|
349
365
|
private eventBus?;
|
|
350
366
|
private jobsDir;
|
|
351
367
|
private logger;
|
|
352
|
-
private
|
|
353
|
-
private
|
|
354
|
-
|
|
368
|
+
private reannounceTimer;
|
|
369
|
+
private cleanupTimer;
|
|
370
|
+
/** Per-job timestamp of the last progress write, for throttling. */
|
|
371
|
+
private lastProgressWrite;
|
|
355
372
|
constructor(project: SemiontProject, logger: Logger, eventBus?: EventBus | undefined);
|
|
356
373
|
/**
|
|
357
|
-
* Initialize job queue directories,
|
|
374
|
+
* Initialize job queue directories, announce any pending backlog,
|
|
375
|
+
* and start the re-announce interval. Idempotent.
|
|
358
376
|
*/
|
|
359
377
|
initialize(): Promise<void>;
|
|
360
378
|
/**
|
|
361
|
-
*
|
|
379
|
+
* Stop the re-announce and retention intervals
|
|
362
380
|
*/
|
|
363
381
|
destroy(): void;
|
|
364
382
|
/**
|
|
365
|
-
*
|
|
383
|
+
* Emit `job:queued` for a pending job, if an EventBus is wired and
|
|
384
|
+
* the job carries a `resourceId` (every current job type does).
|
|
366
385
|
*/
|
|
367
|
-
private
|
|
386
|
+
private announce;
|
|
368
387
|
/**
|
|
369
|
-
*
|
|
388
|
+
* Announce every job currently in `pending/`. Files that vanish or
|
|
389
|
+
* fail to parse mid-scan (claimed, cancelled, partially written)
|
|
390
|
+
* are skipped — they're either gone for a good reason or picked up
|
|
391
|
+
* on the next tick.
|
|
370
392
|
*/
|
|
371
|
-
private
|
|
393
|
+
private announcePendingJobs;
|
|
372
394
|
/**
|
|
373
395
|
* Create a new job
|
|
374
396
|
*/
|
|
@@ -382,10 +404,25 @@ declare class FsJobQueue implements JobQueue {
|
|
|
382
404
|
*/
|
|
383
405
|
updateJob(job: AnyJob, oldStatus?: JobStatus): Promise<void>;
|
|
384
406
|
/**
|
|
385
|
-
*
|
|
386
|
-
*
|
|
407
|
+
* Move a running job to `complete`. Returns false (and changes
|
|
408
|
+
* nothing) if the job is missing or not running — which also makes
|
|
409
|
+
* duplicate `job:complete` events harmless.
|
|
410
|
+
*/
|
|
411
|
+
completeJob(jobId: JobId, result: Record<string, unknown>): Promise<boolean>;
|
|
412
|
+
/**
|
|
413
|
+
* Retry-or-fail a running job. While `retryCount < maxRetries` the
|
|
414
|
+
* job goes back to `pending` with the count bumped (and is
|
|
415
|
+
* re-announced); after that it lands in `failed` with the error.
|
|
416
|
+
* Returns null (and changes nothing) if the job isn't running.
|
|
417
|
+
*/
|
|
418
|
+
failJob(jobId: JobId, error: string): Promise<'retried' | 'failed' | null>;
|
|
419
|
+
/**
|
|
420
|
+
* Write progress into a running job's file. Throttled per job, and
|
|
421
|
+
* a no-op for jobs that aren't running. Beyond surfacing live
|
|
422
|
+
* progress to `job:status-requested`, each write refreshes the
|
|
423
|
+
* file's mtime — the heartbeat `recoverStaleRunningJobs` watches.
|
|
387
424
|
*/
|
|
388
|
-
|
|
425
|
+
recordProgress(jobId: JobId, progress: Record<string, unknown>): Promise<void>;
|
|
389
426
|
/**
|
|
390
427
|
* List jobs with filters
|
|
391
428
|
*/
|
|
@@ -394,6 +431,21 @@ declare class FsJobQueue implements JobQueue {
|
|
|
394
431
|
* Cancel a job
|
|
395
432
|
*/
|
|
396
433
|
cancelJob(jobId: JobId): Promise<boolean>;
|
|
434
|
+
/**
|
|
435
|
+
* Cancel all pending jobs in a category — the granularity of the
|
|
436
|
+
* `job:cancel-requested` UI signal. Running jobs are left to finish:
|
|
437
|
+
* interrupting a worker mid-inference would need a worker-side kill
|
|
438
|
+
* channel that doesn't exist.
|
|
439
|
+
*/
|
|
440
|
+
cancelPendingJobs(category: 'annotation' | 'generation'): Promise<number>;
|
|
441
|
+
/**
|
|
442
|
+
* Recover running jobs orphaned by a dead worker: any `running/`
|
|
443
|
+
* file whose mtime is older than the stale window is fed through
|
|
444
|
+
* the same retry-or-fail path as `job:fail`. Progress writes
|
|
445
|
+
* refresh the mtime, so a live worker is never recovered out from
|
|
446
|
+
* under itself as long as it reports within the window.
|
|
447
|
+
*/
|
|
448
|
+
recoverStaleRunningJobs(): Promise<number>;
|
|
397
449
|
/**
|
|
398
450
|
* Clean up old completed/failed jobs (older than retention period)
|
|
399
451
|
*/
|
|
@@ -447,7 +499,7 @@ declare function processTagJob(content: string, inferenceClient: InferenceClient
|
|
|
447
499
|
declare function processGenerationJob(inferenceClient: InferenceClient, params: GenerationParams, onProgress: OnProgress, logger: Logger): Promise<{
|
|
448
500
|
content: string;
|
|
449
501
|
title: string;
|
|
450
|
-
format:
|
|
502
|
+
format: SupportedMediaType;
|
|
451
503
|
result: GenerationResult;
|
|
452
504
|
}>;
|
|
453
505
|
|
|
@@ -503,16 +555,11 @@ interface TagMatch {
|
|
|
503
555
|
* 2. Call AI inference
|
|
504
556
|
* 3. Parse and validate results using MotivationParsers
|
|
505
557
|
*
|
|
506
|
-
* All methods take content as a string parameter
|
|
507
|
-
*
|
|
558
|
+
* All methods take content as a string parameter — the worker process
|
|
559
|
+
* fetches it and hands it in.
|
|
508
560
|
*/
|
|
509
561
|
|
|
510
562
|
declare class AnnotationDetection {
|
|
511
|
-
/**
|
|
512
|
-
* Fetch content from a ContentFetcher and read the stream to a string.
|
|
513
|
-
* Shared helper for all workers.
|
|
514
|
-
*/
|
|
515
|
-
static fetchContent(contentFetcher: ContentFetcher, resourceId: ResourceId): Promise<string>;
|
|
516
563
|
/**
|
|
517
564
|
* Detect comments in content.
|
|
518
565
|
*
|
|
@@ -576,4 +623,4 @@ declare function generateResourceFromTopic(topic: string, entityTypes: string[],
|
|
|
576
623
|
}>;
|
|
577
624
|
|
|
578
625
|
export { AnnotationDetection, FsJobQueue, generateResourceFromTopic, isCancelledJob, isCompleteJob, isFailedJob, isPendingJob, isRunningJob, processAssessmentJob, processCommentJob, processGenerationJob, processHighlightJob, processReferenceJob, processTagJob };
|
|
579
|
-
export type { AnyJob, AssessmentDetectionJob, AssessmentDetectionParams, AssessmentDetectionProgress, AssessmentDetectionResult, CancelledJob, CommentDetectionJob, CommentDetectionParams, CommentDetectionProgress, CommentDetectionResult, CompleteJob,
|
|
626
|
+
export type { AnyJob, AssessmentDetectionJob, AssessmentDetectionParams, AssessmentDetectionProgress, AssessmentDetectionResult, CancelledJob, CommentDetectionJob, CommentDetectionParams, CommentDetectionProgress, CommentDetectionResult, CompleteJob, DetectionJob, DetectionParams, DetectionProgress, DetectionResult, FailedJob, GenerationJob, GenerationParams, GenerationResult, HighlightDetectionJob, HighlightDetectionParams, HighlightDetectionProgress, HighlightDetectionResult, JobMetadata, JobQueryFilters, JobQueue, JobStatus, JobType, OnProgress, PendingJob, ProcessorResult, RunningJob, TagDetectionJob, TagDetectionParams, TagDetectionProgress, TagDetectionResult, YieldProgress };
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
|
-
import { promises
|
|
1
|
+
import { promises } from 'fs';
|
|
2
2
|
import * as path from 'path';
|
|
3
|
-
import { reconcileSelector, getLocaleEnglishName, didToAgent } from '@semiont/core';
|
|
3
|
+
import { jobId, reconcileSelector, getLocaleEnglishName, didToAgent } from '@semiont/core';
|
|
4
4
|
import { generateAnnotationId } from '@semiont/event-sourcing';
|
|
5
5
|
|
|
6
6
|
// src/fs-job-queue.ts
|
|
7
|
+
var REANNOUNCE_INTERVAL_MS = 3e4;
|
|
8
|
+
var STALE_RUNNING_MS = 30 * 6e4;
|
|
9
|
+
var PROGRESS_WRITE_MIN_INTERVAL_MS = 5e3;
|
|
10
|
+
var RETENTION_HOURS = 24;
|
|
11
|
+
var CLEANUP_INTERVAL_MS = 36e5;
|
|
7
12
|
var FsJobQueue = class {
|
|
8
13
|
constructor(project, logger, eventBus) {
|
|
9
14
|
this.eventBus = eventBus;
|
|
@@ -13,12 +18,13 @@ var FsJobQueue = class {
|
|
|
13
18
|
eventBus;
|
|
14
19
|
jobsDir;
|
|
15
20
|
logger;
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
21
|
+
reannounceTimer = null;
|
|
22
|
+
cleanupTimer = null;
|
|
23
|
+
/** Per-job timestamp of the last progress write, for throttling. */
|
|
24
|
+
lastProgressWrite = /* @__PURE__ */ new Map();
|
|
20
25
|
/**
|
|
21
|
-
* Initialize job queue directories,
|
|
26
|
+
* Initialize job queue directories, announce any pending backlog,
|
|
27
|
+
* and start the re-announce interval. Idempotent.
|
|
22
28
|
*/
|
|
23
29
|
async initialize() {
|
|
24
30
|
const statuses = ["pending", "running", "complete", "failed", "cancelled"];
|
|
@@ -26,62 +32,83 @@ var FsJobQueue = class {
|
|
|
26
32
|
const dir = path.join(this.jobsDir, status);
|
|
27
33
|
await promises.mkdir(dir, { recursive: true });
|
|
28
34
|
}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
if (this.eventBus && !this.reannounceTimer) {
|
|
36
|
+
await this.announcePendingJobs();
|
|
37
|
+
this.reannounceTimer = setInterval(() => {
|
|
38
|
+
this.announcePendingJobs().catch((error) => {
|
|
39
|
+
this.logger.warn("Pending-job re-announce failed", {
|
|
40
|
+
error: error instanceof Error ? error.message : String(error)
|
|
41
|
+
});
|
|
42
|
+
});
|
|
43
|
+
this.recoverStaleRunningJobs().catch((error) => {
|
|
44
|
+
this.logger.warn("Stale-running recovery failed", {
|
|
45
|
+
error: error instanceof Error ? error.message : String(error)
|
|
46
|
+
});
|
|
47
|
+
});
|
|
48
|
+
}, REANNOUNCE_INTERVAL_MS);
|
|
49
|
+
this.reannounceTimer.unref?.();
|
|
50
|
+
}
|
|
51
|
+
if (!this.cleanupTimer) {
|
|
52
|
+
this.cleanupTimer = setInterval(() => {
|
|
53
|
+
this.cleanupOldJobs(RETENTION_HOURS).catch((error) => {
|
|
54
|
+
this.logger.warn("Job retention cleanup failed", {
|
|
55
|
+
error: error instanceof Error ? error.message : String(error)
|
|
56
|
+
});
|
|
57
|
+
});
|
|
58
|
+
}, CLEANUP_INTERVAL_MS);
|
|
59
|
+
this.cleanupTimer.unref?.();
|
|
39
60
|
}
|
|
40
61
|
this.logger.info("Job queue initialized");
|
|
41
62
|
}
|
|
42
63
|
/**
|
|
43
|
-
*
|
|
64
|
+
* Stop the re-announce and retention intervals
|
|
44
65
|
*/
|
|
45
66
|
destroy() {
|
|
46
|
-
if (this.
|
|
47
|
-
this.
|
|
48
|
-
this.
|
|
67
|
+
if (this.reannounceTimer) {
|
|
68
|
+
clearInterval(this.reannounceTimer);
|
|
69
|
+
this.reannounceTimer = null;
|
|
49
70
|
}
|
|
50
|
-
if (this.
|
|
51
|
-
|
|
52
|
-
this.
|
|
71
|
+
if (this.cleanupTimer) {
|
|
72
|
+
clearInterval(this.cleanupTimer);
|
|
73
|
+
this.cleanupTimer = null;
|
|
53
74
|
}
|
|
54
75
|
}
|
|
55
76
|
/**
|
|
56
|
-
*
|
|
77
|
+
* Emit `job:queued` for a pending job, if an EventBus is wired and
|
|
78
|
+
* the job carries a `resourceId` (every current job type does).
|
|
57
79
|
*/
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
const content = await promises.readFile(path.join(pendingDir, file), "utf-8");
|
|
67
|
-
jobs.push(JSON.parse(content));
|
|
68
|
-
} catch {
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
this.pendingQueue = jobs;
|
|
72
|
-
} catch {
|
|
73
|
-
this.pendingQueue = [];
|
|
80
|
+
announce(job) {
|
|
81
|
+
if (this.eventBus && "params" in job && "resourceId" in job.params) {
|
|
82
|
+
this.eventBus.get("job:queued").next({
|
|
83
|
+
jobId: job.metadata.id,
|
|
84
|
+
jobType: job.metadata.type,
|
|
85
|
+
resourceId: job.params.resourceId,
|
|
86
|
+
userId: job.metadata.userId
|
|
87
|
+
});
|
|
74
88
|
}
|
|
75
89
|
}
|
|
76
90
|
/**
|
|
77
|
-
*
|
|
91
|
+
* Announce every job currently in `pending/`. Files that vanish or
|
|
92
|
+
* fail to parse mid-scan (claimed, cancelled, partially written)
|
|
93
|
+
* are skipped — they're either gone for a good reason or picked up
|
|
94
|
+
* on the next tick.
|
|
78
95
|
*/
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
await
|
|
84
|
-
}
|
|
96
|
+
async announcePendingJobs() {
|
|
97
|
+
const pendingDir = path.join(this.jobsDir, "pending");
|
|
98
|
+
let files;
|
|
99
|
+
try {
|
|
100
|
+
files = await promises.readdir(pendingDir);
|
|
101
|
+
} catch {
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
files.sort();
|
|
105
|
+
for (const file of files) {
|
|
106
|
+
try {
|
|
107
|
+
const content = await promises.readFile(path.join(pendingDir, file), "utf-8");
|
|
108
|
+
this.announce(JSON.parse(content));
|
|
109
|
+
} catch {
|
|
110
|
+
}
|
|
111
|
+
}
|
|
85
112
|
}
|
|
86
113
|
/**
|
|
87
114
|
* Create a new job
|
|
@@ -91,16 +118,7 @@ var FsJobQueue = class {
|
|
|
91
118
|
await promises.writeFile(jobPath, JSON.stringify(job, null, 2), "utf-8");
|
|
92
119
|
this.logger.info("Job created", { jobId: job.metadata.id, status: job.status });
|
|
93
120
|
if (job.status === "pending") {
|
|
94
|
-
this.
|
|
95
|
-
this.pendingQueue.sort((a, b) => a.metadata.id.localeCompare(b.metadata.id));
|
|
96
|
-
}
|
|
97
|
-
if (this.eventBus && "params" in job && "resourceId" in job.params) {
|
|
98
|
-
this.eventBus.get("job:queued").next({
|
|
99
|
-
jobId: job.metadata.id,
|
|
100
|
-
jobType: job.metadata.type,
|
|
101
|
-
resourceId: job.params.resourceId,
|
|
102
|
-
userId: job.metadata.userId
|
|
103
|
-
});
|
|
121
|
+
this.announce(job);
|
|
104
122
|
}
|
|
105
123
|
}
|
|
106
124
|
/**
|
|
@@ -129,34 +147,92 @@ var FsJobQueue = class {
|
|
|
129
147
|
await promises.unlink(oldPath);
|
|
130
148
|
} catch (error) {
|
|
131
149
|
}
|
|
132
|
-
if (oldStatus === "pending") {
|
|
133
|
-
const idx = this.pendingQueue.findIndex((j) => j.metadata.id === job.metadata.id);
|
|
134
|
-
if (idx !== -1) this.pendingQueue.splice(idx, 1);
|
|
135
|
-
}
|
|
136
|
-
if (job.status === "pending") {
|
|
137
|
-
this.pendingQueue.push(job);
|
|
138
|
-
this.pendingQueue.sort((a, b) => a.metadata.id.localeCompare(b.metadata.id));
|
|
139
|
-
}
|
|
140
150
|
}
|
|
141
151
|
const newPath = this.getJobPath(job.metadata.id, job.status);
|
|
142
152
|
await promises.writeFile(newPath, JSON.stringify(job, null, 2), "utf-8");
|
|
143
153
|
if (oldStatus && oldStatus !== job.status) {
|
|
144
154
|
this.logger.info("Job moved", { jobId: job.metadata.id, oldStatus, newStatus: job.status });
|
|
155
|
+
if (job.status === "pending") {
|
|
156
|
+
this.announce(job);
|
|
157
|
+
}
|
|
145
158
|
} else {
|
|
146
159
|
this.logger.info("Job updated", { jobId: job.metadata.id, status: job.status });
|
|
147
160
|
}
|
|
148
161
|
}
|
|
149
162
|
/**
|
|
150
|
-
*
|
|
151
|
-
*
|
|
163
|
+
* Move a running job to `complete`. Returns false (and changes
|
|
164
|
+
* nothing) if the job is missing or not running — which also makes
|
|
165
|
+
* duplicate `job:complete` events harmless.
|
|
152
166
|
*/
|
|
153
|
-
async
|
|
154
|
-
|
|
155
|
-
|
|
167
|
+
async completeJob(jobId, result) {
|
|
168
|
+
const job = await this.getJob(jobId);
|
|
169
|
+
if (!job || job.status !== "running") {
|
|
170
|
+
return false;
|
|
156
171
|
}
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
172
|
+
this.lastProgressWrite.delete(jobId);
|
|
173
|
+
const completed = {
|
|
174
|
+
status: "complete",
|
|
175
|
+
metadata: job.metadata,
|
|
176
|
+
params: job.params,
|
|
177
|
+
startedAt: job.startedAt,
|
|
178
|
+
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
179
|
+
result
|
|
180
|
+
};
|
|
181
|
+
await this.updateJob(completed, "running");
|
|
182
|
+
return true;
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Retry-or-fail a running job. While `retryCount < maxRetries` the
|
|
186
|
+
* job goes back to `pending` with the count bumped (and is
|
|
187
|
+
* re-announced); after that it lands in `failed` with the error.
|
|
188
|
+
* Returns null (and changes nothing) if the job isn't running.
|
|
189
|
+
*/
|
|
190
|
+
async failJob(jobId, error) {
|
|
191
|
+
const job = await this.getJob(jobId);
|
|
192
|
+
if (!job || job.status !== "running") {
|
|
193
|
+
return null;
|
|
194
|
+
}
|
|
195
|
+
this.lastProgressWrite.delete(jobId);
|
|
196
|
+
if (job.metadata.retryCount < job.metadata.maxRetries) {
|
|
197
|
+
const retried = {
|
|
198
|
+
status: "pending",
|
|
199
|
+
metadata: { ...job.metadata, retryCount: job.metadata.retryCount + 1 },
|
|
200
|
+
params: job.params
|
|
201
|
+
};
|
|
202
|
+
await this.updateJob(retried, "running");
|
|
203
|
+
return "retried";
|
|
204
|
+
}
|
|
205
|
+
const failed = {
|
|
206
|
+
status: "failed",
|
|
207
|
+
metadata: job.metadata,
|
|
208
|
+
params: job.params,
|
|
209
|
+
startedAt: job.startedAt,
|
|
210
|
+
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
211
|
+
error
|
|
212
|
+
};
|
|
213
|
+
await this.updateJob(failed, "running");
|
|
214
|
+
return "failed";
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* Write progress into a running job's file. Throttled per job, and
|
|
218
|
+
* a no-op for jobs that aren't running. Beyond surfacing live
|
|
219
|
+
* progress to `job:status-requested`, each write refreshes the
|
|
220
|
+
* file's mtime — the heartbeat `recoverStaleRunningJobs` watches.
|
|
221
|
+
*/
|
|
222
|
+
async recordProgress(jobId, progress) {
|
|
223
|
+
const now = Date.now();
|
|
224
|
+
const lastWrite = this.lastProgressWrite.get(jobId) ?? 0;
|
|
225
|
+
if (now - lastWrite < PROGRESS_WRITE_MIN_INTERVAL_MS) {
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
228
|
+
this.lastProgressWrite.set(jobId, now);
|
|
229
|
+
const job = await this.getJob(jobId);
|
|
230
|
+
if (!job || job.status !== "running") {
|
|
231
|
+
this.lastProgressWrite.delete(jobId);
|
|
232
|
+
return;
|
|
233
|
+
}
|
|
234
|
+
const updated = { ...job, progress };
|
|
235
|
+
await promises.writeFile(this.getJobPath(jobId, "running"), JSON.stringify(updated, null, 2), "utf-8");
|
|
160
236
|
}
|
|
161
237
|
/**
|
|
162
238
|
* List jobs with filters
|
|
@@ -207,6 +283,63 @@ var FsJobQueue = class {
|
|
|
207
283
|
await this.updateJob(cancelledJob, oldStatus);
|
|
208
284
|
return true;
|
|
209
285
|
}
|
|
286
|
+
/**
|
|
287
|
+
* Cancel all pending jobs in a category — the granularity of the
|
|
288
|
+
* `job:cancel-requested` UI signal. Running jobs are left to finish:
|
|
289
|
+
* interrupting a worker mid-inference would need a worker-side kill
|
|
290
|
+
* channel that doesn't exist.
|
|
291
|
+
*/
|
|
292
|
+
async cancelPendingJobs(category) {
|
|
293
|
+
const matches = category === "generation" ? (type) => type === "generation" : (type) => type.endsWith("-annotation");
|
|
294
|
+
const pending = await this.listJobs({ status: "pending", limit: Number.MAX_SAFE_INTEGER });
|
|
295
|
+
let cancelled = 0;
|
|
296
|
+
for (const job of pending) {
|
|
297
|
+
if (!matches(job.metadata.type)) continue;
|
|
298
|
+
if (await this.cancelJob(job.metadata.id)) {
|
|
299
|
+
cancelled++;
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
if (cancelled > 0) {
|
|
303
|
+
this.logger.info("Cancelled pending jobs", { category, cancelled });
|
|
304
|
+
}
|
|
305
|
+
return cancelled;
|
|
306
|
+
}
|
|
307
|
+
/**
|
|
308
|
+
* Recover running jobs orphaned by a dead worker: any `running/`
|
|
309
|
+
* file whose mtime is older than the stale window is fed through
|
|
310
|
+
* the same retry-or-fail path as `job:fail`. Progress writes
|
|
311
|
+
* refresh the mtime, so a live worker is never recovered out from
|
|
312
|
+
* under itself as long as it reports within the window.
|
|
313
|
+
*/
|
|
314
|
+
async recoverStaleRunningJobs() {
|
|
315
|
+
const runningDir = path.join(this.jobsDir, "running");
|
|
316
|
+
let files;
|
|
317
|
+
try {
|
|
318
|
+
files = await promises.readdir(runningDir);
|
|
319
|
+
} catch {
|
|
320
|
+
return 0;
|
|
321
|
+
}
|
|
322
|
+
const now = Date.now();
|
|
323
|
+
let recovered = 0;
|
|
324
|
+
for (const file of files) {
|
|
325
|
+
if (!file.endsWith(".json")) continue;
|
|
326
|
+
try {
|
|
327
|
+
const stat = await promises.stat(path.join(runningDir, file));
|
|
328
|
+
if (now - stat.mtimeMs < STALE_RUNNING_MS) continue;
|
|
329
|
+
const staleId = jobId(file.slice(0, -".json".length));
|
|
330
|
+
const outcome = await this.failJob(
|
|
331
|
+
staleId,
|
|
332
|
+
`worker presumed dead \u2014 no progress within ${STALE_RUNNING_MS / 6e4} minutes`
|
|
333
|
+
);
|
|
334
|
+
if (outcome) {
|
|
335
|
+
this.logger.warn("Recovered stale running job", { jobId: staleId, outcome });
|
|
336
|
+
recovered++;
|
|
337
|
+
}
|
|
338
|
+
} catch {
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
return recovered;
|
|
342
|
+
}
|
|
210
343
|
/**
|
|
211
344
|
* Clean up old completed/failed jobs (older than retention period)
|
|
212
345
|
*/
|
|
@@ -805,21 +938,6 @@ function logAnchorMethod(motivation, exact, anchorMethod) {
|
|
|
805
938
|
|
|
806
939
|
// src/workers/annotation-detection.ts
|
|
807
940
|
var AnnotationDetection = class {
|
|
808
|
-
/**
|
|
809
|
-
* Fetch content from a ContentFetcher and read the stream to a string.
|
|
810
|
-
* Shared helper for all workers.
|
|
811
|
-
*/
|
|
812
|
-
static async fetchContent(contentFetcher, resourceId) {
|
|
813
|
-
const stream = await contentFetcher(resourceId);
|
|
814
|
-
if (!stream) {
|
|
815
|
-
throw new Error(`Could not load content for resource ${resourceId}`);
|
|
816
|
-
}
|
|
817
|
-
const chunks = [];
|
|
818
|
-
for await (const chunk of stream) {
|
|
819
|
-
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
820
|
-
}
|
|
821
|
-
return Buffer.concat(chunks).toString("utf-8");
|
|
822
|
-
}
|
|
823
941
|
/**
|
|
824
942
|
* Detect comments in content.
|
|
825
943
|
*
|
|
@@ -1065,10 +1183,19 @@ Knowledge graph context:
|
|
|
1065
1183
|
${parts.join("\n")}`;
|
|
1066
1184
|
}
|
|
1067
1185
|
}
|
|
1186
|
+
let semanticContextSection = "";
|
|
1187
|
+
const similar = context?.semanticContext?.similar ?? [];
|
|
1188
|
+
if (similar.length > 0) {
|
|
1189
|
+
const lines = [...similar].sort((a, b) => b.score - a.score).slice(0, 3).map((m) => `- (${m.score.toFixed(2)}) ${m.text.slice(0, 240)}`);
|
|
1190
|
+
semanticContextSection = `
|
|
1191
|
+
|
|
1192
|
+
Related passages from the knowledge base:
|
|
1193
|
+
${lines.join("\n")}`;
|
|
1194
|
+
}
|
|
1068
1195
|
const structureGuidance = finalMaxTokens >= 1e3 ? "organized into titled sections (## Section) with well-structured paragraphs" : "organized into well-structured paragraphs";
|
|
1069
1196
|
const prompt = `Generate a concise, informative resource about "${topic}".
|
|
1070
1197
|
${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
|
|
1071
|
-
${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${sourceLanguageInstruction}${languageInstruction}
|
|
1198
|
+
${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${semanticContextSection}${sourceLanguageInstruction}${languageInstruction}
|
|
1072
1199
|
|
|
1073
1200
|
Requirements:
|
|
1074
1201
|
- Start with a clear heading (# Title)
|