@semiont/jobs 0.5.6 → 0.5.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -20,10 +20,11 @@ npm install @semiont/jobs
20
20
 
21
21
  **Dependencies:**
22
22
  - `@semiont/core` — Core types, `SemiontProject`, EventBus
23
- - `@semiont/sdk` — `SemiontSession`, `JobClaimAdapter` (worker process)
24
- - `@semiont/api-client` — HTTP transport, OpenAPI types
23
+ - `@semiont/sdk` — `SemiontSession`, `WorkerBus` (worker process)
24
+ - `@semiont/http-transport` — HTTP transport, OpenAPI types
25
25
  - `@semiont/inference` — InferenceClient for AI operations
26
26
  - `@semiont/content` — Content storage URI derivation
27
+ - `@semiont/event-sourcing` — Annotation id generation
27
28
  - `@semiont/observability` — Spans and job-outcome metrics
28
29
 
29
30
  ## Quick Start
@@ -88,16 +89,16 @@ interface JobMetadata {
88
89
  id: JobId;
89
90
  type: JobType;
90
91
  userId: UserId;
91
- userName: string; // For building W3C Agent creator
92
- userEmail: string; // For building W3C Agent creator
93
- userDomain: string; // For building W3C Agent creator
92
+ userName: string; // Audit-only snapshot of the requesting user
93
+ userEmail: string; // Audit-only snapshot of the requesting user
94
+ userDomain: string; // Audit-only snapshot of the requesting user
94
95
  created: string;
95
96
  retryCount: number;
96
97
  maxRetries: number;
97
98
  }
98
99
  ```
99
100
 
100
- The `userName`, `userEmail`, and `userDomain` fields are used by workers to build the W3C `Agent` for annotation `creator` attribution via `userToAgent()`.
101
+ The `userName`, `userEmail`, and `userDomain` fields are an audit-only snapshot of the requesting user, persisted in the on-disk job file. Workers derive annotation `creator` attribution from `userId` via `didToAgent()`.
101
102
 
102
103
  ## Annotation Workers
103
104
 
@@ -112,9 +113,9 @@ The worker process (`worker-main.ts` → `startWorkerProcess` in `worker-process
112
113
  | `comment-annotation` | `processCommentJob` |
113
114
  | `tag-annotation` | `processTagJob` |
114
115
 
115
- Detection logic lives in the `AnnotationDetection` class (`src/workers/annotation-detection.ts`); generation synthesis in `generateResourceFromTopic()` (`src/workers/generation/resource-generation.ts`). Each processor fetches content via `session.client.browse.resourceContent(resourceId)`.
116
+ Detection logic lives in the `AnnotationDetection` class (`src/workers/annotation-detection.ts`); generation synthesis in `generateResourceFromTopic()` (`src/workers/generation/resource-generation.ts`). Processors never fetch content themselves — the worker process fetches it via `session.client.browse.resourceContent(resourceId)` and passes it in.
116
117
 
117
- Workers emit bus events via `session.client.transport.emit('mark:create' | 'job:start' | 'job:report-progress' | 'job:complete' | 'job:fail', payload)` — the Stower actor in @semiont/make-meaning handles persistence.
118
+ Workers emit bus events via `session.client.transport.emit('mark:create' | 'job:start' | 'job:report-progress' | 'job:complete' | 'job:fail', payload)` — the Stower actor in @semiont/make-meaning handles persistence to the event log, and the job command handlers mirror the same events into the queue files (completion, retry-on-failure with `maxRetries`, progress-as-heartbeat).
118
119
 
119
120
  ## Adding a Job Type
120
121
 
@@ -133,12 +134,12 @@ Jobs use TypeScript discriminated unions for type safety:
133
134
  ```typescript
134
135
  function handleJob(job: AnyJob) {
135
136
  if (job.status === 'running') {
136
- console.log(job:progress); // Available
137
+ console.log(job.progress); // Available
137
138
  // console.log(job.result); // Compile error
138
139
  }
139
140
  if (job.status === 'complete') {
140
141
  console.log(job.result); // Available
141
- // console.log(job:progress); // Compile error
142
+ // console.log(job.progress); // Compile error
142
143
  }
143
144
  }
144
145
  ```
@@ -148,7 +149,7 @@ function handleJob(job: AnyJob) {
148
149
  Jobs are stored as individual JSON files organized by status:
149
150
 
150
151
  ```
151
- data/jobs/
152
+ {project.jobsDir}/
152
153
  pending/job-abc123.json
153
154
  running/job-def456.json
154
155
  complete/job-ghi789.json
@@ -172,7 +173,7 @@ Apache-2.0
172
173
  ## Related Packages
173
174
 
174
175
  - [`@semiont/core`](../core/) — Domain types, `SemiontProject`, EventBus
175
- - [`@semiont/sdk`](../sdk/) — `SemiontSession`, `JobClaimAdapter`
176
- - [`@semiont/api-client`](../api-client/) — HTTP transport, OpenAPI types
176
+ - [`@semiont/sdk`](../sdk/) — `SemiontSession`, `WorkerBus`
177
+ - [`@semiont/http-transport`](../http-transport/) — HTTP transport, OpenAPI types
177
178
  - [`@semiont/inference`](../inference/) — AI inference client
178
179
  - [`@semiont/make-meaning`](../make-meaning/) — Actor model, Knowledge Base, service orchestration
package/dist/index.d.ts CHANGED
@@ -1,5 +1,4 @@
1
- import { Readable } from 'stream';
2
- import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, TagSchema, Logger, EventBus, components } from '@semiont/core';
1
+ import { JobId, UserId, ResourceId, EntityType, AnnotationId, Annotation, GatheredContext, TagSchema, Logger, EventBus, components, SupportedMediaType } from '@semiont/core';
3
2
  import { SemiontProject } from '@semiont/core/node';
4
3
  import { InferenceClient } from '@semiont/inference';
5
4
 
@@ -16,12 +15,6 @@ import { InferenceClient } from '@semiont/inference';
16
15
  * - State machine is explicit and type-safe
17
16
  */
18
17
 
19
- /**
20
- * Content fetcher - turns a ResourceId into a readable stream.
21
- * Workers use this to access resource content on demand.
22
- * The implementation is provided by the backend at startup.
23
- */
24
- type ContentFetcher = (resourceId: ResourceId) => Promise<Readable | null>;
25
18
  type JobType = 'reference-annotation' | 'generation' | 'highlight-annotation' | 'assessment-annotation' | 'comment-annotation' | 'tag-annotation';
26
19
  type JobStatus = 'pending' | 'running' | 'complete' | 'failed' | 'cancelled';
27
20
  /**
@@ -31,6 +24,13 @@ interface JobMetadata {
31
24
  id: JobId;
32
25
  type: JobType;
33
26
  userId: UserId;
27
+ /**
28
+ * Audit-only snapshot of the requesting user (with `userEmail` and
29
+ * `userDomain` below), stamped at job creation and persisted in the
30
+ * on-disk job file. No code path reads these back — annotation
31
+ * `creator` attribution is derived from `userId` via `didToAgent()`.
32
+ * Kept intentionally so job files are self-describing to a human.
33
+ */
34
34
  userName: string;
35
35
  userEmail: string;
36
36
  userDomain: string;
@@ -327,7 +327,22 @@ interface JobQueue {
327
327
  createJob(job: AnyJob): Promise<void>;
328
328
  getJob(jobId: JobId): Promise<AnyJob | null>;
329
329
  updateJob(job: AnyJob, oldStatus?: JobStatus): Promise<void>;
330
- pollNextPendingJob(predicate?: (job: AnyJob) => boolean): Promise<AnyJob | null>;
330
+ /** Move a running job to `complete`. Returns false if the job isn't running. */
331
+ completeJob(jobId: JobId, result: Record<string, unknown>): Promise<boolean>;
332
+ /**
333
+ * Move a running job back to `pending` (retry, re-announced) while
334
+ * `retryCount < maxRetries`, else to `failed`. Returns what happened,
335
+ * or null if the job isn't running.
336
+ */
337
+ failJob(jobId: JobId, error: string): Promise<'retried' | 'failed' | null>;
338
+ /** Write progress into a running job's file (throttled, best-effort). */
339
+ recordProgress(jobId: JobId, progress: Record<string, unknown>): Promise<void>;
340
+ /**
341
+ * Cancel all pending jobs in a category — 'generation' is the
342
+ * `generation` type; 'annotation' is every `*-annotation` type.
343
+ * Running jobs are left to finish. Returns the number cancelled.
344
+ */
345
+ cancelPendingJobs(category: 'annotation' | 'generation'): Promise<number>;
331
346
  cancelJob(jobId: JobId): Promise<boolean>;
332
347
  getStats(): Promise<{
333
348
  pending: number;
@@ -342,33 +357,40 @@ interface JobQueue {
342
357
  * Job Queue Manager
343
358
  *
344
359
  * Filesystem-based job queue with atomic operations.
345
- * Jobs are stored in directories by status for easy polling.
360
+ * Jobs are stored in directories by status; status transitions are
361
+ * atomic delete + write across directories.
346
362
  */
347
363
 
348
364
  declare class FsJobQueue implements JobQueue {
349
365
  private eventBus?;
350
366
  private jobsDir;
351
367
  private logger;
352
- private pendingQueue;
353
- private watcher;
354
- private loadDebounceTimer;
368
+ private reannounceTimer;
369
+ private cleanupTimer;
370
+ /** Per-job timestamp of the last progress write, for throttling. */
371
+ private lastProgressWrite;
355
372
  constructor(project: SemiontProject, logger: Logger, eventBus?: EventBus | undefined);
356
373
  /**
357
- * Initialize job queue directories, load pending jobs, and start fs.watch
374
+ * Initialize job queue directories, announce any pending backlog,
375
+ * and start the re-announce interval. Idempotent.
358
376
  */
359
377
  initialize(): Promise<void>;
360
378
  /**
361
- * Clean up watcher
379
+ * Stop the re-announce and retention intervals
362
380
  */
363
381
  destroy(): void;
364
382
  /**
365
- * Load pending jobs from disk into in-memory queue
383
+ * Emit `job:queued` for a pending job, if an EventBus is wired and
384
+ * the job carries a `resourceId` (every current job type does).
366
385
  */
367
- private loadPendingJobs;
386
+ private announce;
368
387
  /**
369
- * Debounced version of loadPendingJobs fs.watch can fire rapidly
388
+ * Announce every job currently in `pending/`. Files that vanish or
389
+ * fail to parse mid-scan (claimed, cancelled, partially written)
390
+ * are skipped — they're either gone for a good reason or picked up
391
+ * on the next tick.
370
392
  */
371
- private debouncedLoadPendingJobs;
393
+ private announcePendingJobs;
372
394
  /**
373
395
  * Create a new job
374
396
  */
@@ -382,10 +404,25 @@ declare class FsJobQueue implements JobQueue {
382
404
  */
383
405
  updateJob(job: AnyJob, oldStatus?: JobStatus): Promise<void>;
384
406
  /**
385
- * Poll for next pending job (FIFO) from in-memory queue.
386
- * If a predicate is provided, returns the first matching job (skipping non-matching ones).
407
+ * Move a running job to `complete`. Returns false (and changes
408
+ * nothing) if the job is missing or not running — which also makes
409
+ * duplicate `job:complete` events harmless.
410
+ */
411
+ completeJob(jobId: JobId, result: Record<string, unknown>): Promise<boolean>;
412
+ /**
413
+ * Retry-or-fail a running job. While `retryCount < maxRetries` the
414
+ * job goes back to `pending` with the count bumped (and is
415
+ * re-announced); after that it lands in `failed` with the error.
416
+ * Returns null (and changes nothing) if the job isn't running.
417
+ */
418
+ failJob(jobId: JobId, error: string): Promise<'retried' | 'failed' | null>;
419
+ /**
420
+ * Write progress into a running job's file. Throttled per job, and
421
+ * a no-op for jobs that aren't running. Beyond surfacing live
422
+ * progress to `job:status-requested`, each write refreshes the
423
+ * file's mtime — the heartbeat `recoverStaleRunningJobs` watches.
387
424
  */
388
- pollNextPendingJob(predicate?: (job: AnyJob) => boolean): Promise<AnyJob | null>;
425
+ recordProgress(jobId: JobId, progress: Record<string, unknown>): Promise<void>;
389
426
  /**
390
427
  * List jobs with filters
391
428
  */
@@ -394,6 +431,21 @@ declare class FsJobQueue implements JobQueue {
394
431
  * Cancel a job
395
432
  */
396
433
  cancelJob(jobId: JobId): Promise<boolean>;
434
+ /**
435
+ * Cancel all pending jobs in a category — the granularity of the
436
+ * `job:cancel-requested` UI signal. Running jobs are left to finish:
437
+ * interrupting a worker mid-inference would need a worker-side kill
438
+ * channel that doesn't exist.
439
+ */
440
+ cancelPendingJobs(category: 'annotation' | 'generation'): Promise<number>;
441
+ /**
442
+ * Recover running jobs orphaned by a dead worker: any `running/`
443
+ * file whose mtime is older than the stale window is fed through
444
+ * the same retry-or-fail path as `job:fail`. Progress writes
445
+ * refresh the mtime, so a live worker is never recovered out from
446
+ * under itself as long as it reports within the window.
447
+ */
448
+ recoverStaleRunningJobs(): Promise<number>;
397
449
  /**
398
450
  * Clean up old completed/failed jobs (older than retention period)
399
451
  */
@@ -447,7 +499,7 @@ declare function processTagJob(content: string, inferenceClient: InferenceClient
447
499
  declare function processGenerationJob(inferenceClient: InferenceClient, params: GenerationParams, onProgress: OnProgress, logger: Logger): Promise<{
448
500
  content: string;
449
501
  title: string;
450
- format: string;
502
+ format: SupportedMediaType;
451
503
  result: GenerationResult;
452
504
  }>;
453
505
 
@@ -503,16 +555,11 @@ interface TagMatch {
503
555
  * 2. Call AI inference
504
556
  * 3. Parse and validate results using MotivationParsers
505
557
  *
506
- * All methods take content as a string parameter.
507
- * Workers are responsible for fetching content via ContentFetcher.
558
+ * All methods take content as a string parameter — the worker process
559
+ * fetches it and hands it in.
508
560
  */
509
561
 
510
562
  declare class AnnotationDetection {
511
- /**
512
- * Fetch content from a ContentFetcher and read the stream to a string.
513
- * Shared helper for all workers.
514
- */
515
- static fetchContent(contentFetcher: ContentFetcher, resourceId: ResourceId): Promise<string>;
516
563
  /**
517
564
  * Detect comments in content.
518
565
  *
@@ -576,4 +623,4 @@ declare function generateResourceFromTopic(topic: string, entityTypes: string[],
576
623
  }>;
577
624
 
578
625
  export { AnnotationDetection, FsJobQueue, generateResourceFromTopic, isCancelledJob, isCompleteJob, isFailedJob, isPendingJob, isRunningJob, processAssessmentJob, processCommentJob, processGenerationJob, processHighlightJob, processReferenceJob, processTagJob };
579
- export type { AnyJob, AssessmentDetectionJob, AssessmentDetectionParams, AssessmentDetectionProgress, AssessmentDetectionResult, CancelledJob, CommentDetectionJob, CommentDetectionParams, CommentDetectionProgress, CommentDetectionResult, CompleteJob, ContentFetcher, DetectionJob, DetectionParams, DetectionProgress, DetectionResult, FailedJob, GenerationJob, GenerationParams, GenerationResult, HighlightDetectionJob, HighlightDetectionParams, HighlightDetectionProgress, HighlightDetectionResult, JobMetadata, JobQueryFilters, JobQueue, JobStatus, JobType, OnProgress, PendingJob, ProcessorResult, RunningJob, TagDetectionJob, TagDetectionParams, TagDetectionProgress, TagDetectionResult, YieldProgress };
626
+ export type { AnyJob, AssessmentDetectionJob, AssessmentDetectionParams, AssessmentDetectionProgress, AssessmentDetectionResult, CancelledJob, CommentDetectionJob, CommentDetectionParams, CommentDetectionProgress, CommentDetectionResult, CompleteJob, DetectionJob, DetectionParams, DetectionProgress, DetectionResult, FailedJob, GenerationJob, GenerationParams, GenerationResult, HighlightDetectionJob, HighlightDetectionParams, HighlightDetectionProgress, HighlightDetectionResult, JobMetadata, JobQueryFilters, JobQueue, JobStatus, JobType, OnProgress, PendingJob, ProcessorResult, RunningJob, TagDetectionJob, TagDetectionParams, TagDetectionProgress, TagDetectionResult, YieldProgress };
package/dist/index.js CHANGED
@@ -1,9 +1,14 @@
1
- import { promises, watch } from 'fs';
1
+ import { promises } from 'fs';
2
2
  import * as path from 'path';
3
- import { reconcileSelector, getLocaleEnglishName, didToAgent } from '@semiont/core';
3
+ import { jobId, reconcileSelector, getLocaleEnglishName, didToAgent } from '@semiont/core';
4
4
  import { generateAnnotationId } from '@semiont/event-sourcing';
5
5
 
6
6
  // src/fs-job-queue.ts
7
+ var REANNOUNCE_INTERVAL_MS = 3e4;
8
+ var STALE_RUNNING_MS = 30 * 6e4;
9
+ var PROGRESS_WRITE_MIN_INTERVAL_MS = 5e3;
10
+ var RETENTION_HOURS = 24;
11
+ var CLEANUP_INTERVAL_MS = 36e5;
7
12
  var FsJobQueue = class {
8
13
  constructor(project, logger, eventBus) {
9
14
  this.eventBus = eventBus;
@@ -13,12 +18,13 @@ var FsJobQueue = class {
13
18
  eventBus;
14
19
  jobsDir;
15
20
  logger;
16
- // In-memory pending queue: avoids fs.readdir() on every poll (6×/sec with 6 workers)
17
- pendingQueue = [];
18
- watcher = null;
19
- loadDebounceTimer = null;
21
+ reannounceTimer = null;
22
+ cleanupTimer = null;
23
+ /** Per-job timestamp of the last progress write, for throttling. */
24
+ lastProgressWrite = /* @__PURE__ */ new Map();
20
25
  /**
21
- * Initialize job queue directories, load pending jobs, and start fs.watch
26
+ * Initialize job queue directories, announce any pending backlog,
27
+ * and start the re-announce interval. Idempotent.
22
28
  */
23
29
  async initialize() {
24
30
  const statuses = ["pending", "running", "complete", "failed", "cancelled"];
@@ -26,62 +32,83 @@ var FsJobQueue = class {
26
32
  const dir = path.join(this.jobsDir, status);
27
33
  await promises.mkdir(dir, { recursive: true });
28
34
  }
29
- await this.loadPendingJobs();
30
- const pendingDir = path.join(this.jobsDir, "pending");
31
- try {
32
- this.watcher = watch(pendingDir, () => {
33
- this.debouncedLoadPendingJobs();
34
- });
35
- } catch (error) {
36
- this.logger.warn("Failed to watch pending directory", {
37
- error: error instanceof Error ? error.message : String(error)
38
- });
35
+ if (this.eventBus && !this.reannounceTimer) {
36
+ await this.announcePendingJobs();
37
+ this.reannounceTimer = setInterval(() => {
38
+ this.announcePendingJobs().catch((error) => {
39
+ this.logger.warn("Pending-job re-announce failed", {
40
+ error: error instanceof Error ? error.message : String(error)
41
+ });
42
+ });
43
+ this.recoverStaleRunningJobs().catch((error) => {
44
+ this.logger.warn("Stale-running recovery failed", {
45
+ error: error instanceof Error ? error.message : String(error)
46
+ });
47
+ });
48
+ }, REANNOUNCE_INTERVAL_MS);
49
+ this.reannounceTimer.unref?.();
50
+ }
51
+ if (!this.cleanupTimer) {
52
+ this.cleanupTimer = setInterval(() => {
53
+ this.cleanupOldJobs(RETENTION_HOURS).catch((error) => {
54
+ this.logger.warn("Job retention cleanup failed", {
55
+ error: error instanceof Error ? error.message : String(error)
56
+ });
57
+ });
58
+ }, CLEANUP_INTERVAL_MS);
59
+ this.cleanupTimer.unref?.();
39
60
  }
40
61
  this.logger.info("Job queue initialized");
41
62
  }
42
63
  /**
43
- * Clean up watcher
64
+ * Stop the re-announce and retention intervals
44
65
  */
45
66
  destroy() {
46
- if (this.watcher) {
47
- this.watcher.close();
48
- this.watcher = null;
67
+ if (this.reannounceTimer) {
68
+ clearInterval(this.reannounceTimer);
69
+ this.reannounceTimer = null;
49
70
  }
50
- if (this.loadDebounceTimer) {
51
- clearTimeout(this.loadDebounceTimer);
52
- this.loadDebounceTimer = null;
71
+ if (this.cleanupTimer) {
72
+ clearInterval(this.cleanupTimer);
73
+ this.cleanupTimer = null;
53
74
  }
54
75
  }
55
76
  /**
56
- * Load pending jobs from disk into in-memory queue
77
+ * Emit `job:queued` for a pending job, if an EventBus is wired and
78
+ * the job carries a `resourceId` (every current job type does).
57
79
  */
58
- async loadPendingJobs() {
59
- const pendingDir = path.join(this.jobsDir, "pending");
60
- try {
61
- const files = await promises.readdir(pendingDir);
62
- files.sort();
63
- const jobs = [];
64
- for (const file of files) {
65
- try {
66
- const content = await promises.readFile(path.join(pendingDir, file), "utf-8");
67
- jobs.push(JSON.parse(content));
68
- } catch {
69
- }
70
- }
71
- this.pendingQueue = jobs;
72
- } catch {
73
- this.pendingQueue = [];
80
+ announce(job) {
81
+ if (this.eventBus && "params" in job && "resourceId" in job.params) {
82
+ this.eventBus.get("job:queued").next({
83
+ jobId: job.metadata.id,
84
+ jobType: job.metadata.type,
85
+ resourceId: job.params.resourceId,
86
+ userId: job.metadata.userId
87
+ });
74
88
  }
75
89
  }
76
90
  /**
77
- * Debounced version of loadPendingJobs fs.watch can fire rapidly
91
+ * Announce every job currently in `pending/`. Files that vanish or
92
+ * fail to parse mid-scan (claimed, cancelled, partially written)
93
+ * are skipped — they're either gone for a good reason or picked up
94
+ * on the next tick.
78
95
  */
79
- debouncedLoadPendingJobs() {
80
- if (this.loadDebounceTimer) return;
81
- this.loadDebounceTimer = setTimeout(async () => {
82
- this.loadDebounceTimer = null;
83
- await this.loadPendingJobs();
84
- }, 100);
96
+ async announcePendingJobs() {
97
+ const pendingDir = path.join(this.jobsDir, "pending");
98
+ let files;
99
+ try {
100
+ files = await promises.readdir(pendingDir);
101
+ } catch {
102
+ return;
103
+ }
104
+ files.sort();
105
+ for (const file of files) {
106
+ try {
107
+ const content = await promises.readFile(path.join(pendingDir, file), "utf-8");
108
+ this.announce(JSON.parse(content));
109
+ } catch {
110
+ }
111
+ }
85
112
  }
86
113
  /**
87
114
  * Create a new job
@@ -91,16 +118,7 @@ var FsJobQueue = class {
91
118
  await promises.writeFile(jobPath, JSON.stringify(job, null, 2), "utf-8");
92
119
  this.logger.info("Job created", { jobId: job.metadata.id, status: job.status });
93
120
  if (job.status === "pending") {
94
- this.pendingQueue.push(job);
95
- this.pendingQueue.sort((a, b) => a.metadata.id.localeCompare(b.metadata.id));
96
- }
97
- if (this.eventBus && "params" in job && "resourceId" in job.params) {
98
- this.eventBus.get("job:queued").next({
99
- jobId: job.metadata.id,
100
- jobType: job.metadata.type,
101
- resourceId: job.params.resourceId,
102
- userId: job.metadata.userId
103
- });
121
+ this.announce(job);
104
122
  }
105
123
  }
106
124
  /**
@@ -129,34 +147,92 @@ var FsJobQueue = class {
129
147
  await promises.unlink(oldPath);
130
148
  } catch (error) {
131
149
  }
132
- if (oldStatus === "pending") {
133
- const idx = this.pendingQueue.findIndex((j) => j.metadata.id === job.metadata.id);
134
- if (idx !== -1) this.pendingQueue.splice(idx, 1);
135
- }
136
- if (job.status === "pending") {
137
- this.pendingQueue.push(job);
138
- this.pendingQueue.sort((a, b) => a.metadata.id.localeCompare(b.metadata.id));
139
- }
140
150
  }
141
151
  const newPath = this.getJobPath(job.metadata.id, job.status);
142
152
  await promises.writeFile(newPath, JSON.stringify(job, null, 2), "utf-8");
143
153
  if (oldStatus && oldStatus !== job.status) {
144
154
  this.logger.info("Job moved", { jobId: job.metadata.id, oldStatus, newStatus: job.status });
155
+ if (job.status === "pending") {
156
+ this.announce(job);
157
+ }
145
158
  } else {
146
159
  this.logger.info("Job updated", { jobId: job.metadata.id, status: job.status });
147
160
  }
148
161
  }
149
162
  /**
150
- * Poll for next pending job (FIFO) from in-memory queue.
151
- * If a predicate is provided, returns the first matching job (skipping non-matching ones).
163
+ * Move a running job to `complete`. Returns false (and changes
164
+ * nothing) if the job is missing or not running — which also makes
165
+ * duplicate `job:complete` events harmless.
152
166
  */
153
- async pollNextPendingJob(predicate) {
154
- if (!predicate) {
155
- return this.pendingQueue.shift() ?? null;
167
+ async completeJob(jobId, result) {
168
+ const job = await this.getJob(jobId);
169
+ if (!job || job.status !== "running") {
170
+ return false;
156
171
  }
157
- const index = this.pendingQueue.findIndex(predicate);
158
- if (index === -1) return null;
159
- return this.pendingQueue.splice(index, 1)[0] ?? null;
172
+ this.lastProgressWrite.delete(jobId);
173
+ const completed = {
174
+ status: "complete",
175
+ metadata: job.metadata,
176
+ params: job.params,
177
+ startedAt: job.startedAt,
178
+ completedAt: (/* @__PURE__ */ new Date()).toISOString(),
179
+ result
180
+ };
181
+ await this.updateJob(completed, "running");
182
+ return true;
183
+ }
184
+ /**
185
+ * Retry-or-fail a running job. While `retryCount < maxRetries` the
186
+ * job goes back to `pending` with the count bumped (and is
187
+ * re-announced); after that it lands in `failed` with the error.
188
+ * Returns null (and changes nothing) if the job isn't running.
189
+ */
190
+ async failJob(jobId, error) {
191
+ const job = await this.getJob(jobId);
192
+ if (!job || job.status !== "running") {
193
+ return null;
194
+ }
195
+ this.lastProgressWrite.delete(jobId);
196
+ if (job.metadata.retryCount < job.metadata.maxRetries) {
197
+ const retried = {
198
+ status: "pending",
199
+ metadata: { ...job.metadata, retryCount: job.metadata.retryCount + 1 },
200
+ params: job.params
201
+ };
202
+ await this.updateJob(retried, "running");
203
+ return "retried";
204
+ }
205
+ const failed = {
206
+ status: "failed",
207
+ metadata: job.metadata,
208
+ params: job.params,
209
+ startedAt: job.startedAt,
210
+ completedAt: (/* @__PURE__ */ new Date()).toISOString(),
211
+ error
212
+ };
213
+ await this.updateJob(failed, "running");
214
+ return "failed";
215
+ }
216
+ /**
217
+ * Write progress into a running job's file. Throttled per job, and
218
+ * a no-op for jobs that aren't running. Beyond surfacing live
219
+ * progress to `job:status-requested`, each write refreshes the
220
+ * file's mtime — the heartbeat `recoverStaleRunningJobs` watches.
221
+ */
222
+ async recordProgress(jobId, progress) {
223
+ const now = Date.now();
224
+ const lastWrite = this.lastProgressWrite.get(jobId) ?? 0;
225
+ if (now - lastWrite < PROGRESS_WRITE_MIN_INTERVAL_MS) {
226
+ return;
227
+ }
228
+ this.lastProgressWrite.set(jobId, now);
229
+ const job = await this.getJob(jobId);
230
+ if (!job || job.status !== "running") {
231
+ this.lastProgressWrite.delete(jobId);
232
+ return;
233
+ }
234
+ const updated = { ...job, progress };
235
+ await promises.writeFile(this.getJobPath(jobId, "running"), JSON.stringify(updated, null, 2), "utf-8");
160
236
  }
161
237
  /**
162
238
  * List jobs with filters
@@ -207,6 +283,63 @@ var FsJobQueue = class {
207
283
  await this.updateJob(cancelledJob, oldStatus);
208
284
  return true;
209
285
  }
286
+ /**
287
+ * Cancel all pending jobs in a category — the granularity of the
288
+ * `job:cancel-requested` UI signal. Running jobs are left to finish:
289
+ * interrupting a worker mid-inference would need a worker-side kill
290
+ * channel that doesn't exist.
291
+ */
292
+ async cancelPendingJobs(category) {
293
+ const matches = category === "generation" ? (type) => type === "generation" : (type) => type.endsWith("-annotation");
294
+ const pending = await this.listJobs({ status: "pending", limit: Number.MAX_SAFE_INTEGER });
295
+ let cancelled = 0;
296
+ for (const job of pending) {
297
+ if (!matches(job.metadata.type)) continue;
298
+ if (await this.cancelJob(job.metadata.id)) {
299
+ cancelled++;
300
+ }
301
+ }
302
+ if (cancelled > 0) {
303
+ this.logger.info("Cancelled pending jobs", { category, cancelled });
304
+ }
305
+ return cancelled;
306
+ }
307
+ /**
308
+ * Recover running jobs orphaned by a dead worker: any `running/`
309
+ * file whose mtime is older than the stale window is fed through
310
+ * the same retry-or-fail path as `job:fail`. Progress writes
311
+ * refresh the mtime, so a live worker is never recovered out from
312
+ * under itself as long as it reports within the window.
313
+ */
314
+ async recoverStaleRunningJobs() {
315
+ const runningDir = path.join(this.jobsDir, "running");
316
+ let files;
317
+ try {
318
+ files = await promises.readdir(runningDir);
319
+ } catch {
320
+ return 0;
321
+ }
322
+ const now = Date.now();
323
+ let recovered = 0;
324
+ for (const file of files) {
325
+ if (!file.endsWith(".json")) continue;
326
+ try {
327
+ const stat = await promises.stat(path.join(runningDir, file));
328
+ if (now - stat.mtimeMs < STALE_RUNNING_MS) continue;
329
+ const staleId = jobId(file.slice(0, -".json".length));
330
+ const outcome = await this.failJob(
331
+ staleId,
332
+ `worker presumed dead \u2014 no progress within ${STALE_RUNNING_MS / 6e4} minutes`
333
+ );
334
+ if (outcome) {
335
+ this.logger.warn("Recovered stale running job", { jobId: staleId, outcome });
336
+ recovered++;
337
+ }
338
+ } catch {
339
+ }
340
+ }
341
+ return recovered;
342
+ }
210
343
  /**
211
344
  * Clean up old completed/failed jobs (older than retention period)
212
345
  */
@@ -805,21 +938,6 @@ function logAnchorMethod(motivation, exact, anchorMethod) {
805
938
 
806
939
  // src/workers/annotation-detection.ts
807
940
  var AnnotationDetection = class {
808
- /**
809
- * Fetch content from a ContentFetcher and read the stream to a string.
810
- * Shared helper for all workers.
811
- */
812
- static async fetchContent(contentFetcher, resourceId) {
813
- const stream = await contentFetcher(resourceId);
814
- if (!stream) {
815
- throw new Error(`Could not load content for resource ${resourceId}`);
816
- }
817
- const chunks = [];
818
- for await (const chunk of stream) {
819
- chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
820
- }
821
- return Buffer.concat(chunks).toString("utf-8");
822
- }
823
941
  /**
824
942
  * Detect comments in content.
825
943
  *
@@ -1065,10 +1183,19 @@ Knowledge graph context:
1065
1183
  ${parts.join("\n")}`;
1066
1184
  }
1067
1185
  }
1186
+ let semanticContextSection = "";
1187
+ const similar = context?.semanticContext?.similar ?? [];
1188
+ if (similar.length > 0) {
1189
+ const lines = [...similar].sort((a, b) => b.score - a.score).slice(0, 3).map((m) => `- (${m.score.toFixed(2)}) ${m.text.slice(0, 240)}`);
1190
+ semanticContextSection = `
1191
+
1192
+ Related passages from the knowledge base:
1193
+ ${lines.join("\n")}`;
1194
+ }
1068
1195
  const structureGuidance = finalMaxTokens >= 1e3 ? "organized into titled sections (## Section) with well-structured paragraphs" : "organized into well-structured paragraphs";
1069
1196
  const prompt = `Generate a concise, informative resource about "${topic}".
1070
1197
  ${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
1071
- ${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${sourceLanguageInstruction}${languageInstruction}
1198
+ ${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${semanticContextSection}${sourceLanguageInstruction}${languageInstruction}
1072
1199
 
1073
1200
  Requirements:
1074
1201
  - Start with a clear heading (# Title)