@semiont/jobs 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +644 -20
- package/dist/worker-main.d.ts +2 -22
- package/dist/worker-main.js +1197 -1197
- package/dist/worker-main.js.map +1 -1
- package/package.json +4 -2
- package/dist/fs-job-queue.d.ts +0 -79
- package/dist/fs-job-queue.d.ts.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/job-claim-adapter.d.ts +0 -76
- package/dist/job-claim-adapter.d.ts.map +0 -1
- package/dist/job-queue-interface.d.ts +0 -19
- package/dist/job-queue-interface.d.ts.map +0 -1
- package/dist/job-queue-state-unit.d.ts +0 -26
- package/dist/job-queue-state-unit.d.ts.map +0 -1
- package/dist/job-worker.d.ts +0 -67
- package/dist/job-worker.d.ts.map +0 -1
- package/dist/processors.d.ts +0 -41
- package/dist/processors.d.ts.map +0 -1
- package/dist/types.d.ts +0 -319
- package/dist/types.d.ts.map +0 -1
- package/dist/worker-main.d.ts.map +0 -1
- package/dist/worker-process.d.ts +0 -47
- package/dist/worker-process.d.ts.map +0 -1
- package/dist/workers/annotation-detection.d.ts +0 -61
- package/dist/workers/annotation-detection.d.ts.map +0 -1
- package/dist/workers/detection/entity-extractor.d.ts +0 -42
- package/dist/workers/detection/entity-extractor.d.ts.map +0 -1
- package/dist/workers/detection/motivation-parsers.d.ts +0 -116
- package/dist/workers/detection/motivation-parsers.d.ts.map +0 -1
- package/dist/workers/detection/motivation-prompts.d.ts +0 -57
- package/dist/workers/detection/motivation-prompts.d.ts.map +0 -1
- package/dist/workers/generation/resource-generation.d.ts +0 -23
- package/dist/workers/generation/resource-generation.d.ts.map +0 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,21 +1,645 @@
|
|
|
1
|
+
import { Readable } from 'stream';
|
|
2
|
+
import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, TagSchema, Logger, EventBus, components } from '@semiont/core';
|
|
3
|
+
import { SemiontProject } from '@semiont/core/node';
|
|
4
|
+
import { InferenceClient } from '@semiont/inference';
|
|
5
|
+
|
|
1
6
|
/**
|
|
2
|
-
*
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* -
|
|
9
|
-
* -
|
|
10
|
-
* -
|
|
11
|
-
* -
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
7
|
+
* Job Queue Type Definitions - Discriminated Union Design
|
|
8
|
+
*
|
|
9
|
+
* Jobs represent async work that can be queued, processed, and monitored.
|
|
10
|
+
* Uses TypeScript discriminated unions to enforce valid state transitions.
|
|
11
|
+
*
|
|
12
|
+
* Design principles:
|
|
13
|
+
* - Each job status has specific valid fields
|
|
14
|
+
* - Type narrowing works automatically via status discriminant
|
|
15
|
+
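* - State-specific fields (`progress`, `result`, `error`) are required on the state that carries them rather than optional on one shared shape; only `startedAt` on failed/cancelled jobs is optional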
|
|
16
|
+
* - State machine is explicit and type-safe
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Content fetcher - turns a ResourceId into a readable stream.
|
|
21
|
+
* Workers use this to access resource content on demand.
|
|
22
|
+
* The implementation is provided by the backend at startup.
|
|
23
|
+
*/
|
|
24
|
+
type ContentFetcher = (resourceId: ResourceId) => Promise<Readable | null>;
|
|
25
|
+
type JobType = 'reference-annotation' | 'generation' | 'highlight-annotation' | 'assessment-annotation' | 'comment-annotation' | 'tag-annotation';
|
|
26
|
+
type JobStatus = 'pending' | 'running' | 'complete' | 'failed' | 'cancelled';
|
|
27
|
+
/**
|
|
28
|
+
* Job metadata - common to all states
|
|
29
|
+
*/
|
|
30
|
+
interface JobMetadata {
|
|
31
|
+
id: JobId;
|
|
32
|
+
type: JobType;
|
|
33
|
+
userId: UserId;
|
|
34
|
+
userName: string;
|
|
35
|
+
userEmail: string;
|
|
36
|
+
userDomain: string;
|
|
37
|
+
created: string;
|
|
38
|
+
retryCount: number;
|
|
39
|
+
maxRetries: number;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Locale conventions for detection/generation params.
|
|
43
|
+
*
|
|
44
|
+
* Two independent locales flow through these jobs:
|
|
45
|
+
*
|
|
46
|
+
* - `language` — *annotation body* locale. The BCP-47 tag the LLM should
|
|
47
|
+
* write generated body text in (comment text, assessment text, generated
|
|
48
|
+
* resource content, tag category label). Sourced from the user's UI
|
|
49
|
+
* locale. Stamped onto the W3C `TextualBody.language` field.
|
|
50
|
+
*
|
|
51
|
+
* - `sourceLanguage` — *source resource* locale. The BCP-47 tag of the
|
|
52
|
+
* content being analyzed. Sourced from `ResourceDescriptor` (carried as
|
|
53
|
+
* `Representation.language` on the primary representation). Used in
|
|
54
|
+
* prompts so the LLM analyzes non-English source correctly even when
|
|
55
|
+
* the user's UI locale differs.
|
|
56
|
+
*
|
|
57
|
+
* Examples: a German user analyzing an English document → `language='de'`,
|
|
58
|
+
* `sourceLanguage='en'`. An English user detecting entities in a French
|
|
59
|
+
* document → `language='en'` (unused for entity references), `sourceLanguage='fr'`.
|
|
60
|
+
*/
|
|
61
|
+
/**
|
|
62
|
+
* Detection job parameters
|
|
63
|
+
*/
|
|
64
|
+
interface DetectionParams {
|
|
65
|
+
resourceId: ResourceId;
|
|
66
|
+
entityTypes: EntityType[];
|
|
67
|
+
includeDescriptiveReferences?: boolean;
|
|
68
|
+
/** Annotation body locale — see locale conventions above. */
|
|
69
|
+
language?: string;
|
|
70
|
+
/** Source-resource locale — see locale conventions above. */
|
|
71
|
+
sourceLanguage?: string;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Generation job parameters
|
|
75
|
+
*/
|
|
76
|
+
interface GenerationParams {
|
|
77
|
+
referenceId: AnnotationId;
|
|
78
|
+
sourceResourceId: ResourceId;
|
|
79
|
+
sourceResourceName: string;
|
|
80
|
+
annotation: Annotation;
|
|
81
|
+
prompt?: string;
|
|
82
|
+
title?: string;
|
|
83
|
+
entityTypes?: EntityType[];
|
|
84
|
+
/** Annotation body locale — language the *generated resource* is written in. */
|
|
85
|
+
language?: string;
|
|
86
|
+
/**
|
|
87
|
+
* Source-resource locale — language of the resource being referenced.
|
|
88
|
+
* Used in the prompt so the LLM understands the embedded source-context
|
|
89
|
+
* snippet correctly when source ≠ target language.
|
|
90
|
+
*/
|
|
91
|
+
sourceLanguage?: string;
|
|
92
|
+
context?: GatheredContext;
|
|
93
|
+
temperature?: number;
|
|
94
|
+
maxTokens?: number;
|
|
95
|
+
storageUri?: string;
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Highlight detection job parameters
|
|
99
|
+
*/
|
|
100
|
+
interface HighlightDetectionParams {
|
|
101
|
+
resourceId: ResourceId;
|
|
102
|
+
instructions?: string;
|
|
103
|
+
density?: number;
|
|
104
|
+
/** Source-resource locale — see locale conventions above. */
|
|
105
|
+
sourceLanguage?: string;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Assessment detection job parameters
|
|
109
|
+
*/
|
|
110
|
+
interface AssessmentDetectionParams {
|
|
111
|
+
resourceId: ResourceId;
|
|
112
|
+
instructions?: string;
|
|
113
|
+
tone?: 'analytical' | 'critical' | 'balanced' | 'constructive';
|
|
114
|
+
density?: number;
|
|
115
|
+
/** Annotation body locale — see locale conventions above. */
|
|
116
|
+
language?: string;
|
|
117
|
+
/** Source-resource locale — see locale conventions above. */
|
|
118
|
+
sourceLanguage?: string;
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* Comment detection job parameters
|
|
122
|
+
*/
|
|
123
|
+
interface CommentDetectionParams {
|
|
124
|
+
resourceId: ResourceId;
|
|
125
|
+
instructions?: string;
|
|
126
|
+
tone?: 'scholarly' | 'explanatory' | 'conversational' | 'technical';
|
|
127
|
+
density?: number;
|
|
128
|
+
/** Annotation body locale — see locale conventions above. */
|
|
129
|
+
language?: string;
|
|
130
|
+
/** Source-resource locale — see locale conventions above. */
|
|
131
|
+
sourceLanguage?: string;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Tag detection job parameters.
|
|
135
|
+
*
|
|
136
|
+
* Carries the *full* `TagSchema` (not just an id). The dispatcher resolves
|
|
137
|
+
* the caller-supplied `schemaId` against the per-KB tag-schema projection
|
|
138
|
+
* at job-creation time and embeds the resolved schema here, keeping the
|
|
139
|
+
* worker independent of the registry.
|
|
140
|
+
*/
|
|
141
|
+
interface TagDetectionParams {
|
|
142
|
+
resourceId: ResourceId;
|
|
143
|
+
schema: TagSchema;
|
|
144
|
+
categories: string[];
|
|
145
|
+
/** Annotation body locale — see locale conventions above. */
|
|
146
|
+
language?: string;
|
|
147
|
+
/** Source-resource locale — see locale conventions above. */
|
|
148
|
+
sourceLanguage?: string;
|
|
149
|
+
}
|
|
150
|
+
/**
|
|
151
|
+
* Detection job progress
|
|
152
|
+
*/
|
|
153
|
+
interface DetectionProgress {
|
|
154
|
+
totalEntityTypes: number;
|
|
155
|
+
processedEntityTypes: number;
|
|
156
|
+
currentEntityType?: string;
|
|
157
|
+
entitiesFound: number;
|
|
158
|
+
entitiesEmitted: number;
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Detection job result
|
|
162
|
+
*/
|
|
163
|
+
interface DetectionResult {
|
|
164
|
+
totalFound: number;
|
|
165
|
+
totalEmitted: number;
|
|
166
|
+
errors: number;
|
|
167
|
+
}
|
|
168
|
+
/**
|
|
169
|
+
* Generation job progress (declared as `YieldProgress`; this is the progress type of `GenerationJob`)
|
|
170
|
+
*/
|
|
171
|
+
interface YieldProgress {
|
|
172
|
+
stage: 'fetching' | 'generating' | 'creating' | 'linking';
|
|
173
|
+
percentage: number;
|
|
174
|
+
message?: string;
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Generation job result
|
|
178
|
+
*/
|
|
179
|
+
interface GenerationResult {
|
|
180
|
+
resourceId: ResourceId;
|
|
181
|
+
resourceName: string;
|
|
182
|
+
}
|
|
183
|
+
/**
|
|
184
|
+
* Highlight detection job progress
|
|
185
|
+
*/
|
|
186
|
+
interface HighlightDetectionProgress {
|
|
187
|
+
stage: 'analyzing' | 'creating';
|
|
188
|
+
percentage: number;
|
|
189
|
+
message?: string;
|
|
190
|
+
}
|
|
191
|
+
/**
|
|
192
|
+
* Highlight detection job result
|
|
193
|
+
*/
|
|
194
|
+
interface HighlightDetectionResult {
|
|
195
|
+
highlightsFound: number;
|
|
196
|
+
highlightsCreated: number;
|
|
197
|
+
}
|
|
198
|
+
/**
|
|
199
|
+
* Assessment detection job progress
|
|
200
|
+
*/
|
|
201
|
+
interface AssessmentDetectionProgress {
|
|
202
|
+
stage: 'analyzing' | 'creating';
|
|
203
|
+
percentage: number;
|
|
204
|
+
message?: string;
|
|
205
|
+
}
|
|
206
|
+
/**
|
|
207
|
+
* Assessment detection job result
|
|
208
|
+
*/
|
|
209
|
+
interface AssessmentDetectionResult {
|
|
210
|
+
assessmentsFound: number;
|
|
211
|
+
assessmentsCreated: number;
|
|
212
|
+
}
|
|
213
|
+
/**
|
|
214
|
+
* Comment detection job progress
|
|
215
|
+
*/
|
|
216
|
+
interface CommentDetectionProgress {
|
|
217
|
+
stage: 'analyzing' | 'creating';
|
|
218
|
+
percentage: number;
|
|
219
|
+
message?: string;
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* Comment detection job result
|
|
223
|
+
*/
|
|
224
|
+
interface CommentDetectionResult {
|
|
225
|
+
commentsFound: number;
|
|
226
|
+
commentsCreated: number;
|
|
227
|
+
}
|
|
228
|
+
/**
|
|
229
|
+
* Tag detection job progress
|
|
230
|
+
*/
|
|
231
|
+
interface TagDetectionProgress {
|
|
232
|
+
stage: 'analyzing' | 'creating';
|
|
233
|
+
percentage: number;
|
|
234
|
+
currentCategory?: string;
|
|
235
|
+
processedCategories: number;
|
|
236
|
+
totalCategories: number;
|
|
237
|
+
message?: string;
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Tag detection job result
|
|
241
|
+
*/
|
|
242
|
+
interface TagDetectionResult {
|
|
243
|
+
tagsFound: number;
|
|
244
|
+
tagsCreated: number;
|
|
245
|
+
byCategory: Record<string, number>;
|
|
246
|
+
}
|
|
247
|
+
/**
|
|
248
|
+
* Pending job - just created, waiting to be picked up
|
|
249
|
+
*/
|
|
250
|
+
interface PendingJob<P> {
|
|
251
|
+
status: 'pending';
|
|
252
|
+
metadata: JobMetadata;
|
|
253
|
+
params: P;
|
|
254
|
+
}
|
|
255
|
+
/**
|
|
256
|
+
* Running job - actively being processed
|
|
257
|
+
*/
|
|
258
|
+
interface RunningJob<P, PG> {
|
|
259
|
+
status: 'running';
|
|
260
|
+
metadata: JobMetadata;
|
|
261
|
+
params: P;
|
|
262
|
+
startedAt: string;
|
|
263
|
+
progress: PG;
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Complete job - successfully finished
|
|
267
|
+
*/
|
|
268
|
+
interface CompleteJob<P, R> {
|
|
269
|
+
status: 'complete';
|
|
270
|
+
metadata: JobMetadata;
|
|
271
|
+
params: P;
|
|
272
|
+
startedAt: string;
|
|
273
|
+
completedAt: string;
|
|
274
|
+
result: R;
|
|
275
|
+
}
|
|
276
|
+
/**
|
|
277
|
+
* Failed job - encountered an error
|
|
278
|
+
*/
|
|
279
|
+
interface FailedJob<P> {
|
|
280
|
+
status: 'failed';
|
|
281
|
+
metadata: JobMetadata;
|
|
282
|
+
params: P;
|
|
283
|
+
startedAt?: string;
|
|
284
|
+
completedAt: string;
|
|
285
|
+
error: string;
|
|
286
|
+
}
|
|
287
|
+
/**
|
|
288
|
+
* Cancelled job - stopped by user
|
|
289
|
+
*/
|
|
290
|
+
interface CancelledJob<P> {
|
|
291
|
+
status: 'cancelled';
|
|
292
|
+
metadata: JobMetadata;
|
|
293
|
+
params: P;
|
|
294
|
+
startedAt?: string;
|
|
295
|
+
completedAt: string;
|
|
296
|
+
}
|
|
297
|
+
/**
|
|
298
|
+
* Generic job - discriminated union of all states
|
|
299
|
+
*/
|
|
300
|
+
type Job<P, PG, R> = PendingJob<P> | RunningJob<P, PG> | CompleteJob<P, R> | FailedJob<P> | CancelledJob<P>;
|
|
301
|
+
type DetectionJob = Job<DetectionParams, DetectionProgress, DetectionResult>;
|
|
302
|
+
type GenerationJob = Job<GenerationParams, YieldProgress, GenerationResult>;
|
|
303
|
+
type HighlightDetectionJob = Job<HighlightDetectionParams, HighlightDetectionProgress, HighlightDetectionResult>;
|
|
304
|
+
type AssessmentDetectionJob = Job<AssessmentDetectionParams, AssessmentDetectionProgress, AssessmentDetectionResult>;
|
|
305
|
+
type CommentDetectionJob = Job<CommentDetectionParams, CommentDetectionProgress, CommentDetectionResult>;
|
|
306
|
+
type TagDetectionJob = Job<TagDetectionParams, TagDetectionProgress, TagDetectionResult>;
|
|
307
|
+
/**
|
|
308
|
+
* Discriminated union of all job types
|
|
309
|
+
*/
|
|
310
|
+
type AnyJob = DetectionJob | GenerationJob | HighlightDetectionJob | AssessmentDetectionJob | CommentDetectionJob | TagDetectionJob;
|
|
311
|
+
declare function isPendingJob(job: AnyJob): job is PendingJob<any>;
|
|
312
|
+
declare function isRunningJob(job: AnyJob): job is RunningJob<any, any>;
|
|
313
|
+
declare function isCompleteJob(job: AnyJob): job is CompleteJob<any, any>;
|
|
314
|
+
declare function isFailedJob(job: AnyJob): job is FailedJob<any>;
|
|
315
|
+
declare function isCancelledJob(job: AnyJob): job is CancelledJob<any>;
|
|
316
|
+
interface JobQueryFilters {
|
|
317
|
+
status?: JobStatus;
|
|
318
|
+
type?: JobType;
|
|
319
|
+
userId?: UserId;
|
|
320
|
+
limit?: number;
|
|
321
|
+
offset?: number;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
interface JobQueue {
|
|
325
|
+
initialize(): Promise<void>;
|
|
326
|
+
destroy(): void;
|
|
327
|
+
createJob(job: AnyJob): Promise<void>;
|
|
328
|
+
getJob(jobId: JobId): Promise<AnyJob | null>;
|
|
329
|
+
updateJob(job: AnyJob, oldStatus?: JobStatus): Promise<void>;
|
|
330
|
+
pollNextPendingJob(predicate?: (job: AnyJob) => boolean): Promise<AnyJob | null>;
|
|
331
|
+
cancelJob(jobId: JobId): Promise<boolean>;
|
|
332
|
+
getStats(): Promise<{
|
|
333
|
+
pending: number;
|
|
334
|
+
running: number;
|
|
335
|
+
complete: number;
|
|
336
|
+
failed: number;
|
|
337
|
+
cancelled: number;
|
|
338
|
+
}>;
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* Job Queue Manager
|
|
343
|
+
*
|
|
344
|
+
* Filesystem-based job queue with atomic operations.
|
|
345
|
+
* Jobs are stored in directories by status for easy polling.
|
|
346
|
+
*/
|
|
347
|
+
|
|
348
|
+
declare class FsJobQueue implements JobQueue {
|
|
349
|
+
private eventBus?;
|
|
350
|
+
private jobsDir;
|
|
351
|
+
private logger;
|
|
352
|
+
private pendingQueue;
|
|
353
|
+
private watcher;
|
|
354
|
+
private loadDebounceTimer;
|
|
355
|
+
constructor(project: SemiontProject, logger: Logger, eventBus?: EventBus | undefined);
|
|
356
|
+
/**
|
|
357
|
+
* Initialize job queue directories, load pending jobs, and start fs.watch
|
|
358
|
+
*/
|
|
359
|
+
initialize(): Promise<void>;
|
|
360
|
+
/**
|
|
361
|
+
* Clean up watcher
|
|
362
|
+
*/
|
|
363
|
+
destroy(): void;
|
|
364
|
+
/**
|
|
365
|
+
* Load pending jobs from disk into in-memory queue
|
|
366
|
+
*/
|
|
367
|
+
private loadPendingJobs;
|
|
368
|
+
/**
|
|
369
|
+
* Debounced version of loadPendingJobs — fs.watch can fire rapidly
|
|
370
|
+
*/
|
|
371
|
+
private debouncedLoadPendingJobs;
|
|
372
|
+
/**
|
|
373
|
+
* Create a new job
|
|
374
|
+
*/
|
|
375
|
+
createJob(job: AnyJob): Promise<void>;
|
|
376
|
+
/**
|
|
377
|
+
* Get a job by ID (searches all status directories)
|
|
378
|
+
*/
|
|
379
|
+
getJob(jobId: JobId): Promise<AnyJob | null>;
|
|
380
|
+
/**
|
|
381
|
+
* Update a job (atomic: delete old, write new)
|
|
382
|
+
*/
|
|
383
|
+
updateJob(job: AnyJob, oldStatus?: JobStatus): Promise<void>;
|
|
384
|
+
/**
|
|
385
|
+
* Poll for next pending job (FIFO) from in-memory queue.
|
|
386
|
+
* If a predicate is provided, returns the first matching job (skipping non-matching ones).
|
|
387
|
+
*/
|
|
388
|
+
pollNextPendingJob(predicate?: (job: AnyJob) => boolean): Promise<AnyJob | null>;
|
|
389
|
+
/**
|
|
390
|
+
* List jobs with filters
|
|
391
|
+
*/
|
|
392
|
+
listJobs(filters?: JobQueryFilters): Promise<AnyJob[]>;
|
|
393
|
+
/**
|
|
394
|
+
* Cancel a job
|
|
395
|
+
*/
|
|
396
|
+
cancelJob(jobId: JobId): Promise<boolean>;
|
|
397
|
+
/**
|
|
398
|
+
* Clean up old completed/failed jobs (older than retention period)
|
|
399
|
+
*/
|
|
400
|
+
cleanupOldJobs(retentionHours?: number): Promise<number>;
|
|
401
|
+
/**
|
|
402
|
+
* Get job file path
|
|
403
|
+
*/
|
|
404
|
+
private getJobPath;
|
|
405
|
+
/**
|
|
406
|
+
* Get statistics about the queue
|
|
407
|
+
*/
|
|
408
|
+
getStats(): Promise<{
|
|
409
|
+
pending: number;
|
|
410
|
+
running: number;
|
|
411
|
+
complete: number;
|
|
412
|
+
failed: number;
|
|
413
|
+
cancelled: number;
|
|
414
|
+
}>;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
/**
|
|
418
|
+
* Job Worker Base Class
|
|
419
|
+
*
|
|
420
|
+
* Abstract worker that polls the job queue and processes jobs.
|
|
421
|
+
* Subclasses implement specific job processing logic.
|
|
422
|
+
*/
|
|
423
|
+
|
|
424
|
+
declare abstract class JobWorker {
|
|
425
|
+
private running;
|
|
426
|
+
private currentJob;
|
|
427
|
+
private pollIntervalMs;
|
|
428
|
+
private errorBackoffMs;
|
|
429
|
+
protected jobQueue: JobQueue;
|
|
430
|
+
protected logger: Logger;
|
|
431
|
+
constructor(jobQueue: JobQueue, pollIntervalMs: number | undefined, errorBackoffMs: number | undefined, logger: Logger);
|
|
432
|
+
/**
|
|
433
|
+
* Start the worker (polls queue in loop)
|
|
434
|
+
*/
|
|
435
|
+
start(): Promise<void>;
|
|
436
|
+
/**
|
|
437
|
+
* Stop the worker (graceful shutdown)
|
|
438
|
+
*/
|
|
439
|
+
stop(): Promise<void>;
|
|
440
|
+
/**
|
|
441
|
+
* Poll for next job to process
|
|
442
|
+
*/
|
|
443
|
+
private pollNextJob;
|
|
444
|
+
/**
|
|
445
|
+
* Process a job (handles state transitions and error handling)
|
|
446
|
+
*/
|
|
447
|
+
private processJob;
|
|
448
|
+
/**
|
|
449
|
+
* Handle job failure (retry or move to failed)
|
|
450
|
+
*/
|
|
451
|
+
protected handleJobFailure(job: AnyJob, error: any): Promise<void>;
|
|
452
|
+
/**
|
|
453
|
+
* Update job progress (best-effort, doesn't throw)
|
|
454
|
+
*/
|
|
455
|
+
protected updateJobProgress(job: AnyJob): Promise<void>;
|
|
456
|
+
/**
|
|
457
|
+
* Sleep utility
|
|
458
|
+
*/
|
|
459
|
+
protected sleep(ms: number): Promise<void>;
|
|
460
|
+
/**
|
|
461
|
+
* Emit completion event (optional hook for subclasses)
|
|
462
|
+
* Override this to emit job-specific completion events (e.g., job.completed)
|
|
463
|
+
*/
|
|
464
|
+
protected emitCompletionEvent(_job: RunningJob<any, any>, _result: any): Promise<void>;
|
|
465
|
+
/**
|
|
466
|
+
* Get worker name (for logging)
|
|
467
|
+
*/
|
|
468
|
+
protected abstract getWorkerName(): string;
|
|
469
|
+
/**
|
|
470
|
+
* Check if this worker can process the given job
|
|
471
|
+
*/
|
|
472
|
+
protected abstract canProcessJob(job: AnyJob): boolean;
|
|
473
|
+
/**
|
|
474
|
+
* Execute the job (job-specific logic)
|
|
475
|
+
* This is where the actual work happens
|
|
476
|
+
* Return the result object (or void for jobs without results)
|
|
477
|
+
* Throw an error to trigger retry logic
|
|
478
|
+
*/
|
|
479
|
+
protected abstract executeJob(job: AnyJob): Promise<any>;
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
/**
|
|
483
|
+
* Job Processors — extracted from JobWorker subclasses
|
|
484
|
+
*
|
|
485
|
+
* Pure functions that take content + inference client + params,
|
|
486
|
+
* report progress via callback, and return annotations + results.
|
|
487
|
+
*
|
|
488
|
+
* No EventBus, no JobQueue, no side effects except calling inference.
|
|
489
|
+
* Two callers:
|
|
490
|
+
* 1. In-process JobWorker subclasses (existing path)
|
|
491
|
+
* 2. Remote WorkerStateUnit via worker-process.ts (new path)
|
|
492
|
+
*/
|
|
493
|
+
|
|
494
|
+
type Agent = components['schemas']['Agent'];
|
|
495
|
+
/**
|
|
496
|
+
* Progress callback. The three positional args satisfy the minimum
|
|
497
|
+
* `JobProgress` required fields (`percentage`, `message`, `stage`).
|
|
498
|
+
* The fourth optional arg carries job-type-specific fields
|
|
499
|
+
* (`currentEntityType`, `completedEntityTypes`, `requestParams`, etc.)
|
|
500
|
+
* that the progress UI renders.
|
|
501
|
+
*/
|
|
502
|
+
type OnProgress = (percentage: number, message: string, stage: string, extra?: Partial<JobProgress>) => void;
|
|
503
|
+
type JobProgress = components['schemas']['JobProgress'];
|
|
504
|
+
interface ProcessorResult<R> {
|
|
505
|
+
annotations: Record<string, unknown>[];
|
|
506
|
+
result: R;
|
|
507
|
+
}
|
|
508
|
+
declare function processHighlightJob(content: string, inferenceClient: InferenceClient, params: HighlightDetectionParams, userId: string, generator: Agent, onProgress: OnProgress): Promise<ProcessorResult<HighlightDetectionResult>>;
|
|
509
|
+
declare function processCommentJob(content: string, inferenceClient: InferenceClient, params: CommentDetectionParams, userId: string, generator: Agent, onProgress: OnProgress): Promise<ProcessorResult<CommentDetectionResult>>;
|
|
510
|
+
declare function processAssessmentJob(content: string, inferenceClient: InferenceClient, params: AssessmentDetectionParams, userId: string, generator: Agent, onProgress: OnProgress): Promise<ProcessorResult<AssessmentDetectionResult>>;
|
|
511
|
+
declare function processReferenceJob(content: string, inferenceClient: InferenceClient, params: DetectionParams, userId: string, generator: Agent, onProgress: OnProgress, logger: Logger): Promise<ProcessorResult<DetectionResult>>;
|
|
512
|
+
declare function processTagJob(content: string, inferenceClient: InferenceClient, params: TagDetectionParams, userId: string, generator: Agent, onProgress: OnProgress): Promise<ProcessorResult<TagDetectionResult>>;
|
|
513
|
+
declare function processGenerationJob(inferenceClient: InferenceClient, params: GenerationParams, onProgress: OnProgress, logger: Logger): Promise<{
|
|
514
|
+
content: string;
|
|
515
|
+
title: string;
|
|
516
|
+
format: string;
|
|
517
|
+
result: GenerationResult;
|
|
518
|
+
}>;
|
|
519
|
+
|
|
520
|
+
/**
|
|
521
|
+
* Represents a detected comment with validated position
|
|
522
|
+
*/
|
|
523
|
+
interface CommentMatch {
|
|
524
|
+
exact: string;
|
|
525
|
+
start: number;
|
|
526
|
+
end: number;
|
|
527
|
+
prefix?: string;
|
|
528
|
+
suffix?: string;
|
|
529
|
+
comment: string;
|
|
530
|
+
}
|
|
531
|
+
/**
|
|
532
|
+
* Represents a detected highlight with validated position
|
|
533
|
+
*/
|
|
534
|
+
interface HighlightMatch {
|
|
535
|
+
exact: string;
|
|
536
|
+
start: number;
|
|
537
|
+
end: number;
|
|
538
|
+
prefix?: string;
|
|
539
|
+
suffix?: string;
|
|
540
|
+
}
|
|
541
|
+
/**
|
|
542
|
+
* Represents a detected assessment with validated position
|
|
543
|
+
*/
|
|
544
|
+
interface AssessmentMatch {
|
|
545
|
+
exact: string;
|
|
546
|
+
start: number;
|
|
547
|
+
end: number;
|
|
548
|
+
prefix?: string;
|
|
549
|
+
suffix?: string;
|
|
550
|
+
assessment: string;
|
|
551
|
+
}
|
|
552
|
+
/**
|
|
553
|
+
* Represents a detected tag with validated position
|
|
554
|
+
*/
|
|
555
|
+
interface TagMatch {
|
|
556
|
+
exact: string;
|
|
557
|
+
start: number;
|
|
558
|
+
end: number;
|
|
559
|
+
prefix?: string;
|
|
560
|
+
suffix?: string;
|
|
561
|
+
category: string;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
/**
|
|
565
|
+
* Annotation Detection
|
|
566
|
+
*
|
|
567
|
+
* Orchestrates the full annotation detection pipeline:
|
|
568
|
+
* 1. Build AI prompts using MotivationPrompts
|
|
569
|
+
* 2. Call AI inference
|
|
570
|
+
* 3. Parse and validate results using MotivationParsers
|
|
571
|
+
*
|
|
572
|
+
* All methods take content as a string parameter.
|
|
573
|
+
* Workers are responsible for fetching content via ContentFetcher.
|
|
574
|
+
*/
|
|
575
|
+
|
|
576
|
+
declare class AnnotationDetection {
|
|
577
|
+
/**
|
|
578
|
+
* Fetch content from a ContentFetcher and read the stream to a string.
|
|
579
|
+
* Shared helper for all workers.
|
|
580
|
+
*/
|
|
581
|
+
static fetchContent(contentFetcher: ContentFetcher, resourceId: ResourceId): Promise<string>;
|
|
582
|
+
/**
|
|
583
|
+
* Detect comments in content.
|
|
584
|
+
*
|
|
585
|
+
* `language` is the locale the LLM should write comment text in (annotation
|
|
586
|
+
* body locale). `sourceLanguage` is the locale of the content being analyzed
|
|
587
|
+
* (source-resource locale). See `types.ts` "Locale conventions" for the
|
|
588
|
+
* full discussion.
|
|
589
|
+
*/
|
|
590
|
+
static detectComments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number, language?: string, sourceLanguage?: string): Promise<CommentMatch[]>;
|
|
591
|
+
/**
|
|
592
|
+
* Detect highlights in content.
|
|
593
|
+
*
|
|
594
|
+
* Highlights have no body — only `sourceLanguage` (source-resource locale)
|
|
595
|
+
* applies, used in the prompt so the LLM analyzes non-English source
|
|
596
|
+
* correctly.
|
|
597
|
+
*/
|
|
598
|
+
static detectHighlights(content: string, client: InferenceClient, instructions?: string, density?: number, sourceLanguage?: string): Promise<HighlightMatch[]>;
|
|
599
|
+
/**
|
|
600
|
+
* Detect assessments in content.
|
|
601
|
+
*
|
|
602
|
+
* `language` is the locale the LLM should write assessment text in
|
|
603
|
+
* (annotation body locale). `sourceLanguage` is the locale of the content
|
|
604
|
+
* being analyzed (source-resource locale).
|
|
605
|
+
*/
|
|
606
|
+
static detectAssessments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number, language?: string, sourceLanguage?: string): Promise<AssessmentMatch[]>;
|
|
607
|
+
/**
|
|
608
|
+
* Detect tags in content for a specific category.
|
|
609
|
+
*
|
|
610
|
+
* The full `TagSchema` is supplied by the dispatcher (resolved against
|
|
611
|
+
* the per-KB tag-schema projection at job-creation time) so the worker
|
|
612
|
+
* is independent of the registry.
|
|
613
|
+
*
|
|
614
|
+
* `sourceLanguage` is the locale of the content being analyzed. Body-locale
|
|
615
|
+
* (`language`) doesn't influence the tag prompt — categories are schema
|
|
616
|
+
* identifiers, not LLM-generated text — so it's consumed at the body-stamp
|
|
617
|
+
* site, not here.
|
|
618
|
+
*/
|
|
619
|
+
static detectTags(content: string, client: InferenceClient, schema: TagSchema, category: string, sourceLanguage?: string): Promise<TagMatch[]>;
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
/**
|
|
623
|
+
* Resource Generation
|
|
624
|
+
*
|
|
625
|
+
* Generates markdown resources from topics using AI inference.
|
|
626
|
+
*/
|
|
627
|
+
|
|
628
|
+
/**
|
|
629
|
+
* Generate resource content using inference.
|
|
630
|
+
*
|
|
631
|
+
* Locale parameters: `locale` is the *body* locale — the language the
|
|
632
|
+
* generated resource should be written in (sourced from the user's UI
|
|
633
|
+
* locale). `sourceLanguage` is the *source* locale — the language of the
|
|
634
|
+
* referenced resource whose context (selected passage, surrounding text)
|
|
635
|
+
* is embedded into the prompt. They're independent: a German user can
|
|
636
|
+
* generate German content from an English source resource. See
|
|
637
|
+
* `types.ts` "Locale conventions" for the full discussion.
|
|
638
|
+
*/
|
|
639
|
+
declare function generateResourceFromTopic(topic: string, entityTypes: string[], client: InferenceClient, logger: Logger, userPrompt?: string, locale?: string, context?: GatheredContext, temperature?: number, maxTokens?: number, sourceLanguage?: string): Promise<{
|
|
640
|
+
title: string;
|
|
641
|
+
content: string;
|
|
642
|
+
}>;
|
|
643
|
+
|
|
644
|
+
export { AnnotationDetection, FsJobQueue, JobWorker, generateResourceFromTopic, isCancelledJob, isCompleteJob, isFailedJob, isPendingJob, isRunningJob, processAssessmentJob, processCommentJob, processGenerationJob, processHighlightJob, processReferenceJob, processTagJob };
|
|
645
|
+
export type { AnyJob, AssessmentDetectionJob, AssessmentDetectionParams, AssessmentDetectionProgress, AssessmentDetectionResult, CancelledJob, CommentDetectionJob, CommentDetectionParams, CommentDetectionProgress, CommentDetectionResult, CompleteJob, ContentFetcher, DetectionJob, DetectionParams, DetectionProgress, DetectionResult, FailedJob, GenerationJob, GenerationParams, GenerationResult, HighlightDetectionJob, HighlightDetectionParams, HighlightDetectionProgress, HighlightDetectionResult, JobMetadata, JobQueryFilters, JobQueue, JobStatus, JobType, OnProgress, PendingJob, ProcessorResult, RunningJob, TagDetectionJob, TagDetectionParams, TagDetectionProgress, TagDetectionResult, YieldProgress };
|
package/dist/worker-main.d.ts
CHANGED
|
@@ -1,22 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
*
|
|
4
|
-
* One worker host runs N parallel worker processes, one per distinct
|
|
5
|
-
* `(inferenceProvider, model)` configured in `~/.semiontconfig`. Each
|
|
6
|
-
* authenticates with the KS via `/api/tokens/agent` for *its* agent
|
|
7
|
-
* identity, and that JWT is what the bus stamps onto every event the
|
|
8
|
-
* process emits — so `_userId` on the bus and the `generator` on every
|
|
9
|
-
* annotation refer to the same software peer.
|
|
10
|
-
*
|
|
11
|
-
* Multiple job types may share an inference engine; in that case they
|
|
12
|
-
* share a worker process (and an agent identity). Different engines
|
|
13
|
-
* mean different processes and different agents.
|
|
14
|
-
*
|
|
15
|
-
* Environment variables (only two):
|
|
16
|
-
* SEMIONT_WORKER_SECRET — shared secret for /api/tokens/agent auth
|
|
17
|
-
* ANTHROPIC_API_KEY — only when using Anthropic inference
|
|
18
|
-
*
|
|
19
|
-
* Everything else comes from ~/.semiontconfig.
|
|
20
|
-
*/
|
|
21
|
-
export {};
|
|
22
|
-
//# sourceMappingURL=worker-main.d.ts.map
|
|
1
|
+
|
|
2
|
+
export { };
|