@semiont/jobs 0.5.5 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -42
- package/dist/index.d.ts +4 -70
- package/dist/index.js +224 -430
- package/dist/index.js.map +1 -1
- package/dist/worker-main.js +228 -288
- package/dist/worker-main.js.map +1 -1
- package/package.json +8 -4
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { promises, watch } from 'fs';
|
|
2
2
|
import * as path from 'path';
|
|
3
|
-
import {
|
|
3
|
+
import { reconcileSelector, getLocaleEnglishName, didToAgent } from '@semiont/core';
|
|
4
4
|
import { generateAnnotationId } from '@semiont/event-sourcing';
|
|
5
5
|
|
|
6
6
|
// src/fs-job-queue.ts
|
|
@@ -10,6 +10,7 @@ var FsJobQueue = class {
|
|
|
10
10
|
this.jobsDir = project.jobsDir;
|
|
11
11
|
this.logger = logger;
|
|
12
12
|
}
|
|
13
|
+
eventBus;
|
|
13
14
|
jobsDir;
|
|
14
15
|
logger;
|
|
15
16
|
// In-memory pending queue: avoids fs.readdir() on every poll (6×/sec with 6 workers)
|
|
@@ -269,156 +270,6 @@ var FsJobQueue = class {
|
|
|
269
270
|
}
|
|
270
271
|
};
|
|
271
272
|
|
|
272
|
-
// src/job-worker.ts
|
|
273
|
-
var JobWorker = class {
|
|
274
|
-
running = false;
|
|
275
|
-
currentJob = null;
|
|
276
|
-
pollIntervalMs;
|
|
277
|
-
errorBackoffMs;
|
|
278
|
-
jobQueue;
|
|
279
|
-
logger;
|
|
280
|
-
constructor(jobQueue, pollIntervalMs = 1e3, errorBackoffMs = 5e3, logger) {
|
|
281
|
-
this.jobQueue = jobQueue;
|
|
282
|
-
this.pollIntervalMs = pollIntervalMs;
|
|
283
|
-
this.errorBackoffMs = errorBackoffMs;
|
|
284
|
-
this.logger = logger;
|
|
285
|
-
}
|
|
286
|
-
/**
|
|
287
|
-
* Start the worker (polls queue in loop)
|
|
288
|
-
*/
|
|
289
|
-
async start() {
|
|
290
|
-
this.running = true;
|
|
291
|
-
this.logger.info("Worker started", { worker: this.getWorkerName() });
|
|
292
|
-
while (this.running) {
|
|
293
|
-
try {
|
|
294
|
-
const job = await this.pollNextJob();
|
|
295
|
-
if (job) {
|
|
296
|
-
await this.processJob(job);
|
|
297
|
-
} else {
|
|
298
|
-
await this.sleep(this.pollIntervalMs);
|
|
299
|
-
}
|
|
300
|
-
} catch (error) {
|
|
301
|
-
this.logger.error("Error in worker main loop", { worker: this.getWorkerName(), error: error instanceof Error ? error.message : String(error) });
|
|
302
|
-
await this.sleep(this.errorBackoffMs);
|
|
303
|
-
}
|
|
304
|
-
}
|
|
305
|
-
this.logger.info("Worker stopped", { worker: this.getWorkerName() });
|
|
306
|
-
}
|
|
307
|
-
/**
|
|
308
|
-
* Stop the worker (graceful shutdown)
|
|
309
|
-
*/
|
|
310
|
-
async stop() {
|
|
311
|
-
this.logger.info("Stopping worker", { worker: this.getWorkerName() });
|
|
312
|
-
this.running = false;
|
|
313
|
-
const timeout = 6e4;
|
|
314
|
-
const startTime = Date.now();
|
|
315
|
-
while (this.currentJob && Date.now() - startTime < timeout) {
|
|
316
|
-
await this.sleep(100);
|
|
317
|
-
}
|
|
318
|
-
if (this.currentJob) {
|
|
319
|
-
this.logger.warn("Forced worker shutdown", { worker: this.getWorkerName(), jobId: this.currentJob.metadata.id });
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
/**
|
|
323
|
-
* Poll for next job to process
|
|
324
|
-
*/
|
|
325
|
-
async pollNextJob() {
|
|
326
|
-
return this.jobQueue.pollNextPendingJob((job) => this.canProcessJob(job));
|
|
327
|
-
}
|
|
328
|
-
/**
|
|
329
|
-
* Process a job (handles state transitions and error handling)
|
|
330
|
-
*/
|
|
331
|
-
async processJob(job) {
|
|
332
|
-
this.currentJob = job;
|
|
333
|
-
try {
|
|
334
|
-
if (job.status !== "pending") {
|
|
335
|
-
this.logger.warn("Skipping non-pending job", { worker: this.getWorkerName(), jobId: job.metadata.id, status: job.status });
|
|
336
|
-
return;
|
|
337
|
-
}
|
|
338
|
-
const runningJob = {
|
|
339
|
-
status: "running",
|
|
340
|
-
metadata: job.metadata,
|
|
341
|
-
params: job.params,
|
|
342
|
-
startedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
343
|
-
progress: {}
|
|
344
|
-
// Initialize with empty progress
|
|
345
|
-
};
|
|
346
|
-
await this.jobQueue.updateJob(runningJob, "pending");
|
|
347
|
-
this.logger.info("Processing job", { worker: this.getWorkerName(), jobId: job.metadata.id, jobType: job.metadata.type });
|
|
348
|
-
const result = await this.executeJob(runningJob);
|
|
349
|
-
await this.emitCompletionEvent(runningJob, result);
|
|
350
|
-
const completeJob = {
|
|
351
|
-
status: "complete",
|
|
352
|
-
metadata: runningJob.metadata,
|
|
353
|
-
params: runningJob.params,
|
|
354
|
-
startedAt: runningJob.startedAt,
|
|
355
|
-
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
356
|
-
result: result ?? {}
|
|
357
|
-
// Use returned result or empty object
|
|
358
|
-
};
|
|
359
|
-
await this.jobQueue.updateJob(completeJob, "running");
|
|
360
|
-
this.logger.info("Job completed successfully", { worker: this.getWorkerName(), jobId: job.metadata.id });
|
|
361
|
-
} catch (error) {
|
|
362
|
-
await this.handleJobFailure(job, error);
|
|
363
|
-
} finally {
|
|
364
|
-
this.currentJob = null;
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
/**
|
|
368
|
-
* Handle job failure (retry or move to failed)
|
|
369
|
-
*/
|
|
370
|
-
async handleJobFailure(job, error) {
|
|
371
|
-
const updatedMetadata = {
|
|
372
|
-
...job.metadata,
|
|
373
|
-
retryCount: job.metadata.retryCount + 1
|
|
374
|
-
};
|
|
375
|
-
if (updatedMetadata.retryCount < updatedMetadata.maxRetries) {
|
|
376
|
-
this.logger.info("Job failed, will retry", { worker: this.getWorkerName(), jobId: job.metadata.id, retryCount: updatedMetadata.retryCount, maxRetries: updatedMetadata.maxRetries });
|
|
377
|
-
this.logger.debug("Job error details", { error: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : void 0 });
|
|
378
|
-
const retryJob = {
|
|
379
|
-
status: "pending",
|
|
380
|
-
metadata: updatedMetadata,
|
|
381
|
-
params: job.status === "pending" ? job.params : job.params
|
|
382
|
-
};
|
|
383
|
-
await this.jobQueue.updateJob(retryJob, job.status);
|
|
384
|
-
} else {
|
|
385
|
-
this.logger.error("Job failed permanently", { worker: this.getWorkerName(), jobId: job.metadata.id, retryCount: updatedMetadata.retryCount });
|
|
386
|
-
this.logger.error("Job error details", { error: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : void 0 });
|
|
387
|
-
const failedJob = {
|
|
388
|
-
status: "failed",
|
|
389
|
-
metadata: updatedMetadata,
|
|
390
|
-
params: job.status === "pending" ? job.params : job.params,
|
|
391
|
-
startedAt: job.status === "running" ? job.startedAt : void 0,
|
|
392
|
-
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
393
|
-
error: error instanceof Error ? error.message : String(error)
|
|
394
|
-
};
|
|
395
|
-
await this.jobQueue.updateJob(failedJob, job.status);
|
|
396
|
-
}
|
|
397
|
-
}
|
|
398
|
-
/**
|
|
399
|
-
* Update job progress (best-effort, doesn't throw)
|
|
400
|
-
*/
|
|
401
|
-
async updateJobProgress(job) {
|
|
402
|
-
try {
|
|
403
|
-
await this.jobQueue.updateJob(job);
|
|
404
|
-
} catch (error) {
|
|
405
|
-
this.logger.warn("Failed to update job progress", { worker: this.getWorkerName(), error: error instanceof Error ? error.message : String(error) });
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
/**
|
|
409
|
-
* Sleep utility
|
|
410
|
-
*/
|
|
411
|
-
sleep(ms) {
|
|
412
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
413
|
-
}
|
|
414
|
-
/**
|
|
415
|
-
* Emit completion event (optional hook for subclasses)
|
|
416
|
-
* Override this to emit job-specific completion events (e.g., job.completed)
|
|
417
|
-
*/
|
|
418
|
-
async emitCompletionEvent(_job, _result) {
|
|
419
|
-
}
|
|
420
|
-
};
|
|
421
|
-
|
|
422
273
|
// src/types.ts
|
|
423
274
|
function isPendingJob(job) {
|
|
424
275
|
return job.status === "pending";
|
|
@@ -480,17 +331,15 @@ ${content.substring(0, 8e3)}
|
|
|
480
331
|
|
|
481
332
|
Return a JSON array of comments. Each comment must have:
|
|
482
333
|
- "exact": the exact text passage being commented on (quoted verbatim from source)
|
|
483
|
-
- "
|
|
484
|
-
- "
|
|
485
|
-
- "prefix": up to 32 characters of text immediately before the passage
|
|
486
|
-
- "suffix": up to 32 characters of text immediately after the passage
|
|
334
|
+
- "prefix": up to 64 characters of text immediately before the passage
|
|
335
|
+
- "suffix": up to 64 characters of text immediately after the passage
|
|
487
336
|
- "comment": your comment following the instructions above
|
|
488
337
|
|
|
489
338
|
Respond with a valid JSON array.
|
|
490
339
|
|
|
491
340
|
Example:
|
|
492
341
|
[
|
|
493
|
-
{"exact": "the quarterly review meeting", "
|
|
342
|
+
{"exact": "the quarterly review meeting", "prefix": "We need to schedule ", "suffix": " for next month.", "comment": "Who will lead this? Should we invite the external auditors?"}
|
|
494
343
|
]`;
|
|
495
344
|
} else {
|
|
496
345
|
const toneGuidance = tone ? `
|
|
@@ -516,17 +365,15 @@ ${content.substring(0, 8e3)}
|
|
|
516
365
|
|
|
517
366
|
Return a JSON array of comments. Each comment should have:
|
|
518
367
|
- "exact": the exact text passage being commented on (quoted verbatim from source)
|
|
519
|
-
- "
|
|
520
|
-
- "
|
|
521
|
-
- "prefix": up to 32 characters of text immediately before the passage
|
|
522
|
-
- "suffix": up to 32 characters of text immediately after the passage
|
|
368
|
+
- "prefix": up to 64 characters of text immediately before the passage
|
|
369
|
+
- "suffix": up to 64 characters of text immediately after the passage
|
|
523
370
|
- "comment": your explanatory comment (1-3 sentences, provide context/background/clarification)
|
|
524
371
|
|
|
525
372
|
Respond with a valid JSON array.
|
|
526
373
|
|
|
527
374
|
Example format:
|
|
528
375
|
[
|
|
529
|
-
{"exact": "Ouranos", "
|
|
376
|
+
{"exact": "Ouranos", "prefix": "In the beginning, ", "suffix": " ruled the universe", "comment": "Ouranos (also spelled Uranus) is the primordial Greek deity personifying the sky. In Hesiod's Theogony, he is the son and husband of Gaia (Earth) and father of the Titans."}
|
|
530
377
|
]`;
|
|
531
378
|
}
|
|
532
379
|
return prompt;
|
|
@@ -557,16 +404,14 @@ ${content.substring(0, 8e3)}
|
|
|
557
404
|
|
|
558
405
|
Return a JSON array of highlights. Each highlight must have:
|
|
559
406
|
- "exact": the exact text passage to highlight (quoted verbatim from source)
|
|
560
|
-
- "
|
|
561
|
-
- "
|
|
562
|
-
- "prefix": up to 32 characters of text immediately before the passage
|
|
563
|
-
- "suffix": up to 32 characters of text immediately after the passage
|
|
407
|
+
- "prefix": up to 64 characters of text immediately before the passage
|
|
408
|
+
- "suffix": up to 64 characters of text immediately after the passage
|
|
564
409
|
|
|
565
410
|
Respond with a valid JSON array.
|
|
566
411
|
|
|
567
412
|
Example:
|
|
568
413
|
[
|
|
569
|
-
{"exact": "revenue grew 45% year-over-year", "
|
|
414
|
+
{"exact": "revenue grew 45% year-over-year", "prefix": "In Q3 2024, ", "suffix": ", exceeding all forecasts."}
|
|
570
415
|
]`;
|
|
571
416
|
} else {
|
|
572
417
|
const densityGuidance = density ? `
|
|
@@ -590,16 +435,14 @@ ${content.substring(0, 8e3)}
|
|
|
590
435
|
|
|
591
436
|
Return a JSON array of highlights. Each highlight should have:
|
|
592
437
|
- "exact": the exact text passage to highlight (quoted verbatim from source)
|
|
593
|
-
- "
|
|
594
|
-
- "
|
|
595
|
-
- "prefix": up to 32 characters of text immediately before the passage
|
|
596
|
-
- "suffix": up to 32 characters of text immediately after the passage
|
|
438
|
+
- "prefix": up to 64 characters of text immediately before the passage
|
|
439
|
+
- "suffix": up to 64 characters of text immediately after the passage
|
|
597
440
|
|
|
598
441
|
Respond with a valid JSON array.
|
|
599
442
|
|
|
600
443
|
Example format:
|
|
601
444
|
[
|
|
602
|
-
{"exact": "we will discontinue support for legacy systems by March 2025", "
|
|
445
|
+
{"exact": "we will discontinue support for legacy systems by March 2025", "prefix": "After careful consideration, ", "suffix": ". This decision affects"}
|
|
603
446
|
]`;
|
|
604
447
|
}
|
|
605
448
|
return prompt;
|
|
@@ -633,17 +476,15 @@ ${content.substring(0, 8e3)}
|
|
|
633
476
|
|
|
634
477
|
Return a JSON array of assessments. Each assessment must have:
|
|
635
478
|
- "exact": the exact text passage being assessed (quoted verbatim from source)
|
|
636
|
-
- "
|
|
637
|
-
- "
|
|
638
|
-
- "prefix": up to 32 characters of text immediately before the passage
|
|
639
|
-
- "suffix": up to 32 characters of text immediately after the passage
|
|
479
|
+
- "prefix": up to 64 characters of text immediately before the passage
|
|
480
|
+
- "suffix": up to 64 characters of text immediately after the passage
|
|
640
481
|
- "assessment": your assessment following the instructions above
|
|
641
482
|
|
|
642
483
|
Respond with a valid JSON array.
|
|
643
484
|
|
|
644
485
|
Example:
|
|
645
486
|
[
|
|
646
|
-
{"exact": "the quarterly revenue target", "
|
|
487
|
+
{"exact": "the quarterly revenue target", "prefix": "We established ", "suffix": " for Q4 2024.", "assessment": "This target seems ambitious given market conditions. Consider revising based on recent trends."}
|
|
647
488
|
]`;
|
|
648
489
|
} else {
|
|
649
490
|
const toneGuidance = tone ? `
|
|
@@ -669,17 +510,15 @@ ${content.substring(0, 8e3)}
|
|
|
669
510
|
|
|
670
511
|
Return a JSON array of assessments. Each assessment should have:
|
|
671
512
|
- "exact": the exact text passage being assessed (quoted verbatim from source)
|
|
672
|
-
- "
|
|
673
|
-
- "
|
|
674
|
-
- "prefix": up to 32 characters of text immediately before the passage
|
|
675
|
-
- "suffix": up to 32 characters of text immediately after the passage
|
|
513
|
+
- "prefix": up to 64 characters of text immediately before the passage
|
|
514
|
+
- "suffix": up to 64 characters of text immediately after the passage
|
|
676
515
|
- "assessment": your analytical assessment (1-3 sentences, evaluate validity/strength/implications)
|
|
677
516
|
|
|
678
517
|
Respond with a valid JSON array.
|
|
679
518
|
|
|
680
519
|
Example format:
|
|
681
520
|
[
|
|
682
|
-
{"exact": "AI will replace most jobs by 2030", "
|
|
521
|
+
{"exact": "AI will replace most jobs by 2030", "prefix": "Many experts predict that ", "suffix": ", fundamentally reshaping", "assessment": "This claim lacks nuance and supporting evidence. Employment patterns historically show job transformation rather than wholesale replacement. The timeline appears speculative without specific sector analysis."}
|
|
683
522
|
]`;
|
|
684
523
|
}
|
|
685
524
|
return prompt;
|
|
@@ -725,17 +564,15 @@ ${content}
|
|
|
725
564
|
|
|
726
565
|
Return a JSON array of tags. Each tag should have:
|
|
727
566
|
- "exact": the exact text passage (quoted verbatim from source)
|
|
728
|
-
- "
|
|
729
|
-
- "
|
|
730
|
-
- "prefix": up to 32 characters of text immediately before the passage
|
|
731
|
-
- "suffix": up to 32 characters of text immediately after the passage
|
|
567
|
+
- "prefix": up to 64 characters of text immediately before the passage
|
|
568
|
+
- "suffix": up to 64 characters of text immediately after the passage
|
|
732
569
|
|
|
733
570
|
Respond with a valid JSON array.
|
|
734
571
|
|
|
735
572
|
Example format:
|
|
736
573
|
[
|
|
737
|
-
{"exact": "What duty did the defendant owe?", "
|
|
738
|
-
{"exact": "In tort law, a duty of care is established when...", "
|
|
574
|
+
{"exact": "What duty did the defendant owe?", "prefix": "The central question is: ", "suffix": " This question must be"},
|
|
575
|
+
{"exact": "In tort law, a duty of care is established when...", "prefix": "Legal framework:\\n", "suffix": "\\n\\nApplying this standard"}
|
|
739
576
|
]`;
|
|
740
577
|
return prompt;
|
|
741
578
|
}
|
|
@@ -803,23 +640,29 @@ var MotivationParsers = class {
|
|
|
803
640
|
try {
|
|
804
641
|
const parsed = extractObjectsFromArray(response);
|
|
805
642
|
const valid = parsed.filter(
|
|
806
|
-
(c) => !!c && typeof c === "object" && typeof c.exact === "string" && typeof c.
|
|
643
|
+
(c) => !!c && typeof c === "object" && typeof c.exact === "string" && typeof c.comment === "string" && c.comment.trim().length > 0
|
|
807
644
|
);
|
|
808
645
|
console.log(`[MotivationParsers] Parsed ${valid.length} valid comments from ${parsed.length} total`);
|
|
809
646
|
const validatedComments = [];
|
|
810
647
|
for (const comment of valid) {
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
});
|
|
820
|
-
} catch (error) {
|
|
821
|
-
console.warn(`[MotivationParsers] Skipping invalid comment "${comment.exact}":`, error);
|
|
648
|
+
const reconciled = reconcileSelector(content, {
|
|
649
|
+
exact: comment.exact,
|
|
650
|
+
...typeof comment.prefix === "string" ? { prefix: comment.prefix } : {},
|
|
651
|
+
...typeof comment.suffix === "string" ? { suffix: comment.suffix } : {}
|
|
652
|
+
});
|
|
653
|
+
if (!reconciled) {
|
|
654
|
+
console.warn(`[MotivationParsers] Dropped hallucinated comment "${comment.exact}"`);
|
|
655
|
+
continue;
|
|
822
656
|
}
|
|
657
|
+
logAnchorMethod("comment", comment.exact, reconciled.anchorMethod);
|
|
658
|
+
validatedComments.push({
|
|
659
|
+
comment: comment.comment,
|
|
660
|
+
exact: reconciled.exact,
|
|
661
|
+
start: reconciled.start,
|
|
662
|
+
end: reconciled.end,
|
|
663
|
+
...reconciled.prefix !== void 0 ? { prefix: reconciled.prefix } : {},
|
|
664
|
+
...reconciled.suffix !== void 0 ? { suffix: reconciled.suffix } : {}
|
|
665
|
+
});
|
|
823
666
|
}
|
|
824
667
|
return validatedComments;
|
|
825
668
|
} catch (error) {
|
|
@@ -838,22 +681,27 @@ var MotivationParsers = class {
|
|
|
838
681
|
try {
|
|
839
682
|
const parsed = extractObjectsFromArray(response);
|
|
840
683
|
const highlights = parsed.filter(
|
|
841
|
-
(h) => !!h && typeof h === "object" && typeof h.exact === "string"
|
|
684
|
+
(h) => !!h && typeof h === "object" && typeof h.exact === "string"
|
|
842
685
|
);
|
|
843
686
|
const validatedHighlights = [];
|
|
844
687
|
for (const highlight of highlights) {
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
});
|
|
854
|
-
} catch (error) {
|
|
855
|
-
console.warn(`[MotivationParsers] Skipping invalid highlight "${highlight.exact}":`, error);
|
|
688
|
+
const reconciled = reconcileSelector(content, {
|
|
689
|
+
exact: highlight.exact,
|
|
690
|
+
...typeof highlight.prefix === "string" ? { prefix: highlight.prefix } : {},
|
|
691
|
+
...typeof highlight.suffix === "string" ? { suffix: highlight.suffix } : {}
|
|
692
|
+
});
|
|
693
|
+
if (!reconciled) {
|
|
694
|
+
console.warn(`[MotivationParsers] Dropped hallucinated highlight "${highlight.exact}"`);
|
|
695
|
+
continue;
|
|
856
696
|
}
|
|
697
|
+
logAnchorMethod("highlight", highlight.exact, reconciled.anchorMethod);
|
|
698
|
+
validatedHighlights.push({
|
|
699
|
+
exact: reconciled.exact,
|
|
700
|
+
start: reconciled.start,
|
|
701
|
+
end: reconciled.end,
|
|
702
|
+
...reconciled.prefix !== void 0 ? { prefix: reconciled.prefix } : {},
|
|
703
|
+
...reconciled.suffix !== void 0 ? { suffix: reconciled.suffix } : {}
|
|
704
|
+
});
|
|
857
705
|
}
|
|
858
706
|
return validatedHighlights;
|
|
859
707
|
} catch (error) {
|
|
@@ -873,22 +721,28 @@ var MotivationParsers = class {
|
|
|
873
721
|
try {
|
|
874
722
|
const parsed = extractObjectsFromArray(response);
|
|
875
723
|
const assessments = parsed.filter(
|
|
876
|
-
(a) => !!a && typeof a === "object" && typeof a.exact === "string" && typeof a.
|
|
724
|
+
(a) => !!a && typeof a === "object" && typeof a.exact === "string" && typeof a.assessment === "string"
|
|
877
725
|
);
|
|
878
726
|
const validatedAssessments = [];
|
|
879
727
|
for (const assessment of assessments) {
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
});
|
|
889
|
-
} catch (error) {
|
|
890
|
-
console.warn(`[MotivationParsers] Skipping invalid assessment "${assessment.exact}":`, error);
|
|
728
|
+
const reconciled = reconcileSelector(content, {
|
|
729
|
+
exact: assessment.exact,
|
|
730
|
+
...typeof assessment.prefix === "string" ? { prefix: assessment.prefix } : {},
|
|
731
|
+
...typeof assessment.suffix === "string" ? { suffix: assessment.suffix } : {}
|
|
732
|
+
});
|
|
733
|
+
if (!reconciled) {
|
|
734
|
+
console.warn(`[MotivationParsers] Dropped hallucinated assessment "${assessment.exact}"`);
|
|
735
|
+
continue;
|
|
891
736
|
}
|
|
737
|
+
logAnchorMethod("assessment", assessment.exact, reconciled.anchorMethod);
|
|
738
|
+
validatedAssessments.push({
|
|
739
|
+
assessment: assessment.assessment,
|
|
740
|
+
exact: reconciled.exact,
|
|
741
|
+
start: reconciled.start,
|
|
742
|
+
end: reconciled.end,
|
|
743
|
+
...reconciled.prefix !== void 0 ? { prefix: reconciled.prefix } : {},
|
|
744
|
+
...reconciled.suffix !== void 0 ? { suffix: reconciled.suffix } : {}
|
|
745
|
+
});
|
|
892
746
|
}
|
|
893
747
|
return validatedAssessments;
|
|
894
748
|
} catch (error) {
|
|
@@ -898,17 +752,15 @@ var MotivationParsers = class {
|
|
|
898
752
|
}
|
|
899
753
|
}
|
|
900
754
|
/**
|
|
901
|
-
* Parse
|
|
902
|
-
*
|
|
903
|
-
*
|
|
904
|
-
* @param response - Raw AI response string (may include markdown code fences)
|
|
905
|
-
* @returns Array of tag matches (offsets not yet validated)
|
|
755
|
+
* Parse the LLM's tag response into raw, pre-reconciliation tag inputs.
|
|
756
|
+
* Reconciliation happens in `validateTagOffsets`, which adds `start`/`end`
|
|
757
|
+
* by anchoring `exact` against the source content.
|
|
906
758
|
*/
|
|
907
759
|
static parseTags(response) {
|
|
908
760
|
try {
|
|
909
761
|
const parsed = extractObjectsFromArray(response);
|
|
910
762
|
const valid = parsed.filter(
|
|
911
|
-
(t) => !!t && typeof t === "object" && typeof t.exact === "string" &&
|
|
763
|
+
(t) => !!t && typeof t === "object" && typeof t.exact === "string" && t.exact.trim().length > 0
|
|
912
764
|
);
|
|
913
765
|
console.log(`[MotivationParsers] Parsed ${valid.length} valid tags from ${parsed.length} total`);
|
|
914
766
|
return valid;
|
|
@@ -918,34 +770,38 @@ var MotivationParsers = class {
|
|
|
918
770
|
}
|
|
919
771
|
}
|
|
920
772
|
/**
|
|
921
|
-
*
|
|
922
|
-
* Helper for tag detection after initial parsing
|
|
923
|
-
*
|
|
924
|
-
* @param tags - Parsed tags without validated offsets
|
|
925
|
-
* @param content - Original content to validate against
|
|
926
|
-
* @param category - Category to assign to validated tags
|
|
927
|
-
* @returns Array of validated tag matches
|
|
773
|
+
* Anchor raw tag inputs against source content and add category.
|
|
928
774
|
*/
|
|
929
775
|
static validateTagOffsets(tags, content, category) {
|
|
930
776
|
const validatedTags = [];
|
|
931
777
|
for (const tag of tags) {
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
suffix: validated.suffix
|
|
941
|
-
});
|
|
942
|
-
} catch (error) {
|
|
943
|
-
console.warn(`[MotivationParsers] Skipping invalid tag for category "${category}":`, error);
|
|
778
|
+
const reconciled = reconcileSelector(content, {
|
|
779
|
+
exact: tag.exact,
|
|
780
|
+
...typeof tag.prefix === "string" ? { prefix: tag.prefix } : {},
|
|
781
|
+
...typeof tag.suffix === "string" ? { suffix: tag.suffix } : {}
|
|
782
|
+
});
|
|
783
|
+
if (!reconciled) {
|
|
784
|
+
console.warn(`[MotivationParsers] Dropped hallucinated tag "${tag.exact}" for category "${category}"`);
|
|
785
|
+
continue;
|
|
944
786
|
}
|
|
787
|
+
logAnchorMethod("tag", tag.exact, reconciled.anchorMethod);
|
|
788
|
+
validatedTags.push({
|
|
789
|
+
category,
|
|
790
|
+
exact: reconciled.exact,
|
|
791
|
+
start: reconciled.start,
|
|
792
|
+
end: reconciled.end,
|
|
793
|
+
...reconciled.prefix !== void 0 ? { prefix: reconciled.prefix } : {},
|
|
794
|
+
...reconciled.suffix !== void 0 ? { suffix: reconciled.suffix } : {}
|
|
795
|
+
});
|
|
945
796
|
}
|
|
946
797
|
return validatedTags;
|
|
947
798
|
}
|
|
948
799
|
};
|
|
800
|
+
function logAnchorMethod(motivation, exact, anchorMethod) {
|
|
801
|
+
if (anchorMethod === "first-of-many" || anchorMethod === "fuzzy-match") {
|
|
802
|
+
console.warn(`[MotivationParsers] ${motivation} anchored via ${anchorMethod}: "${exact}"`);
|
|
803
|
+
}
|
|
804
|
+
}
|
|
949
805
|
|
|
950
806
|
// src/workers/annotation-detection.ts
|
|
951
807
|
var AnnotationDetection = class {
|
|
@@ -1073,17 +929,15 @@ ${exact}
|
|
|
1073
929
|
"""
|
|
1074
930
|
|
|
1075
931
|
Respond with a JSON array of entities found. Each entity should have:
|
|
1076
|
-
- exact: the exact text span from the input
|
|
932
|
+
- exact: the exact text span from the input (quoted verbatim \u2014 character-for-character)
|
|
1077
933
|
- entityType: one of the provided entity types
|
|
1078
|
-
-
|
|
1079
|
-
-
|
|
1080
|
-
- prefix: up to 32 characters of text immediately before the entity (helps identify correct occurrence)
|
|
1081
|
-
- suffix: up to 32 characters of text immediately after the entity (helps identify correct occurrence)
|
|
934
|
+
- prefix: up to 64 characters of text immediately before the entity (used to disambiguate when the same text appears more than once)
|
|
935
|
+
- suffix: up to 64 characters of text immediately after the entity (same purpose)
|
|
1082
936
|
|
|
1083
937
|
If no entities are found, respond with an empty array [].
|
|
1084
938
|
|
|
1085
939
|
Example output:
|
|
1086
|
-
[{"exact":"Alice","entityType":"Person","
|
|
940
|
+
[{"exact":"Alice","entityType":"Person","prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","prefix":"went to ","suffix":" yesterday"}]`;
|
|
1087
941
|
logger.debug("Sending entity extraction request", { entityTypes: entityTypesDescription });
|
|
1088
942
|
const response = await client.generateTextWithMetadata(
|
|
1089
943
|
prompt,
|
|
@@ -1112,151 +966,18 @@ Example output:
|
|
|
1112
966
|
logger.error(errorMsg);
|
|
1113
967
|
throw new Error(errorMsg);
|
|
1114
968
|
}
|
|
1115
|
-
return entities.
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
index: idx + 1,
|
|
1120
|
-
total: entities.length,
|
|
1121
|
-
type: entity.entityType,
|
|
1122
|
-
text: entity.exact,
|
|
1123
|
-
offsetsFromAI: `[${start}:${end}]`
|
|
1124
|
-
});
|
|
1125
|
-
const extractedText = exact.substring(start, end);
|
|
1126
|
-
let anchorMethod;
|
|
1127
|
-
if (extractedText === entity.exact) {
|
|
1128
|
-
anchorMethod = "llm-exact";
|
|
1129
|
-
logger.debug("Entity anchored", {
|
|
1130
|
-
text: entity.exact,
|
|
1131
|
-
entityType: entity.entityType,
|
|
1132
|
-
anchorMethod
|
|
1133
|
-
});
|
|
1134
|
-
} else {
|
|
1135
|
-
logger.debug("LLM offsets mismatch \u2014 attempting re-anchor", {
|
|
1136
|
-
expected: entity.exact,
|
|
1137
|
-
llmOffsets: `[${start}:${end}]`,
|
|
1138
|
-
foundAtLlmOffsets: extractedText
|
|
1139
|
-
});
|
|
1140
|
-
let occurrenceCount = 0;
|
|
1141
|
-
let firstOccurrence = -1;
|
|
1142
|
-
let searchPos = 0;
|
|
1143
|
-
while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
|
|
1144
|
-
if (firstOccurrence === -1) firstOccurrence = searchPos;
|
|
1145
|
-
occurrenceCount++;
|
|
1146
|
-
searchPos++;
|
|
1147
|
-
}
|
|
1148
|
-
if (occurrenceCount === 0) {
|
|
1149
|
-
anchorMethod = "dropped";
|
|
1150
|
-
logger.error("Entity text not found in resource \u2014 dropping", {
|
|
1151
|
-
text: entity.exact,
|
|
1152
|
-
entityType: entity.entityType,
|
|
1153
|
-
llmOffsets: `[${start}:${end}]`,
|
|
1154
|
-
anchorMethod,
|
|
1155
|
-
resourceStart: exact.substring(0, 200)
|
|
1156
|
-
});
|
|
1157
|
-
return null;
|
|
1158
|
-
}
|
|
1159
|
-
let recoveredOffset = -1;
|
|
1160
|
-
if (entity.prefix || entity.suffix) {
|
|
1161
|
-
let p = 0;
|
|
1162
|
-
while ((p = exact.indexOf(entity.exact, p)) !== -1) {
|
|
1163
|
-
const candidatePrefix = exact.substring(Math.max(0, p - 32), p);
|
|
1164
|
-
const candidateSuffix = exact.substring(
|
|
1165
|
-
p + entity.exact.length,
|
|
1166
|
-
Math.min(exact.length, p + entity.exact.length + 32)
|
|
1167
|
-
);
|
|
1168
|
-
const prefixMatch = !entity.prefix || candidatePrefix.endsWith(entity.prefix);
|
|
1169
|
-
const suffixMatch = !entity.suffix || candidateSuffix.startsWith(entity.suffix);
|
|
1170
|
-
if (prefixMatch && suffixMatch) {
|
|
1171
|
-
recoveredOffset = p;
|
|
1172
|
-
break;
|
|
1173
|
-
}
|
|
1174
|
-
p++;
|
|
1175
|
-
}
|
|
1176
|
-
}
|
|
1177
|
-
if (recoveredOffset !== -1) {
|
|
1178
|
-
anchorMethod = "context-recovered";
|
|
1179
|
-
start = recoveredOffset;
|
|
1180
|
-
end = recoveredOffset + entity.exact.length;
|
|
1181
|
-
logger.debug("Entity anchored", {
|
|
1182
|
-
text: entity.exact,
|
|
1183
|
-
entityType: entity.entityType,
|
|
1184
|
-
anchorMethod,
|
|
1185
|
-
offsetDiff: recoveredOffset - entity.startOffset
|
|
1186
|
-
});
|
|
1187
|
-
} else if (occurrenceCount === 1) {
|
|
1188
|
-
anchorMethod = "unique-match";
|
|
1189
|
-
start = firstOccurrence;
|
|
1190
|
-
end = firstOccurrence + entity.exact.length;
|
|
1191
|
-
logger.debug("Entity anchored", {
|
|
1192
|
-
text: entity.exact,
|
|
1193
|
-
entityType: entity.entityType,
|
|
1194
|
-
anchorMethod,
|
|
1195
|
-
offsetDiff: firstOccurrence - entity.startOffset
|
|
1196
|
-
});
|
|
1197
|
-
} else {
|
|
1198
|
-
anchorMethod = "first-of-many";
|
|
1199
|
-
start = firstOccurrence;
|
|
1200
|
-
end = firstOccurrence + entity.exact.length;
|
|
1201
|
-
logger.warn("Entity anchored at first of multiple occurrences \u2014 may be wrong", {
|
|
1202
|
-
text: entity.exact,
|
|
1203
|
-
entityType: entity.entityType,
|
|
1204
|
-
anchorMethod,
|
|
1205
|
-
occurrenceCount,
|
|
1206
|
-
chosenOffset: firstOccurrence,
|
|
1207
|
-
llmOffsets: `[${entity.startOffset}:${entity.endOffset}]`,
|
|
1208
|
-
hasPrefix: !!entity.prefix,
|
|
1209
|
-
hasSuffix: !!entity.suffix
|
|
1210
|
-
});
|
|
1211
|
-
}
|
|
1212
|
-
}
|
|
1213
|
-
return {
|
|
1214
|
-
exact: entity.exact,
|
|
1215
|
-
entityType: entity.entityType,
|
|
1216
|
-
start,
|
|
1217
|
-
end,
|
|
1218
|
-
prefix: entity.prefix,
|
|
1219
|
-
suffix: entity.suffix
|
|
1220
|
-
};
|
|
1221
|
-
}).filter((entity) => {
|
|
1222
|
-
if (entity === null) {
|
|
1223
|
-
logger.debug("Filtered entity: null");
|
|
1224
|
-
return false;
|
|
1225
|
-
}
|
|
1226
|
-
if (entity.start === void 0 || entity.end === void 0) {
|
|
1227
|
-
logger.warn("Filtered entity: missing offsets", { text: entity.exact });
|
|
1228
|
-
return false;
|
|
1229
|
-
}
|
|
1230
|
-
if (entity.start < 0) {
|
|
1231
|
-
logger.warn("Filtered entity: negative start", {
|
|
1232
|
-
text: entity.exact,
|
|
1233
|
-
start: entity.start
|
|
1234
|
-
});
|
|
1235
|
-
return false;
|
|
1236
|
-
}
|
|
1237
|
-
if (entity.end > exact.length) {
|
|
1238
|
-
logger.warn("Filtered entity: end exceeds text length", {
|
|
1239
|
-
text: entity.exact,
|
|
1240
|
-
end: entity.end,
|
|
1241
|
-
textLength: exact.length
|
|
1242
|
-
});
|
|
1243
|
-
return false;
|
|
969
|
+
return entities.filter((e) => {
|
|
970
|
+
const ok = e && typeof e === "object" && typeof e.exact === "string" && typeof e.entityType === "string";
|
|
971
|
+
if (!ok) {
|
|
972
|
+
logger.debug("Dropped malformed LLM entity", { entity: e });
|
|
1244
973
|
}
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
return false;
|
|
1253
|
-
}
|
|
1254
|
-
logger.debug("Accepted entity", {
|
|
1255
|
-
text: entity.exact,
|
|
1256
|
-
offsets: `[${entity.start}:${entity.end}]`
|
|
1257
|
-
});
|
|
1258
|
-
return true;
|
|
1259
|
-
});
|
|
974
|
+
return ok;
|
|
975
|
+
}).map((entity) => ({
|
|
976
|
+
exact: entity.exact,
|
|
977
|
+
entityType: entity.entityType,
|
|
978
|
+
...typeof entity.prefix === "string" ? { prefix: entity.prefix } : {},
|
|
979
|
+
...typeof entity.suffix === "string" ? { suffix: entity.suffix } : {}
|
|
980
|
+
}));
|
|
1260
981
|
} catch (error) {
|
|
1261
982
|
logger.error("Failed to parse entity extraction response", {
|
|
1262
983
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -1392,7 +1113,59 @@ Requirements:
|
|
|
1392
1113
|
});
|
|
1393
1114
|
return result;
|
|
1394
1115
|
}
|
|
1395
|
-
function
|
|
1116
|
+
/**
 * Project a selector-like record down to the fields of a text match.
 *
 * Always copies `exact`, `start` and `end`; `prefix` and `suffix` are only
 * present on the result when they are not `undefined` on the input. Any other
 * properties of `r` are left out.
 *
 * @param {{exact: *, start: *, end: *, prefix?: *, suffix?: *}} r - Source record.
 * @returns {object} New object with keys in the order exact, start, end, [prefix], [suffix].
 */
function toMatch(r) {
  const { exact, start, end, prefix, suffix } = r;
  const match = { exact, start, end };
  // Optional context is added only when defined, so absent keys stay absent
  // instead of being materialized as `undefined`.
  if (prefix !== undefined) {
    match.prefix = prefix;
  }
  if (suffix !== undefined) {
    match.suffix = suffix;
  }
  return match;
}
|
|
1125
|
+
/**
 * Build the string key used to recognize duplicate annotations.
 *
 * The key is the "|"-joined sequence of: the annotation's motivation, the
 * `start` and `end` of the first TextPositionSelector found on its target
 * ("?" for each when none is found), and the JSON serialization of its body
 * (`null` when the body is absent).
 *
 * @param {object} ann - Annotation with `motivation`, `target` and optional `body`.
 * @returns {string} Key that is equal for annotations considered duplicates.
 */
function annotationDedupeKey(ann) {
  const rawSelector = ann.target?.selector;
  // A target may carry one selector object instead of an array. Wrapping it
  // keeps its offsets in the key; otherwise every such annotation would key
  // as "?|?" and distinct annotations would be merged.
  let selectors = [];
  if (Array.isArray(rawSelector)) {
    selectors = rawSelector;
  } else if (rawSelector !== null && typeof rawSelector === "object") {
    selectors = [rawSelector];
  }
  // `s?.type` tolerates null/undefined entries instead of throwing.
  const pos = selectors.find((s) => s?.type === "TextPositionSelector");
  return [
    ann.motivation,
    pos?.start ?? "?",
    pos?.end ?? "?",
    JSON.stringify(ann.body ?? null)
  ].join("|");
}
|
|
1136
|
+
/**
 * Drop annotations whose dedupe key has already been seen.
 *
 * The first annotation for each key (as computed by `annotationDedupeKey`)
 * is kept; later ones with the same key are discarded. Input order of the
 * survivors is preserved and the input itself is not modified.
 *
 * @param {Iterable<object>} annotations - Annotations to filter.
 * @returns {object[]} New array containing the first annotation per key.
 */
function dedupeAnnotations(annotations) {
  // A Map iterates in insertion order, so keeping only the first value per
  // key yields the survivors in their original relative order.
  const firstByKey = new Map();
  for (const annotation of annotations) {
    const dedupeKey = annotationDedupeKey(annotation);
    if (!firstByKey.has(dedupeKey)) {
      firstByKey.set(dedupeKey, annotation);
    }
  }
  return [...firstByKey.values()];
}
|
|
1147
|
+
function buildTextAnnotation(content, resourceId, userId, generator, motivation, match, body) {
|
|
1148
|
+
if (content.substring(match.start, match.end) !== match.exact) {
|
|
1149
|
+
throw new Error(
|
|
1150
|
+
`buildTextAnnotation invariant: content.substring(${match.start}, ${match.end}) !== exact for resource ${resourceId}, motivation ${motivation}`
|
|
1151
|
+
);
|
|
1152
|
+
}
|
|
1153
|
+
if (match.prefix !== void 0) {
|
|
1154
|
+
const actualPrefix = content.substring(Math.max(0, match.start - match.prefix.length), match.start);
|
|
1155
|
+
if (actualPrefix !== match.prefix) {
|
|
1156
|
+
throw new Error(
|
|
1157
|
+
`buildTextAnnotation invariant: content prefix-slice !== prefix for resource ${resourceId}, motivation ${motivation}`
|
|
1158
|
+
);
|
|
1159
|
+
}
|
|
1160
|
+
}
|
|
1161
|
+
if (match.suffix !== void 0) {
|
|
1162
|
+
const actualSuffix = content.substring(match.end, Math.min(content.length, match.end + match.suffix.length));
|
|
1163
|
+
if (actualSuffix !== match.suffix) {
|
|
1164
|
+
throw new Error(
|
|
1165
|
+
`buildTextAnnotation invariant: content suffix-slice !== suffix for resource ${resourceId}, motivation ${motivation}`
|
|
1166
|
+
);
|
|
1167
|
+
}
|
|
1168
|
+
}
|
|
1396
1169
|
const creator = didToAgent(userId);
|
|
1397
1170
|
const wasAttributedTo = creator["@id"] === generator["@id"] ? [generator] : [creator, generator];
|
|
1398
1171
|
return {
|
|
@@ -1431,9 +1204,9 @@ async function processHighlightJob(content, inferenceClient, params, userId, gen
|
|
|
1431
1204
|
params.sourceLanguage
|
|
1432
1205
|
);
|
|
1433
1206
|
onProgress(60, `Creating ${highlights.length} annotations...`, "creating");
|
|
1434
|
-
const annotations = highlights.map(
|
|
1435
|
-
(h) => buildTextAnnotation(params.resourceId, userId, generator, "highlighting", h)
|
|
1436
|
-
);
|
|
1207
|
+
const annotations = dedupeAnnotations(highlights.map(
|
|
1208
|
+
(h) => buildTextAnnotation(content, params.resourceId, userId, generator, "highlighting", h)
|
|
1209
|
+
));
|
|
1437
1210
|
onProgress(100, `Complete! Created ${annotations.length} highlights`, "creating");
|
|
1438
1211
|
return {
|
|
1439
1212
|
annotations,
|
|
@@ -1454,16 +1227,16 @@ async function processCommentJob(content, inferenceClient, params, userId, gener
|
|
|
1454
1227
|
);
|
|
1455
1228
|
onProgress(60, `Creating ${comments.length} annotations...`, "creating");
|
|
1456
1229
|
const bodyLanguage = params.language ?? "en";
|
|
1457
|
-
const annotations = comments.map(
|
|
1230
|
+
const annotations = dedupeAnnotations(comments.map(
|
|
1458
1231
|
(c) => (
|
|
1459
1232
|
// Match the pre-#651 CommentAnnotationWorker: include format and
|
|
1460
1233
|
// language on the body TextualBody. Optional in the schema, but
|
|
1461
1234
|
// consumers that do language-aware rendering rely on them.
|
|
1462
|
-
buildTextAnnotation(params.resourceId, userId, generator, "commenting", c, [
|
|
1235
|
+
buildTextAnnotation(content, params.resourceId, userId, generator, "commenting", c, [
|
|
1463
1236
|
{ type: "TextualBody", value: c.comment, purpose: "commenting", format: "text/plain", language: bodyLanguage }
|
|
1464
1237
|
])
|
|
1465
1238
|
)
|
|
1466
|
-
);
|
|
1239
|
+
));
|
|
1467
1240
|
onProgress(100, `Complete! Created ${annotations.length} comments`, "creating");
|
|
1468
1241
|
return {
|
|
1469
1242
|
annotations,
|
|
@@ -1484,7 +1257,7 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
|
|
|
1484
1257
|
);
|
|
1485
1258
|
onProgress(60, `Creating ${assessments.length} annotations...`, "creating");
|
|
1486
1259
|
const bodyLanguage = params.language ?? "en";
|
|
1487
|
-
const annotations = assessments.map(
|
|
1260
|
+
const annotations = dedupeAnnotations(assessments.map(
|
|
1488
1261
|
(a) => (
|
|
1489
1262
|
// Single-object body with purpose aligned to motivation, matching the
|
|
1490
1263
|
// pre-#651 AssessmentAnnotationWorker's shape and the majority of
|
|
@@ -1492,7 +1265,7 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
|
|
|
1492
1265
|
// purpose='describing' — that loses the "this is an assessment, not
|
|
1493
1266
|
// a description" signal and breaks existing readers that access
|
|
1494
1267
|
// `body.value` directly on the object.
|
|
1495
|
-
buildTextAnnotation(params.resourceId, userId, generator, "assessing", a, {
|
|
1268
|
+
buildTextAnnotation(content, params.resourceId, userId, generator, "assessing", a, {
|
|
1496
1269
|
type: "TextualBody",
|
|
1497
1270
|
value: a.assessment,
|
|
1498
1271
|
purpose: "assessing",
|
|
@@ -1500,7 +1273,7 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
|
|
|
1500
1273
|
language: bodyLanguage
|
|
1501
1274
|
})
|
|
1502
1275
|
)
|
|
1503
|
-
);
|
|
1276
|
+
));
|
|
1504
1277
|
onProgress(100, `Complete! Created ${annotations.length} assessments`, "creating");
|
|
1505
1278
|
return {
|
|
1506
1279
|
annotations,
|
|
@@ -1544,27 +1317,44 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
|
|
|
1544
1317
|
{ type: "TextualBody", value: entityTypeName, purpose: "tagging", format: "text/plain", language: bodyLanguage }
|
|
1545
1318
|
];
|
|
1546
1319
|
for (const entity of extractedEntities) {
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
);
|
|
1557
|
-
allAnnotations.push(ann);
|
|
1558
|
-
totalEmitted++;
|
|
1559
|
-
} catch {
|
|
1320
|
+
const reconciled = reconcileSelector(content, {
|
|
1321
|
+
exact: entity.exact,
|
|
1322
|
+
...entity.prefix !== void 0 ? { prefix: entity.prefix } : {},
|
|
1323
|
+
...entity.suffix !== void 0 ? { suffix: entity.suffix } : {}
|
|
1324
|
+
});
|
|
1325
|
+
if (!reconciled) {
|
|
1326
|
+
logger.error("Entity dropped \u2014 text not found in source", {
|
|
1327
|
+
text: entity.exact,
|
|
1328
|
+
entityType: entity.entityType
|
|
1329
|
+
});
|
|
1560
1330
|
errors++;
|
|
1331
|
+
continue;
|
|
1332
|
+
}
|
|
1333
|
+
if (reconciled.anchorMethod === "first-of-many" || reconciled.anchorMethod === "fuzzy-match") {
|
|
1334
|
+
logger.warn("Entity anchored via degraded method", {
|
|
1335
|
+
text: entity.exact,
|
|
1336
|
+
entityType: entity.entityType,
|
|
1337
|
+
anchorMethod: reconciled.anchorMethod
|
|
1338
|
+
});
|
|
1561
1339
|
}
|
|
1340
|
+
const ann = buildTextAnnotation(
|
|
1341
|
+
content,
|
|
1342
|
+
params.resourceId,
|
|
1343
|
+
userId,
|
|
1344
|
+
generator,
|
|
1345
|
+
"linking",
|
|
1346
|
+
toMatch(reconciled),
|
|
1347
|
+
unresolvedBody
|
|
1348
|
+
);
|
|
1349
|
+
allAnnotations.push(ann);
|
|
1350
|
+
totalEmitted++;
|
|
1562
1351
|
}
|
|
1563
1352
|
}
|
|
1564
|
-
|
|
1353
|
+
const annotations = dedupeAnnotations(allAnnotations);
|
|
1354
|
+
onProgress(100, `Complete! Created ${annotations.length} references`, "creating");
|
|
1565
1355
|
return {
|
|
1566
|
-
annotations
|
|
1567
|
-
result: { totalFound, totalEmitted, errors }
|
|
1356
|
+
annotations,
|
|
1357
|
+
result: { totalFound, totalEmitted: annotations.length, errors }
|
|
1568
1358
|
};
|
|
1569
1359
|
}
|
|
1570
1360
|
async function processTagJob(content, inferenceClient, params, userId, generator, onProgress) {
|
|
@@ -1584,15 +1374,19 @@ async function processTagJob(content, inferenceClient, params, userId, generator
|
|
|
1584
1374
|
const tags = allTags;
|
|
1585
1375
|
onProgress(60, `Creating ${tags.length} tag annotations...`, "creating");
|
|
1586
1376
|
const bodyLanguage = params.language ?? "en";
|
|
1587
|
-
const
|
|
1588
|
-
const annotations = tags.map((t) => {
|
|
1377
|
+
const annotations = dedupeAnnotations(tags.map((t) => {
|
|
1589
1378
|
const category = t.category ?? "unknown";
|
|
1590
|
-
|
|
1591
|
-
return buildTextAnnotation(params.resourceId, userId, generator, "tagging", t, [
|
|
1379
|
+
return buildTextAnnotation(content, params.resourceId, userId, generator, "tagging", t, [
|
|
1592
1380
|
{ type: "TextualBody", value: category, purpose: "tagging", format: "text/plain", language: bodyLanguage },
|
|
1593
1381
|
{ type: "TextualBody", value: params.schema.id, purpose: "classifying", format: "text/plain" }
|
|
1594
1382
|
]);
|
|
1595
|
-
});
|
|
1383
|
+
}));
|
|
1384
|
+
const byCategory = {};
|
|
1385
|
+
for (const ann of annotations) {
|
|
1386
|
+
const body = ann.body;
|
|
1387
|
+
const category = Array.isArray(body) && typeof body[0]?.value === "string" ? body[0].value : "unknown";
|
|
1388
|
+
byCategory[category] = (byCategory[category] ?? 0) + 1;
|
|
1389
|
+
}
|
|
1596
1390
|
onProgress(100, `Complete! Created ${annotations.length} tags`, "creating");
|
|
1597
1391
|
return {
|
|
1598
1392
|
annotations,
|
|
@@ -1628,6 +1422,6 @@ async function processGenerationJob(inferenceClient, params, onProgress, logger)
|
|
|
1628
1422
|
};
|
|
1629
1423
|
}
|
|
1630
1424
|
|
|
1631
|
-
export { AnnotationDetection, FsJobQueue,
|
|
1425
|
+
export { AnnotationDetection, FsJobQueue, generateResourceFromTopic, isCancelledJob, isCompleteJob, isFailedJob, isPendingJob, isRunningJob, processAssessmentJob, processCommentJob, processGenerationJob, processHighlightJob, processReferenceJob, processTagJob };
|
|
1632
1426
|
//# sourceMappingURL=index.js.map
|
|
1633
1427
|
//# sourceMappingURL=index.js.map
|