@semiont/jobs 0.4.19 → 0.4.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +78 -61
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -2,7 +2,7 @@ import { promises, watch } from 'fs';
|
|
|
2
2
|
import * as path from 'path';
|
|
3
3
|
import { validateAndCorrectOffsets, getTargetSelector, getExactText, getLocaleEnglishName } from '@semiont/api-client';
|
|
4
4
|
import { generateAnnotationId } from '@semiont/event-sourcing';
|
|
5
|
-
import { userToAgent, userId, jobId, CREATION_METHODS, resourceId, annotationId } from '@semiont/core';
|
|
5
|
+
import { userToAgent, userId, jobId, CREATION_METHODS, resourceId, annotationId, errField } from '@semiont/core';
|
|
6
6
|
import { deriveStorageUri } from '@semiont/content';
|
|
7
7
|
|
|
8
8
|
var __create = Object.create;
|
|
@@ -10944,75 +10944,92 @@ Example output:
|
|
|
10944
10944
|
offsetsFromAI: `[${startOffset}:${endOffset}]`
|
|
10945
10945
|
});
|
|
10946
10946
|
const extractedText = exact.substring(startOffset, endOffset);
|
|
10947
|
-
|
|
10948
|
-
|
|
10949
|
-
|
|
10950
|
-
|
|
10951
|
-
|
|
10947
|
+
let anchorMethod;
|
|
10948
|
+
if (extractedText === entity.exact) {
|
|
10949
|
+
anchorMethod = "llm-exact";
|
|
10950
|
+
logger?.debug("Entity anchored", {
|
|
10951
|
+
text: entity.exact,
|
|
10952
|
+
entityType: entity.entityType,
|
|
10953
|
+
anchorMethod
|
|
10952
10954
|
});
|
|
10953
|
-
|
|
10954
|
-
|
|
10955
|
-
|
|
10956
|
-
|
|
10957
|
-
|
|
10958
|
-
before: contextBefore,
|
|
10959
|
-
extracted: extractedText,
|
|
10960
|
-
after: contextAfter
|
|
10955
|
+
} else {
|
|
10956
|
+
logger?.debug("LLM offsets mismatch \u2014 attempting re-anchor", {
|
|
10957
|
+
expected: entity.exact,
|
|
10958
|
+
llmOffsets: `[${startOffset}:${endOffset}]`,
|
|
10959
|
+
foundAtLlmOffsets: extractedText
|
|
10961
10960
|
});
|
|
10962
|
-
|
|
10963
|
-
let
|
|
10964
|
-
|
|
10965
|
-
|
|
10966
|
-
|
|
10967
|
-
|
|
10961
|
+
let occurrenceCount = 0;
|
|
10962
|
+
let firstOccurrence = -1;
|
|
10963
|
+
let searchPos = 0;
|
|
10964
|
+
while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
|
|
10965
|
+
if (firstOccurrence === -1) firstOccurrence = searchPos;
|
|
10966
|
+
occurrenceCount++;
|
|
10967
|
+
searchPos++;
|
|
10968
|
+
}
|
|
10969
|
+
if (occurrenceCount === 0) {
|
|
10970
|
+
anchorMethod = "dropped";
|
|
10971
|
+
logger?.error("Entity text not found in resource \u2014 dropping", {
|
|
10972
|
+
text: entity.exact,
|
|
10973
|
+
entityType: entity.entityType,
|
|
10974
|
+
llmOffsets: `[${startOffset}:${endOffset}]`,
|
|
10975
|
+
anchorMethod,
|
|
10976
|
+
resourceStart: exact.substring(0, 200)
|
|
10968
10977
|
});
|
|
10969
|
-
|
|
10970
|
-
|
|
10971
|
-
|
|
10978
|
+
return null;
|
|
10979
|
+
}
|
|
10980
|
+
let recoveredOffset = -1;
|
|
10981
|
+
if (entity.prefix || entity.suffix) {
|
|
10982
|
+
let p = 0;
|
|
10983
|
+
while ((p = exact.indexOf(entity.exact, p)) !== -1) {
|
|
10984
|
+
const candidatePrefix = exact.substring(Math.max(0, p - 32), p);
|
|
10972
10985
|
const candidateSuffix = exact.substring(
|
|
10973
|
-
|
|
10974
|
-
Math.min(exact.length,
|
|
10986
|
+
p + entity.exact.length,
|
|
10987
|
+
Math.min(exact.length, p + entity.exact.length + 32)
|
|
10975
10988
|
);
|
|
10976
10989
|
const prefixMatch = !entity.prefix || candidatePrefix.endsWith(entity.prefix);
|
|
10977
10990
|
const suffixMatch = !entity.suffix || candidateSuffix.startsWith(entity.suffix);
|
|
10978
10991
|
if (prefixMatch && suffixMatch) {
|
|
10979
|
-
|
|
10980
|
-
offset: searchPos,
|
|
10981
|
-
offsetDiff: searchPos - startOffset,
|
|
10982
|
-
candidatePrefix,
|
|
10983
|
-
candidateSuffix
|
|
10984
|
-
});
|
|
10985
|
-
startOffset = searchPos;
|
|
10986
|
-
endOffset = searchPos + entity.exact.length;
|
|
10987
|
-
found = true;
|
|
10992
|
+
recoveredOffset = p;
|
|
10988
10993
|
break;
|
|
10989
10994
|
}
|
|
10990
|
-
|
|
10991
|
-
}
|
|
10992
|
-
if (!found) {
|
|
10993
|
-
logger?.warn("No occurrence found with matching context", { text: entity.exact });
|
|
10995
|
+
p++;
|
|
10994
10996
|
}
|
|
10995
10997
|
}
|
|
10996
|
-
if (
|
|
10997
|
-
|
|
10998
|
-
|
|
10999
|
-
|
|
11000
|
-
|
|
11001
|
-
|
|
11002
|
-
|
|
11003
|
-
|
|
11004
|
-
|
|
11005
|
-
|
|
11006
|
-
|
|
11007
|
-
|
|
11008
|
-
|
|
11009
|
-
|
|
11010
|
-
|
|
11011
|
-
|
|
11012
|
-
|
|
10998
|
+
if (recoveredOffset !== -1) {
|
|
10999
|
+
anchorMethod = "context-recovered";
|
|
11000
|
+
startOffset = recoveredOffset;
|
|
11001
|
+
endOffset = recoveredOffset + entity.exact.length;
|
|
11002
|
+
logger?.debug("Entity anchored", {
|
|
11003
|
+
text: entity.exact,
|
|
11004
|
+
entityType: entity.entityType,
|
|
11005
|
+
anchorMethod,
|
|
11006
|
+
offsetDiff: recoveredOffset - entity.startOffset
|
|
11007
|
+
});
|
|
11008
|
+
} else if (occurrenceCount === 1) {
|
|
11009
|
+
anchorMethod = "unique-match";
|
|
11010
|
+
startOffset = firstOccurrence;
|
|
11011
|
+
endOffset = firstOccurrence + entity.exact.length;
|
|
11012
|
+
logger?.debug("Entity anchored", {
|
|
11013
|
+
text: entity.exact,
|
|
11014
|
+
entityType: entity.entityType,
|
|
11015
|
+
anchorMethod,
|
|
11016
|
+
offsetDiff: firstOccurrence - entity.startOffset
|
|
11017
|
+
});
|
|
11018
|
+
} else {
|
|
11019
|
+
anchorMethod = "first-of-many";
|
|
11020
|
+
startOffset = firstOccurrence;
|
|
11021
|
+
endOffset = firstOccurrence + entity.exact.length;
|
|
11022
|
+
logger?.warn("Entity anchored at first of multiple occurrences \u2014 may be wrong", {
|
|
11023
|
+
text: entity.exact,
|
|
11024
|
+
entityType: entity.entityType,
|
|
11025
|
+
anchorMethod,
|
|
11026
|
+
occurrenceCount,
|
|
11027
|
+
chosenOffset: firstOccurrence,
|
|
11028
|
+
llmOffsets: `[${entity.startOffset}:${entity.endOffset}]`,
|
|
11029
|
+
hasPrefix: !!entity.prefix,
|
|
11030
|
+
hasSuffix: !!entity.suffix
|
|
11031
|
+
});
|
|
11013
11032
|
}
|
|
11014
|
-
} else {
|
|
11015
|
-
logger?.debug("Offsets correct", { text: entity.exact });
|
|
11016
11033
|
}
|
|
11017
11034
|
return {
|
|
11018
11035
|
exact: entity.exact,
|
|
@@ -11904,7 +11921,7 @@ var HighlightAnnotationWorker = class extends JobWorker {
|
|
|
11904
11921
|
await this.createHighlightAnnotation(job.params.resourceId, job.metadata, highlight);
|
|
11905
11922
|
created++;
|
|
11906
11923
|
} catch (error) {
|
|
11907
|
-
this.logger?.error("Failed to create highlight", { error });
|
|
11924
|
+
this.logger?.error("Failed to create highlight", { error: errField(error) });
|
|
11908
11925
|
}
|
|
11909
11926
|
}
|
|
11910
11927
|
updatedJob = {
|
|
@@ -12113,7 +12130,7 @@ var AssessmentAnnotationWorker = class extends JobWorker {
|
|
|
12113
12130
|
await this.createAssessmentAnnotation(job.params.resourceId, job.metadata, assessment, job.params.language);
|
|
12114
12131
|
created++;
|
|
12115
12132
|
} catch (error) {
|
|
12116
|
-
this.logger?.error("Failed to create assessment", { error });
|
|
12133
|
+
this.logger?.error("Failed to create assessment", { error: errField(error) });
|
|
12117
12134
|
}
|
|
12118
12135
|
}
|
|
12119
12136
|
updatedJob = {
|
|
@@ -12326,7 +12343,7 @@ var CommentAnnotationWorker = class extends JobWorker {
|
|
|
12326
12343
|
await this.createCommentAnnotation(job.params.resourceId, job.metadata, comment, job.params.language);
|
|
12327
12344
|
created++;
|
|
12328
12345
|
} catch (error) {
|
|
12329
|
-
this.logger?.error("Failed to create comment", { error });
|
|
12346
|
+
this.logger?.error("Failed to create comment", { error: errField(error) });
|
|
12330
12347
|
}
|
|
12331
12348
|
}
|
|
12332
12349
|
updatedJob = {
|
|
@@ -12571,7 +12588,7 @@ var TagAnnotationWorker = class extends JobWorker {
|
|
|
12571
12588
|
await this.createTagAnnotation(job.params.resourceId, job.metadata, job.params.schemaId, tag);
|
|
12572
12589
|
created++;
|
|
12573
12590
|
} catch (error) {
|
|
12574
|
-
this.logger?.error("Failed to create tag", { error });
|
|
12591
|
+
this.logger?.error("Failed to create tag", { error: errField(error) });
|
|
12575
12592
|
}
|
|
12576
12593
|
}
|
|
12577
12594
|
updatedJob = {
|