@semiont/jobs 0.4.19 → 0.4.20

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,7 +2,7 @@ import { promises, watch } from 'fs';
2
2
  import * as path from 'path';
3
3
  import { validateAndCorrectOffsets, getTargetSelector, getExactText, getLocaleEnglishName } from '@semiont/api-client';
4
4
  import { generateAnnotationId } from '@semiont/event-sourcing';
5
- import { userToAgent, userId, jobId, CREATION_METHODS, resourceId, annotationId } from '@semiont/core';
5
+ import { userToAgent, userId, jobId, CREATION_METHODS, resourceId, annotationId, errField } from '@semiont/core';
6
6
  import { deriveStorageUri } from '@semiont/content';
7
7
 
8
8
  var __create = Object.create;
@@ -10944,75 +10944,92 @@ Example output:
10944
10944
  offsetsFromAI: `[${startOffset}:${endOffset}]`
10945
10945
  });
10946
10946
  const extractedText = exact.substring(startOffset, endOffset);
10947
- if (extractedText !== entity.exact) {
10948
- logger?.warn("Offset mismatch detected", {
10949
- expected: entity.exact,
10950
- foundAtOffsets: `[${startOffset}:${endOffset}]`,
10951
- foundText: extractedText
10947
+ let anchorMethod;
10948
+ if (extractedText === entity.exact) {
10949
+ anchorMethod = "llm-exact";
10950
+ logger?.debug("Entity anchored", {
10951
+ text: entity.exact,
10952
+ entityType: entity.entityType,
10953
+ anchorMethod
10952
10954
  });
10953
- const contextStart = Math.max(0, startOffset - 50);
10954
- const contextEnd = Math.min(exact.length, endOffset + 50);
10955
- const contextBefore = exact.substring(contextStart, startOffset);
10956
- const contextAfter = exact.substring(endOffset, contextEnd);
10957
- logger?.debug("Context around AI offset", {
10958
- before: contextBefore,
10959
- extracted: extractedText,
10960
- after: contextAfter
10955
+ } else {
10956
+ logger?.debug("LLM offsets mismatch \u2014 attempting re-anchor", {
10957
+ expected: entity.exact,
10958
+ llmOffsets: `[${startOffset}:${endOffset}]`,
10959
+ foundAtLlmOffsets: extractedText
10961
10960
  });
10962
- logger?.debug("Searching for exact match in resource");
10963
- let found = false;
10964
- if (entity.prefix || entity.suffix) {
10965
- logger?.debug("Using LLM-provided context for disambiguation", {
10966
- prefix: entity.prefix,
10967
- suffix: entity.suffix
10961
+ let occurrenceCount = 0;
10962
+ let firstOccurrence = -1;
10963
+ let searchPos = 0;
10964
+ while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
10965
+ if (firstOccurrence === -1) firstOccurrence = searchPos;
10966
+ occurrenceCount++;
10967
+ searchPos++;
10968
+ }
10969
+ if (occurrenceCount === 0) {
10970
+ anchorMethod = "dropped";
10971
+ logger?.error("Entity text not found in resource \u2014 dropping", {
10972
+ text: entity.exact,
10973
+ entityType: entity.entityType,
10974
+ llmOffsets: `[${startOffset}:${endOffset}]`,
10975
+ anchorMethod,
10976
+ resourceStart: exact.substring(0, 200)
10968
10977
  });
10969
- let searchPos = 0;
10970
- while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
10971
- const candidatePrefix = exact.substring(Math.max(0, searchPos - 32), searchPos);
10978
+ return null;
10979
+ }
10980
+ let recoveredOffset = -1;
10981
+ if (entity.prefix || entity.suffix) {
10982
+ let p = 0;
10983
+ while ((p = exact.indexOf(entity.exact, p)) !== -1) {
10984
+ const candidatePrefix = exact.substring(Math.max(0, p - 32), p);
10972
10985
  const candidateSuffix = exact.substring(
10973
- searchPos + entity.exact.length,
10974
- Math.min(exact.length, searchPos + entity.exact.length + 32)
10986
+ p + entity.exact.length,
10987
+ Math.min(exact.length, p + entity.exact.length + 32)
10975
10988
  );
10976
10989
  const prefixMatch = !entity.prefix || candidatePrefix.endsWith(entity.prefix);
10977
10990
  const suffixMatch = !entity.suffix || candidateSuffix.startsWith(entity.suffix);
10978
10991
  if (prefixMatch && suffixMatch) {
10979
- logger?.debug("Found match using context", {
10980
- offset: searchPos,
10981
- offsetDiff: searchPos - startOffset,
10982
- candidatePrefix,
10983
- candidateSuffix
10984
- });
10985
- startOffset = searchPos;
10986
- endOffset = searchPos + entity.exact.length;
10987
- found = true;
10992
+ recoveredOffset = p;
10988
10993
  break;
10989
10994
  }
10990
- searchPos++;
10991
- }
10992
- if (!found) {
10993
- logger?.warn("No occurrence found with matching context", { text: entity.exact });
10995
+ p++;
10994
10996
  }
10995
10997
  }
10996
- if (!found) {
10997
- const index = exact.indexOf(entity.exact);
10998
- if (index !== -1) {
10999
- logger?.warn("Using first occurrence", {
11000
- text: entity.exact,
11001
- offset: index,
11002
- offsetDiff: index - startOffset
11003
- });
11004
- startOffset = index;
11005
- endOffset = index + entity.exact.length;
11006
- } else {
11007
- logger?.error("Cannot find entity anywhere in resource", {
11008
- text: entity.exact,
11009
- resourceStart: exact.substring(0, 200)
11010
- });
11011
- return null;
11012
- }
10998
+ if (recoveredOffset !== -1) {
10999
+ anchorMethod = "context-recovered";
11000
+ startOffset = recoveredOffset;
11001
+ endOffset = recoveredOffset + entity.exact.length;
11002
+ logger?.debug("Entity anchored", {
11003
+ text: entity.exact,
11004
+ entityType: entity.entityType,
11005
+ anchorMethod,
11006
+ offsetDiff: recoveredOffset - entity.startOffset
11007
+ });
11008
+ } else if (occurrenceCount === 1) {
11009
+ anchorMethod = "unique-match";
11010
+ startOffset = firstOccurrence;
11011
+ endOffset = firstOccurrence + entity.exact.length;
11012
+ logger?.debug("Entity anchored", {
11013
+ text: entity.exact,
11014
+ entityType: entity.entityType,
11015
+ anchorMethod,
11016
+ offsetDiff: firstOccurrence - entity.startOffset
11017
+ });
11018
+ } else {
11019
+ anchorMethod = "first-of-many";
11020
+ startOffset = firstOccurrence;
11021
+ endOffset = firstOccurrence + entity.exact.length;
11022
+ logger?.warn("Entity anchored at first of multiple occurrences \u2014 may be wrong", {
11023
+ text: entity.exact,
11024
+ entityType: entity.entityType,
11025
+ anchorMethod,
11026
+ occurrenceCount,
11027
+ chosenOffset: firstOccurrence,
11028
+ llmOffsets: `[${entity.startOffset}:${entity.endOffset}]`,
11029
+ hasPrefix: !!entity.prefix,
11030
+ hasSuffix: !!entity.suffix
11031
+ });
11013
11032
  }
11014
- } else {
11015
- logger?.debug("Offsets correct", { text: entity.exact });
11016
11033
  }
11017
11034
  return {
11018
11035
  exact: entity.exact,
@@ -11904,7 +11921,7 @@ var HighlightAnnotationWorker = class extends JobWorker {
11904
11921
  await this.createHighlightAnnotation(job.params.resourceId, job.metadata, highlight);
11905
11922
  created++;
11906
11923
  } catch (error) {
11907
- this.logger?.error("Failed to create highlight", { error });
11924
+ this.logger?.error("Failed to create highlight", { error: errField(error) });
11908
11925
  }
11909
11926
  }
11910
11927
  updatedJob = {
@@ -12113,7 +12130,7 @@ var AssessmentAnnotationWorker = class extends JobWorker {
12113
12130
  await this.createAssessmentAnnotation(job.params.resourceId, job.metadata, assessment, job.params.language);
12114
12131
  created++;
12115
12132
  } catch (error) {
12116
- this.logger?.error("Failed to create assessment", { error });
12133
+ this.logger?.error("Failed to create assessment", { error: errField(error) });
12117
12134
  }
12118
12135
  }
12119
12136
  updatedJob = {
@@ -12326,7 +12343,7 @@ var CommentAnnotationWorker = class extends JobWorker {
12326
12343
  await this.createCommentAnnotation(job.params.resourceId, job.metadata, comment, job.params.language);
12327
12344
  created++;
12328
12345
  } catch (error) {
12329
- this.logger?.error("Failed to create comment", { error });
12346
+ this.logger?.error("Failed to create comment", { error: errField(error) });
12330
12347
  }
12331
12348
  }
12332
12349
  updatedJob = {
@@ -12571,7 +12588,7 @@ var TagAnnotationWorker = class extends JobWorker {
12571
12588
  await this.createTagAnnotation(job.params.resourceId, job.metadata, job.params.schemaId, tag);
12572
12589
  created++;
12573
12590
  } catch (error) {
12574
- this.logger?.error("Failed to create tag", { error });
12591
+ this.logger?.error("Failed to create tag", { error: errField(error) });
12575
12592
  }
12576
12593
  }
12577
12594
  updatedJob = {