@semiont/jobs 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/dist/fs-job-queue.d.ts +79 -0
  2. package/dist/fs-job-queue.d.ts.map +1 -0
  3. package/dist/index.d.ts +20 -632
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +65 -56
  6. package/dist/index.js.map +1 -1
  7. package/dist/job-claim-adapter.d.ts +76 -0
  8. package/dist/job-claim-adapter.d.ts.map +1 -0
  9. package/dist/job-queue-interface.d.ts +19 -0
  10. package/dist/job-queue-interface.d.ts.map +1 -0
  11. package/dist/job-queue-state-unit.d.ts +26 -0
  12. package/dist/job-queue-state-unit.d.ts.map +1 -0
  13. package/dist/job-worker.d.ts +67 -0
  14. package/dist/job-worker.d.ts.map +1 -0
  15. package/dist/processors.d.ts +41 -0
  16. package/dist/processors.d.ts.map +1 -0
  17. package/dist/types.d.ts +319 -0
  18. package/dist/types.d.ts.map +1 -0
  19. package/dist/worker-main.d.ts +22 -2
  20. package/dist/worker-main.d.ts.map +1 -0
  21. package/dist/worker-main.js +165 -114
  22. package/dist/worker-main.js.map +1 -1
  23. package/dist/worker-process.d.ts +47 -0
  24. package/dist/worker-process.d.ts.map +1 -0
  25. package/dist/workers/annotation-detection.d.ts +61 -0
  26. package/dist/workers/annotation-detection.d.ts.map +1 -0
  27. package/dist/workers/detection/entity-extractor.d.ts +42 -0
  28. package/dist/workers/detection/entity-extractor.d.ts.map +1 -0
  29. package/dist/workers/detection/motivation-parsers.d.ts +116 -0
  30. package/dist/workers/detection/motivation-parsers.d.ts.map +1 -0
  31. package/dist/workers/detection/motivation-prompts.d.ts +57 -0
  32. package/dist/workers/detection/motivation-prompts.d.ts.map +1 -0
  33. package/dist/workers/generation/resource-generation.d.ts +23 -0
  34. package/dist/workers/generation/resource-generation.d.ts.map +1 -0
  35. package/package.json +3 -3
package/dist/index.js CHANGED
@@ -974,7 +974,7 @@ var AnnotationDetection = class {
974
974
  */
975
975
  static async detectComments(content, client, instructions, tone, density, language, sourceLanguage) {
976
976
  const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density, language, sourceLanguage);
977
- const response = await client.generateText(prompt, 3e3, 0.4);
977
+ const response = await client.generateText(prompt, 3e3, 0.4, { format: "json" });
978
978
  return MotivationParsers.parseComments(response, content);
979
979
  }
980
980
  /**
@@ -986,7 +986,7 @@ var AnnotationDetection = class {
986
986
  */
987
987
  static async detectHighlights(content, client, instructions, density, sourceLanguage) {
988
988
  const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density, sourceLanguage);
989
- const response = await client.generateText(prompt, 2e3, 0.3);
989
+ const response = await client.generateText(prompt, 2e3, 0.3, { format: "json" });
990
990
  return MotivationParsers.parseHighlights(response, content);
991
991
  }
992
992
  /**
@@ -998,7 +998,7 @@ var AnnotationDetection = class {
998
998
  */
999
999
  static async detectAssessments(content, client, instructions, tone, density, language, sourceLanguage) {
1000
1000
  const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density, language, sourceLanguage);
1001
- const response = await client.generateText(prompt, 3e3, 0.3);
1001
+ const response = await client.generateText(prompt, 3e3, 0.3, { format: "json" });
1002
1002
  return MotivationParsers.parseAssessments(response, content);
1003
1003
  }
1004
1004
  /**
@@ -1028,12 +1028,12 @@ var AnnotationDetection = class {
1028
1028
  categoryInfo.examples,
1029
1029
  sourceLanguage
1030
1030
  );
1031
- const response = await client.generateText(prompt, 4e3, 0.2);
1031
+ const response = await client.generateText(prompt, 4e3, 0.2, { format: "json" });
1032
1032
  const parsedTags = MotivationParsers.parseTags(response);
1033
1033
  return MotivationParsers.validateTagOffsets(parsedTags, content, category);
1034
1034
  }
1035
1035
  };
1036
- async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences = false, logger, sourceLanguage) {
1036
+ async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences, logger, sourceLanguage) {
1037
1037
  const entityTypesDescription = entityTypes.map((et) => {
1038
1038
  if (typeof et === "string") {
1039
1039
  return et;
@@ -1084,50 +1084,57 @@ If no entities are found, respond with an empty array [].
1084
1084
 
1085
1085
  Example output:
1086
1086
  [{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
1087
- logger?.debug("Sending entity extraction request", { entityTypes: entityTypesDescription });
1087
+ logger.debug("Sending entity extraction request", { entityTypes: entityTypesDescription });
1088
1088
  const response = await client.generateTextWithMetadata(
1089
1089
  prompt,
1090
1090
  4e3,
1091
1091
  // Increased to handle many entities without truncation
1092
- 0.3
1092
+ 0.3,
1093
1093
  // Lower temperature for more consistent extraction
1094
+ // Force grammar-constrained JSON output. Without this, Ollama models
1095
+ // periodically emit malformed JSON (truncated brackets, mid-token
1096
+ // breaks at higher token counts) which silently parse-fails into
1097
+ // [] downstream. The prompt's schema (which keys, what types) still
1098
+ // governs *what* the JSON contains; `format: 'json'` governs that
1099
+ // it's syntactically valid.
1100
+ { format: "json" }
1094
1101
  );
1095
- logger?.debug("Got entity extraction response", { responseLength: response.text.length });
1102
+ logger.debug("Got entity extraction response", { responseLength: response.text.length });
1096
1103
  try {
1097
1104
  let jsonStr = response.text.trim();
1098
1105
  if (jsonStr.startsWith("```")) {
1099
1106
  jsonStr = jsonStr.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
1100
1107
  }
1101
1108
  const entities = JSON.parse(jsonStr);
1102
- logger?.debug("Parsed entities from AI response", { count: entities.length });
1109
+ logger.debug("Parsed entities from AI response", { count: entities.length });
1103
1110
  if (response.stopReason === "max_tokens") {
1104
1111
  const errorMsg = `AI response truncated: Found ${entities.length} entities but response hit max_tokens limit. Increase max_tokens or reduce resource size.`;
1105
- logger?.error(errorMsg);
1112
+ logger.error(errorMsg);
1106
1113
  throw new Error(errorMsg);
1107
1114
  }
1108
1115
  return entities.map((entity, idx) => {
1109
- let startOffset = entity.startOffset;
1110
- let endOffset = entity.endOffset;
1111
- logger?.debug("Processing entity", {
1116
+ let start = entity.startOffset;
1117
+ let end = entity.endOffset;
1118
+ logger.debug("Processing entity", {
1112
1119
  index: idx + 1,
1113
1120
  total: entities.length,
1114
1121
  type: entity.entityType,
1115
1122
  text: entity.exact,
1116
- offsetsFromAI: `[${startOffset}:${endOffset}]`
1123
+ offsetsFromAI: `[${start}:${end}]`
1117
1124
  });
1118
- const extractedText = exact.substring(startOffset, endOffset);
1125
+ const extractedText = exact.substring(start, end);
1119
1126
  let anchorMethod;
1120
1127
  if (extractedText === entity.exact) {
1121
1128
  anchorMethod = "llm-exact";
1122
- logger?.debug("Entity anchored", {
1129
+ logger.debug("Entity anchored", {
1123
1130
  text: entity.exact,
1124
1131
  entityType: entity.entityType,
1125
1132
  anchorMethod
1126
1133
  });
1127
1134
  } else {
1128
- logger?.debug("LLM offsets mismatch \u2014 attempting re-anchor", {
1135
+ logger.debug("LLM offsets mismatch \u2014 attempting re-anchor", {
1129
1136
  expected: entity.exact,
1130
- llmOffsets: `[${startOffset}:${endOffset}]`,
1137
+ llmOffsets: `[${start}:${end}]`,
1131
1138
  foundAtLlmOffsets: extractedText
1132
1139
  });
1133
1140
  let occurrenceCount = 0;
@@ -1140,10 +1147,10 @@ Example output:
1140
1147
  }
1141
1148
  if (occurrenceCount === 0) {
1142
1149
  anchorMethod = "dropped";
1143
- logger?.error("Entity text not found in resource \u2014 dropping", {
1150
+ logger.error("Entity text not found in resource \u2014 dropping", {
1144
1151
  text: entity.exact,
1145
1152
  entityType: entity.entityType,
1146
- llmOffsets: `[${startOffset}:${endOffset}]`,
1153
+ llmOffsets: `[${start}:${end}]`,
1147
1154
  anchorMethod,
1148
1155
  resourceStart: exact.substring(0, 200)
1149
1156
  });
@@ -1169,9 +1176,9 @@ Example output:
1169
1176
  }
1170
1177
  if (recoveredOffset !== -1) {
1171
1178
  anchorMethod = "context-recovered";
1172
- startOffset = recoveredOffset;
1173
- endOffset = recoveredOffset + entity.exact.length;
1174
- logger?.debug("Entity anchored", {
1179
+ start = recoveredOffset;
1180
+ end = recoveredOffset + entity.exact.length;
1181
+ logger.debug("Entity anchored", {
1175
1182
  text: entity.exact,
1176
1183
  entityType: entity.entityType,
1177
1184
  anchorMethod,
@@ -1179,9 +1186,9 @@ Example output:
1179
1186
  });
1180
1187
  } else if (occurrenceCount === 1) {
1181
1188
  anchorMethod = "unique-match";
1182
- startOffset = firstOccurrence;
1183
- endOffset = firstOccurrence + entity.exact.length;
1184
- logger?.debug("Entity anchored", {
1189
+ start = firstOccurrence;
1190
+ end = firstOccurrence + entity.exact.length;
1191
+ logger.debug("Entity anchored", {
1185
1192
  text: entity.exact,
1186
1193
  entityType: entity.entityType,
1187
1194
  anchorMethod,
@@ -1189,9 +1196,9 @@ Example output:
1189
1196
  });
1190
1197
  } else {
1191
1198
  anchorMethod = "first-of-many";
1192
- startOffset = firstOccurrence;
1193
- endOffset = firstOccurrence + entity.exact.length;
1194
- logger?.warn("Entity anchored at first of multiple occurrences \u2014 may be wrong", {
1199
+ start = firstOccurrence;
1200
+ end = firstOccurrence + entity.exact.length;
1201
+ logger.warn("Entity anchored at first of multiple occurrences \u2014 may be wrong", {
1195
1202
  text: entity.exact,
1196
1203
  entityType: entity.entityType,
1197
1204
  anchorMethod,
@@ -1206,52 +1213,52 @@ Example output:
1206
1213
  return {
1207
1214
  exact: entity.exact,
1208
1215
  entityType: entity.entityType,
1209
- startOffset,
1210
- endOffset,
1216
+ start,
1217
+ end,
1211
1218
  prefix: entity.prefix,
1212
1219
  suffix: entity.suffix
1213
1220
  };
1214
1221
  }).filter((entity) => {
1215
1222
  if (entity === null) {
1216
- logger?.debug("Filtered entity: null");
1223
+ logger.debug("Filtered entity: null");
1217
1224
  return false;
1218
1225
  }
1219
- if (entity.startOffset === void 0 || entity.endOffset === void 0) {
1220
- logger?.warn("Filtered entity: missing offsets", { text: entity.exact });
1226
+ if (entity.start === void 0 || entity.end === void 0) {
1227
+ logger.warn("Filtered entity: missing offsets", { text: entity.exact });
1221
1228
  return false;
1222
1229
  }
1223
- if (entity.startOffset < 0) {
1224
- logger?.warn("Filtered entity: negative startOffset", {
1230
+ if (entity.start < 0) {
1231
+ logger.warn("Filtered entity: negative start", {
1225
1232
  text: entity.exact,
1226
- startOffset: entity.startOffset
1233
+ start: entity.start
1227
1234
  });
1228
1235
  return false;
1229
1236
  }
1230
- if (entity.endOffset > exact.length) {
1231
- logger?.warn("Filtered entity: endOffset exceeds text length", {
1237
+ if (entity.end > exact.length) {
1238
+ logger.warn("Filtered entity: end exceeds text length", {
1232
1239
  text: entity.exact,
1233
- endOffset: entity.endOffset,
1240
+ end: entity.end,
1234
1241
  textLength: exact.length
1235
1242
  });
1236
1243
  return false;
1237
1244
  }
1238
- const extractedText = exact.substring(entity.startOffset, entity.endOffset);
1245
+ const extractedText = exact.substring(entity.start, entity.end);
1239
1246
  if (extractedText !== entity.exact) {
1240
- logger?.warn("Filtered entity: offset mismatch", {
1247
+ logger.warn("Filtered entity: offset mismatch", {
1241
1248
  expected: entity.exact,
1242
1249
  got: extractedText,
1243
- offsets: `[${entity.startOffset}:${entity.endOffset}]`
1250
+ offsets: `[${entity.start}:${entity.end}]`
1244
1251
  });
1245
1252
  return false;
1246
1253
  }
1247
- logger?.debug("Accepted entity", {
1254
+ logger.debug("Accepted entity", {
1248
1255
  text: entity.exact,
1249
- offsets: `[${entity.startOffset}:${entity.endOffset}]`
1256
+ offsets: `[${entity.start}:${entity.end}]`
1250
1257
  });
1251
1258
  return true;
1252
1259
  });
1253
1260
  } catch (error) {
1254
- logger?.error("Failed to parse entity extraction response", {
1261
+ logger.error("Failed to parse entity extraction response", {
1255
1262
  error: error instanceof Error ? error.message : String(error)
1256
1263
  });
1257
1264
  return [];
@@ -1260,8 +1267,8 @@ Example output:
1260
1267
  function getLanguageName(locale) {
1261
1268
  return getLocaleEnglishName(locale) || locale;
1262
1269
  }
1263
- async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens, logger, sourceLanguage) {
1264
- logger?.debug("Generating resource from topic", {
1270
+ async function generateResourceFromTopic(topic, entityTypes, client, logger, userPrompt, locale, context, temperature, maxTokens, sourceLanguage) {
1271
+ logger.debug("Generating resource from topic", {
1265
1272
  topicPreview: topic.substring(0, 100),
1266
1273
  entityTypes,
1267
1274
  hasUserPrompt: !!userPrompt,
@@ -1369,15 +1376,15 @@ Requirements:
1369
1376
  content
1370
1377
  };
1371
1378
  };
1372
- logger?.debug("Sending prompt to inference", {
1379
+ logger.debug("Sending prompt to inference", {
1373
1380
  promptLength: prompt.length,
1374
1381
  temperature: finalTemperature,
1375
1382
  maxTokens: finalMaxTokens
1376
1383
  });
1377
1384
  const response = await client.generateText(prompt, finalMaxTokens, finalTemperature);
1378
- logger?.debug("Got response from inference", { responseLength: response.length });
1385
+ logger.debug("Got response from inference", { responseLength: response.length });
1379
1386
  const result = parseResponse(response);
1380
- logger?.debug("Parsed response", {
1387
+ logger.debug("Parsed response", {
1381
1388
  hasTitle: !!result.title,
1382
1389
  titleLength: result.title?.length,
1383
1390
  hasContent: !!result.content,
@@ -1386,13 +1393,16 @@ Requirements:
1386
1393
  return result;
1387
1394
  }
1388
1395
  function buildTextAnnotation(resourceId, userId, generator, motivation, match, body) {
1396
+ const creator = didToAgent(userId);
1397
+ const wasAttributedTo = creator["@id"] === generator["@id"] ? [generator] : [creator, generator];
1389
1398
  return {
1390
1399
  "@context": "http://www.w3.org/ns/anno.jsonld",
1391
1400
  "type": "Annotation",
1392
1401
  "id": generateAnnotationId(),
1393
1402
  motivation,
1394
- creator: didToAgent(userId),
1403
+ creator,
1395
1404
  generator,
1405
+ wasAttributedTo,
1396
1406
  created: (/* @__PURE__ */ new Date()).toISOString(),
1397
1407
  target: {
1398
1408
  type: "SpecificResource",
@@ -1535,7 +1545,7 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
1535
1545
  ];
1536
1546
  for (const entity of extractedEntities) {
1537
1547
  try {
1538
- const validated = validateAndCorrectOffsets(content, entity.startOffset, entity.endOffset, entity.exact);
1548
+ const validated = validateAndCorrectOffsets(content, entity.start, entity.end, entity.exact);
1539
1549
  const ann = buildTextAnnotation(
1540
1550
  params.resourceId,
1541
1551
  userId,
@@ -1589,7 +1599,7 @@ async function processTagJob(content, inferenceClient, params, userId, generator
1589
1599
  result: { tagsFound: tags.length, tagsCreated: annotations.length, byCategory }
1590
1600
  };
1591
1601
  }
1592
- async function processGenerationJob(inferenceClient, params, onProgress) {
1602
+ async function processGenerationJob(inferenceClient, params, onProgress, logger) {
1593
1603
  onProgress(20, "Fetching context...", "fetching");
1594
1604
  const title = params.title ?? "Untitled";
1595
1605
  const entityTypes = (params.entityTypes ?? []).map(String);
@@ -1598,13 +1608,12 @@ async function processGenerationJob(inferenceClient, params, onProgress) {
1598
1608
  title,
1599
1609
  entityTypes,
1600
1610
  inferenceClient,
1611
+ logger,
1601
1612
  params.prompt,
1602
1613
  params.language,
1603
1614
  params.context,
1604
1615
  params.temperature,
1605
1616
  params.maxTokens,
1606
- void 0,
1607
- // logger
1608
1617
  params.sourceLanguage
1609
1618
  );
1610
1619
  onProgress(85, "Creating resource...", "creating");