@semiont/jobs 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/fs-job-queue.d.ts +79 -0
- package/dist/fs-job-queue.d.ts.map +1 -0
- package/dist/index.d.ts +20 -632
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +65 -56
- package/dist/index.js.map +1 -1
- package/dist/job-claim-adapter.d.ts +76 -0
- package/dist/job-claim-adapter.d.ts.map +1 -0
- package/dist/job-queue-interface.d.ts +19 -0
- package/dist/job-queue-interface.d.ts.map +1 -0
- package/dist/job-queue-state-unit.d.ts +26 -0
- package/dist/job-queue-state-unit.d.ts.map +1 -0
- package/dist/job-worker.d.ts +67 -0
- package/dist/job-worker.d.ts.map +1 -0
- package/dist/processors.d.ts +41 -0
- package/dist/processors.d.ts.map +1 -0
- package/dist/types.d.ts +319 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/worker-main.d.ts +22 -2
- package/dist/worker-main.d.ts.map +1 -0
- package/dist/worker-main.js +165 -114
- package/dist/worker-main.js.map +1 -1
- package/dist/worker-process.d.ts +47 -0
- package/dist/worker-process.d.ts.map +1 -0
- package/dist/workers/annotation-detection.d.ts +61 -0
- package/dist/workers/annotation-detection.d.ts.map +1 -0
- package/dist/workers/detection/entity-extractor.d.ts +42 -0
- package/dist/workers/detection/entity-extractor.d.ts.map +1 -0
- package/dist/workers/detection/motivation-parsers.d.ts +116 -0
- package/dist/workers/detection/motivation-parsers.d.ts.map +1 -0
- package/dist/workers/detection/motivation-prompts.d.ts +57 -0
- package/dist/workers/detection/motivation-prompts.d.ts.map +1 -0
- package/dist/workers/generation/resource-generation.d.ts +23 -0
- package/dist/workers/generation/resource-generation.d.ts.map +1 -0
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -974,7 +974,7 @@ var AnnotationDetection = class {
|
|
|
974
974
|
*/
|
|
975
975
|
static async detectComments(content, client, instructions, tone, density, language, sourceLanguage) {
|
|
976
976
|
const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density, language, sourceLanguage);
|
|
977
|
-
const response = await client.generateText(prompt, 3e3, 0.4);
|
|
977
|
+
const response = await client.generateText(prompt, 3e3, 0.4, { format: "json" });
|
|
978
978
|
return MotivationParsers.parseComments(response, content);
|
|
979
979
|
}
|
|
980
980
|
/**
|
|
@@ -986,7 +986,7 @@ var AnnotationDetection = class {
|
|
|
986
986
|
*/
|
|
987
987
|
static async detectHighlights(content, client, instructions, density, sourceLanguage) {
|
|
988
988
|
const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density, sourceLanguage);
|
|
989
|
-
const response = await client.generateText(prompt, 2e3, 0.3);
|
|
989
|
+
const response = await client.generateText(prompt, 2e3, 0.3, { format: "json" });
|
|
990
990
|
return MotivationParsers.parseHighlights(response, content);
|
|
991
991
|
}
|
|
992
992
|
/**
|
|
@@ -998,7 +998,7 @@ var AnnotationDetection = class {
|
|
|
998
998
|
*/
|
|
999
999
|
static async detectAssessments(content, client, instructions, tone, density, language, sourceLanguage) {
|
|
1000
1000
|
const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density, language, sourceLanguage);
|
|
1001
|
-
const response = await client.generateText(prompt, 3e3, 0.3);
|
|
1001
|
+
const response = await client.generateText(prompt, 3e3, 0.3, { format: "json" });
|
|
1002
1002
|
return MotivationParsers.parseAssessments(response, content);
|
|
1003
1003
|
}
|
|
1004
1004
|
/**
|
|
@@ -1028,12 +1028,12 @@ var AnnotationDetection = class {
|
|
|
1028
1028
|
categoryInfo.examples,
|
|
1029
1029
|
sourceLanguage
|
|
1030
1030
|
);
|
|
1031
|
-
const response = await client.generateText(prompt, 4e3, 0.2);
|
|
1031
|
+
const response = await client.generateText(prompt, 4e3, 0.2, { format: "json" });
|
|
1032
1032
|
const parsedTags = MotivationParsers.parseTags(response);
|
|
1033
1033
|
return MotivationParsers.validateTagOffsets(parsedTags, content, category);
|
|
1034
1034
|
}
|
|
1035
1035
|
};
|
|
1036
|
-
async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences
|
|
1036
|
+
async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences, logger, sourceLanguage) {
|
|
1037
1037
|
const entityTypesDescription = entityTypes.map((et) => {
|
|
1038
1038
|
if (typeof et === "string") {
|
|
1039
1039
|
return et;
|
|
@@ -1084,50 +1084,57 @@ If no entities are found, respond with an empty array [].
|
|
|
1084
1084
|
|
|
1085
1085
|
Example output:
|
|
1086
1086
|
[{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
|
|
1087
|
-
logger
|
|
1087
|
+
logger.debug("Sending entity extraction request", { entityTypes: entityTypesDescription });
|
|
1088
1088
|
const response = await client.generateTextWithMetadata(
|
|
1089
1089
|
prompt,
|
|
1090
1090
|
4e3,
|
|
1091
1091
|
// Increased to handle many entities without truncation
|
|
1092
|
-
0.3
|
|
1092
|
+
0.3,
|
|
1093
1093
|
// Lower temperature for more consistent extraction
|
|
1094
|
+
// Force grammar-constrained JSON output. Without this, Ollama models
|
|
1095
|
+
// periodically emit malformed JSON (truncated brackets, mid-token
|
|
1096
|
+
// breaks at higher token counts) which silently parse-fails into
|
|
1097
|
+
// [] downstream. The prompt's schema (which keys, what types) still
|
|
1098
|
+
// governs *what* the JSON contains; `format: 'json'` governs that
|
|
1099
|
+
// it's syntactically valid.
|
|
1100
|
+
{ format: "json" }
|
|
1094
1101
|
);
|
|
1095
|
-
logger
|
|
1102
|
+
logger.debug("Got entity extraction response", { responseLength: response.text.length });
|
|
1096
1103
|
try {
|
|
1097
1104
|
let jsonStr = response.text.trim();
|
|
1098
1105
|
if (jsonStr.startsWith("```")) {
|
|
1099
1106
|
jsonStr = jsonStr.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
|
|
1100
1107
|
}
|
|
1101
1108
|
const entities = JSON.parse(jsonStr);
|
|
1102
|
-
logger
|
|
1109
|
+
logger.debug("Parsed entities from AI response", { count: entities.length });
|
|
1103
1110
|
if (response.stopReason === "max_tokens") {
|
|
1104
1111
|
const errorMsg = `AI response truncated: Found ${entities.length} entities but response hit max_tokens limit. Increase max_tokens or reduce resource size.`;
|
|
1105
|
-
logger
|
|
1112
|
+
logger.error(errorMsg);
|
|
1106
1113
|
throw new Error(errorMsg);
|
|
1107
1114
|
}
|
|
1108
1115
|
return entities.map((entity, idx) => {
|
|
1109
|
-
let
|
|
1110
|
-
let
|
|
1111
|
-
logger
|
|
1116
|
+
let start = entity.startOffset;
|
|
1117
|
+
let end = entity.endOffset;
|
|
1118
|
+
logger.debug("Processing entity", {
|
|
1112
1119
|
index: idx + 1,
|
|
1113
1120
|
total: entities.length,
|
|
1114
1121
|
type: entity.entityType,
|
|
1115
1122
|
text: entity.exact,
|
|
1116
|
-
offsetsFromAI: `[${
|
|
1123
|
+
offsetsFromAI: `[${start}:${end}]`
|
|
1117
1124
|
});
|
|
1118
|
-
const extractedText = exact.substring(
|
|
1125
|
+
const extractedText = exact.substring(start, end);
|
|
1119
1126
|
let anchorMethod;
|
|
1120
1127
|
if (extractedText === entity.exact) {
|
|
1121
1128
|
anchorMethod = "llm-exact";
|
|
1122
|
-
logger
|
|
1129
|
+
logger.debug("Entity anchored", {
|
|
1123
1130
|
text: entity.exact,
|
|
1124
1131
|
entityType: entity.entityType,
|
|
1125
1132
|
anchorMethod
|
|
1126
1133
|
});
|
|
1127
1134
|
} else {
|
|
1128
|
-
logger
|
|
1135
|
+
logger.debug("LLM offsets mismatch \u2014 attempting re-anchor", {
|
|
1129
1136
|
expected: entity.exact,
|
|
1130
|
-
llmOffsets: `[${
|
|
1137
|
+
llmOffsets: `[${start}:${end}]`,
|
|
1131
1138
|
foundAtLlmOffsets: extractedText
|
|
1132
1139
|
});
|
|
1133
1140
|
let occurrenceCount = 0;
|
|
@@ -1140,10 +1147,10 @@ Example output:
|
|
|
1140
1147
|
}
|
|
1141
1148
|
if (occurrenceCount === 0) {
|
|
1142
1149
|
anchorMethod = "dropped";
|
|
1143
|
-
logger
|
|
1150
|
+
logger.error("Entity text not found in resource \u2014 dropping", {
|
|
1144
1151
|
text: entity.exact,
|
|
1145
1152
|
entityType: entity.entityType,
|
|
1146
|
-
llmOffsets: `[${
|
|
1153
|
+
llmOffsets: `[${start}:${end}]`,
|
|
1147
1154
|
anchorMethod,
|
|
1148
1155
|
resourceStart: exact.substring(0, 200)
|
|
1149
1156
|
});
|
|
@@ -1169,9 +1176,9 @@ Example output:
|
|
|
1169
1176
|
}
|
|
1170
1177
|
if (recoveredOffset !== -1) {
|
|
1171
1178
|
anchorMethod = "context-recovered";
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
logger
|
|
1179
|
+
start = recoveredOffset;
|
|
1180
|
+
end = recoveredOffset + entity.exact.length;
|
|
1181
|
+
logger.debug("Entity anchored", {
|
|
1175
1182
|
text: entity.exact,
|
|
1176
1183
|
entityType: entity.entityType,
|
|
1177
1184
|
anchorMethod,
|
|
@@ -1179,9 +1186,9 @@ Example output:
|
|
|
1179
1186
|
});
|
|
1180
1187
|
} else if (occurrenceCount === 1) {
|
|
1181
1188
|
anchorMethod = "unique-match";
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
logger
|
|
1189
|
+
start = firstOccurrence;
|
|
1190
|
+
end = firstOccurrence + entity.exact.length;
|
|
1191
|
+
logger.debug("Entity anchored", {
|
|
1185
1192
|
text: entity.exact,
|
|
1186
1193
|
entityType: entity.entityType,
|
|
1187
1194
|
anchorMethod,
|
|
@@ -1189,9 +1196,9 @@ Example output:
|
|
|
1189
1196
|
});
|
|
1190
1197
|
} else {
|
|
1191
1198
|
anchorMethod = "first-of-many";
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
logger
|
|
1199
|
+
start = firstOccurrence;
|
|
1200
|
+
end = firstOccurrence + entity.exact.length;
|
|
1201
|
+
logger.warn("Entity anchored at first of multiple occurrences \u2014 may be wrong", {
|
|
1195
1202
|
text: entity.exact,
|
|
1196
1203
|
entityType: entity.entityType,
|
|
1197
1204
|
anchorMethod,
|
|
@@ -1206,52 +1213,52 @@ Example output:
|
|
|
1206
1213
|
return {
|
|
1207
1214
|
exact: entity.exact,
|
|
1208
1215
|
entityType: entity.entityType,
|
|
1209
|
-
|
|
1210
|
-
|
|
1216
|
+
start,
|
|
1217
|
+
end,
|
|
1211
1218
|
prefix: entity.prefix,
|
|
1212
1219
|
suffix: entity.suffix
|
|
1213
1220
|
};
|
|
1214
1221
|
}).filter((entity) => {
|
|
1215
1222
|
if (entity === null) {
|
|
1216
|
-
logger
|
|
1223
|
+
logger.debug("Filtered entity: null");
|
|
1217
1224
|
return false;
|
|
1218
1225
|
}
|
|
1219
|
-
if (entity.
|
|
1220
|
-
logger
|
|
1226
|
+
if (entity.start === void 0 || entity.end === void 0) {
|
|
1227
|
+
logger.warn("Filtered entity: missing offsets", { text: entity.exact });
|
|
1221
1228
|
return false;
|
|
1222
1229
|
}
|
|
1223
|
-
if (entity.
|
|
1224
|
-
logger
|
|
1230
|
+
if (entity.start < 0) {
|
|
1231
|
+
logger.warn("Filtered entity: negative start", {
|
|
1225
1232
|
text: entity.exact,
|
|
1226
|
-
|
|
1233
|
+
start: entity.start
|
|
1227
1234
|
});
|
|
1228
1235
|
return false;
|
|
1229
1236
|
}
|
|
1230
|
-
if (entity.
|
|
1231
|
-
logger
|
|
1237
|
+
if (entity.end > exact.length) {
|
|
1238
|
+
logger.warn("Filtered entity: end exceeds text length", {
|
|
1232
1239
|
text: entity.exact,
|
|
1233
|
-
|
|
1240
|
+
end: entity.end,
|
|
1234
1241
|
textLength: exact.length
|
|
1235
1242
|
});
|
|
1236
1243
|
return false;
|
|
1237
1244
|
}
|
|
1238
|
-
const extractedText = exact.substring(entity.
|
|
1245
|
+
const extractedText = exact.substring(entity.start, entity.end);
|
|
1239
1246
|
if (extractedText !== entity.exact) {
|
|
1240
|
-
logger
|
|
1247
|
+
logger.warn("Filtered entity: offset mismatch", {
|
|
1241
1248
|
expected: entity.exact,
|
|
1242
1249
|
got: extractedText,
|
|
1243
|
-
offsets: `[${entity.
|
|
1250
|
+
offsets: `[${entity.start}:${entity.end}]`
|
|
1244
1251
|
});
|
|
1245
1252
|
return false;
|
|
1246
1253
|
}
|
|
1247
|
-
logger
|
|
1254
|
+
logger.debug("Accepted entity", {
|
|
1248
1255
|
text: entity.exact,
|
|
1249
|
-
offsets: `[${entity.
|
|
1256
|
+
offsets: `[${entity.start}:${entity.end}]`
|
|
1250
1257
|
});
|
|
1251
1258
|
return true;
|
|
1252
1259
|
});
|
|
1253
1260
|
} catch (error) {
|
|
1254
|
-
logger
|
|
1261
|
+
logger.error("Failed to parse entity extraction response", {
|
|
1255
1262
|
error: error instanceof Error ? error.message : String(error)
|
|
1256
1263
|
});
|
|
1257
1264
|
return [];
|
|
@@ -1260,8 +1267,8 @@ Example output:
|
|
|
1260
1267
|
function getLanguageName(locale) {
|
|
1261
1268
|
return getLocaleEnglishName(locale) || locale;
|
|
1262
1269
|
}
|
|
1263
|
-
async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens,
|
|
1264
|
-
logger
|
|
1270
|
+
async function generateResourceFromTopic(topic, entityTypes, client, logger, userPrompt, locale, context, temperature, maxTokens, sourceLanguage) {
|
|
1271
|
+
logger.debug("Generating resource from topic", {
|
|
1265
1272
|
topicPreview: topic.substring(0, 100),
|
|
1266
1273
|
entityTypes,
|
|
1267
1274
|
hasUserPrompt: !!userPrompt,
|
|
@@ -1369,15 +1376,15 @@ Requirements:
|
|
|
1369
1376
|
content
|
|
1370
1377
|
};
|
|
1371
1378
|
};
|
|
1372
|
-
logger
|
|
1379
|
+
logger.debug("Sending prompt to inference", {
|
|
1373
1380
|
promptLength: prompt.length,
|
|
1374
1381
|
temperature: finalTemperature,
|
|
1375
1382
|
maxTokens: finalMaxTokens
|
|
1376
1383
|
});
|
|
1377
1384
|
const response = await client.generateText(prompt, finalMaxTokens, finalTemperature);
|
|
1378
|
-
logger
|
|
1385
|
+
logger.debug("Got response from inference", { responseLength: response.length });
|
|
1379
1386
|
const result = parseResponse(response);
|
|
1380
|
-
logger
|
|
1387
|
+
logger.debug("Parsed response", {
|
|
1381
1388
|
hasTitle: !!result.title,
|
|
1382
1389
|
titleLength: result.title?.length,
|
|
1383
1390
|
hasContent: !!result.content,
|
|
@@ -1386,13 +1393,16 @@ Requirements:
|
|
|
1386
1393
|
return result;
|
|
1387
1394
|
}
|
|
1388
1395
|
function buildTextAnnotation(resourceId, userId, generator, motivation, match, body) {
|
|
1396
|
+
const creator = didToAgent(userId);
|
|
1397
|
+
const wasAttributedTo = creator["@id"] === generator["@id"] ? [generator] : [creator, generator];
|
|
1389
1398
|
return {
|
|
1390
1399
|
"@context": "http://www.w3.org/ns/anno.jsonld",
|
|
1391
1400
|
"type": "Annotation",
|
|
1392
1401
|
"id": generateAnnotationId(),
|
|
1393
1402
|
motivation,
|
|
1394
|
-
creator
|
|
1403
|
+
creator,
|
|
1395
1404
|
generator,
|
|
1405
|
+
wasAttributedTo,
|
|
1396
1406
|
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1397
1407
|
target: {
|
|
1398
1408
|
type: "SpecificResource",
|
|
@@ -1535,7 +1545,7 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
|
|
|
1535
1545
|
];
|
|
1536
1546
|
for (const entity of extractedEntities) {
|
|
1537
1547
|
try {
|
|
1538
|
-
const validated = validateAndCorrectOffsets(content, entity.
|
|
1548
|
+
const validated = validateAndCorrectOffsets(content, entity.start, entity.end, entity.exact);
|
|
1539
1549
|
const ann = buildTextAnnotation(
|
|
1540
1550
|
params.resourceId,
|
|
1541
1551
|
userId,
|
|
@@ -1589,7 +1599,7 @@ async function processTagJob(content, inferenceClient, params, userId, generator
|
|
|
1589
1599
|
result: { tagsFound: tags.length, tagsCreated: annotations.length, byCategory }
|
|
1590
1600
|
};
|
|
1591
1601
|
}
|
|
1592
|
-
async function processGenerationJob(inferenceClient, params, onProgress) {
|
|
1602
|
+
async function processGenerationJob(inferenceClient, params, onProgress, logger) {
|
|
1593
1603
|
onProgress(20, "Fetching context...", "fetching");
|
|
1594
1604
|
const title = params.title ?? "Untitled";
|
|
1595
1605
|
const entityTypes = (params.entityTypes ?? []).map(String);
|
|
@@ -1598,13 +1608,12 @@ async function processGenerationJob(inferenceClient, params, onProgress) {
|
|
|
1598
1608
|
title,
|
|
1599
1609
|
entityTypes,
|
|
1600
1610
|
inferenceClient,
|
|
1611
|
+
logger,
|
|
1601
1612
|
params.prompt,
|
|
1602
1613
|
params.language,
|
|
1603
1614
|
params.context,
|
|
1604
1615
|
params.temperature,
|
|
1605
1616
|
params.maxTokens,
|
|
1606
|
-
void 0,
|
|
1607
|
-
// logger
|
|
1608
1617
|
params.sourceLanguage
|
|
1609
1618
|
);
|
|
1610
1619
|
onProgress(85, "Creating resource...", "creating");
|