@semiont/jobs 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/fs-job-queue.d.ts +79 -0
- package/dist/fs-job-queue.d.ts.map +1 -0
- package/dist/index.d.ts +20 -623
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +74 -218
- package/dist/index.js.map +1 -1
- package/dist/job-claim-adapter.d.ts +76 -0
- package/dist/job-claim-adapter.d.ts.map +1 -0
- package/dist/job-queue-interface.d.ts +19 -0
- package/dist/job-queue-interface.d.ts.map +1 -0
- package/dist/job-queue-state-unit.d.ts +26 -0
- package/dist/job-queue-state-unit.d.ts.map +1 -0
- package/dist/job-worker.d.ts +67 -0
- package/dist/job-worker.d.ts.map +1 -0
- package/dist/processors.d.ts +41 -0
- package/dist/processors.d.ts.map +1 -0
- package/dist/types.d.ts +319 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/worker-main.d.ts +22 -2
- package/dist/worker-main.d.ts.map +1 -0
- package/dist/worker-main.js +175 -276
- package/dist/worker-main.js.map +1 -1
- package/dist/worker-process.d.ts +47 -0
- package/dist/worker-process.d.ts.map +1 -0
- package/dist/workers/annotation-detection.d.ts +61 -0
- package/dist/workers/annotation-detection.d.ts.map +1 -0
- package/dist/workers/detection/entity-extractor.d.ts +42 -0
- package/dist/workers/detection/entity-extractor.d.ts.map +1 -0
- package/dist/workers/detection/motivation-parsers.d.ts +116 -0
- package/dist/workers/detection/motivation-parsers.d.ts.map +1 -0
- package/dist/workers/detection/motivation-prompts.d.ts +57 -0
- package/dist/workers/detection/motivation-prompts.d.ts.map +1 -0
- package/dist/workers/generation/resource-generation.d.ts +23 -0
- package/dist/workers/generation/resource-generation.d.ts.map +1 -0
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -947,159 +947,6 @@ var MotivationParsers = class {
|
|
|
947
947
|
}
|
|
948
948
|
};
|
|
949
949
|
|
|
950
|
-
// ../ontology/dist/index.js
|
|
951
|
-
var TAG_SCHEMAS = {
|
|
952
|
-
"legal-irac": {
|
|
953
|
-
id: "legal-irac",
|
|
954
|
-
name: "Legal Analysis (IRAC)",
|
|
955
|
-
description: "Issue, Rule, Application, Conclusion framework for legal reasoning",
|
|
956
|
-
domain: "legal",
|
|
957
|
-
tags: [
|
|
958
|
-
{
|
|
959
|
-
name: "Issue",
|
|
960
|
-
description: "The legal question or problem to be resolved",
|
|
961
|
-
examples: [
|
|
962
|
-
"What is the central legal question?",
|
|
963
|
-
"What must the court decide?",
|
|
964
|
-
"What is the dispute about?"
|
|
965
|
-
]
|
|
966
|
-
},
|
|
967
|
-
{
|
|
968
|
-
name: "Rule",
|
|
969
|
-
description: "The relevant law, statute, or legal principle",
|
|
970
|
-
examples: [
|
|
971
|
-
"What law applies?",
|
|
972
|
-
"What is the legal standard?",
|
|
973
|
-
"What statute governs this case?"
|
|
974
|
-
]
|
|
975
|
-
},
|
|
976
|
-
{
|
|
977
|
-
name: "Application",
|
|
978
|
-
description: "How the rule applies to the specific facts",
|
|
979
|
-
examples: [
|
|
980
|
-
"How does the law apply to these facts?",
|
|
981
|
-
"Analysis of the case",
|
|
982
|
-
"How do the facts satisfy the legal standard?"
|
|
983
|
-
]
|
|
984
|
-
},
|
|
985
|
-
{
|
|
986
|
-
name: "Conclusion",
|
|
987
|
-
description: "The resolution or outcome based on the analysis",
|
|
988
|
-
examples: [
|
|
989
|
-
"What is the court's decision?",
|
|
990
|
-
"What is the final judgment?",
|
|
991
|
-
"What is the holding?"
|
|
992
|
-
]
|
|
993
|
-
}
|
|
994
|
-
]
|
|
995
|
-
},
|
|
996
|
-
"scientific-imrad": {
|
|
997
|
-
id: "scientific-imrad",
|
|
998
|
-
name: "Scientific Paper (IMRAD)",
|
|
999
|
-
description: "Introduction, Methods, Results, Discussion structure for research papers",
|
|
1000
|
-
domain: "scientific",
|
|
1001
|
-
tags: [
|
|
1002
|
-
{
|
|
1003
|
-
name: "Introduction",
|
|
1004
|
-
description: "Background, context, and research question",
|
|
1005
|
-
examples: [
|
|
1006
|
-
"What is the research question?",
|
|
1007
|
-
"Why is this important?",
|
|
1008
|
-
"What is the hypothesis?"
|
|
1009
|
-
]
|
|
1010
|
-
},
|
|
1011
|
-
{
|
|
1012
|
-
name: "Methods",
|
|
1013
|
-
description: "Experimental design and procedures",
|
|
1014
|
-
examples: [
|
|
1015
|
-
"How was the study conducted?",
|
|
1016
|
-
"What methods were used?",
|
|
1017
|
-
"What was the experimental design?"
|
|
1018
|
-
]
|
|
1019
|
-
},
|
|
1020
|
-
{
|
|
1021
|
-
name: "Results",
|
|
1022
|
-
description: "Findings and observations",
|
|
1023
|
-
examples: [
|
|
1024
|
-
"What did the study find?",
|
|
1025
|
-
"What are the data?",
|
|
1026
|
-
"What were the observations?"
|
|
1027
|
-
]
|
|
1028
|
-
},
|
|
1029
|
-
{
|
|
1030
|
-
name: "Discussion",
|
|
1031
|
-
description: "Interpretation and implications of results",
|
|
1032
|
-
examples: [
|
|
1033
|
-
"What do the results mean?",
|
|
1034
|
-
"What are the implications?",
|
|
1035
|
-
"How do these findings relate to prior work?"
|
|
1036
|
-
]
|
|
1037
|
-
}
|
|
1038
|
-
]
|
|
1039
|
-
},
|
|
1040
|
-
"argument-toulmin": {
|
|
1041
|
-
id: "argument-toulmin",
|
|
1042
|
-
name: "Argument Structure (Toulmin)",
|
|
1043
|
-
description: "Claim, Evidence, Warrant, Counterargument, Rebuttal framework for argumentation",
|
|
1044
|
-
domain: "general",
|
|
1045
|
-
tags: [
|
|
1046
|
-
{
|
|
1047
|
-
name: "Claim",
|
|
1048
|
-
description: "The main assertion or thesis",
|
|
1049
|
-
examples: [
|
|
1050
|
-
"What is being argued?",
|
|
1051
|
-
"What is the main point?",
|
|
1052
|
-
"What position is being taken?"
|
|
1053
|
-
]
|
|
1054
|
-
},
|
|
1055
|
-
{
|
|
1056
|
-
name: "Evidence",
|
|
1057
|
-
description: "Data or facts supporting the claim",
|
|
1058
|
-
examples: [
|
|
1059
|
-
"What supports this claim?",
|
|
1060
|
-
"What are the facts?",
|
|
1061
|
-
"What data is provided?"
|
|
1062
|
-
]
|
|
1063
|
-
},
|
|
1064
|
-
{
|
|
1065
|
-
name: "Warrant",
|
|
1066
|
-
description: "Reasoning connecting evidence to claim",
|
|
1067
|
-
examples: [
|
|
1068
|
-
"Why does this evidence support the claim?",
|
|
1069
|
-
"What is the logic?",
|
|
1070
|
-
"How does this reasoning work?"
|
|
1071
|
-
]
|
|
1072
|
-
},
|
|
1073
|
-
{
|
|
1074
|
-
name: "Counterargument",
|
|
1075
|
-
description: "Opposing viewpoints or objections",
|
|
1076
|
-
examples: [
|
|
1077
|
-
"What are the objections?",
|
|
1078
|
-
"What do critics say?",
|
|
1079
|
-
"What are alternative views?"
|
|
1080
|
-
]
|
|
1081
|
-
},
|
|
1082
|
-
{
|
|
1083
|
-
name: "Rebuttal",
|
|
1084
|
-
description: "Response to counterarguments",
|
|
1085
|
-
examples: [
|
|
1086
|
-
"How is the objection addressed?",
|
|
1087
|
-
"Why is the counterargument wrong?",
|
|
1088
|
-
"How is the criticism answered?"
|
|
1089
|
-
]
|
|
1090
|
-
}
|
|
1091
|
-
]
|
|
1092
|
-
}
|
|
1093
|
-
};
|
|
1094
|
-
function getTagSchema(schemaId) {
|
|
1095
|
-
return TAG_SCHEMAS[schemaId] || null;
|
|
1096
|
-
}
|
|
1097
|
-
function getSchemaCategory(schemaId, categoryName) {
|
|
1098
|
-
const schema = getTagSchema(schemaId);
|
|
1099
|
-
if (!schema) return null;
|
|
1100
|
-
return schema.tags.find((tag) => tag.name === categoryName) || null;
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
950
|
// src/workers/annotation-detection.ts
|
|
1104
951
|
var AnnotationDetection = class {
|
|
1105
952
|
/**
|
|
@@ -1127,7 +974,7 @@ var AnnotationDetection = class {
|
|
|
1127
974
|
*/
|
|
1128
975
|
static async detectComments(content, client, instructions, tone, density, language, sourceLanguage) {
|
|
1129
976
|
const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density, language, sourceLanguage);
|
|
1130
|
-
const response = await client.generateText(prompt, 3e3, 0.4);
|
|
977
|
+
const response = await client.generateText(prompt, 3e3, 0.4, { format: "json" });
|
|
1131
978
|
return MotivationParsers.parseComments(response, content);
|
|
1132
979
|
}
|
|
1133
980
|
/**
|
|
@@ -1139,7 +986,7 @@ var AnnotationDetection = class {
|
|
|
1139
986
|
*/
|
|
1140
987
|
static async detectHighlights(content, client, instructions, density, sourceLanguage) {
|
|
1141
988
|
const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density, sourceLanguage);
|
|
1142
|
-
const response = await client.generateText(prompt, 2e3, 0.3);
|
|
989
|
+
const response = await client.generateText(prompt, 2e3, 0.3, { format: "json" });
|
|
1143
990
|
return MotivationParsers.parseHighlights(response, content);
|
|
1144
991
|
}
|
|
1145
992
|
/**
|
|
@@ -1151,25 +998,25 @@ var AnnotationDetection = class {
|
|
|
1151
998
|
*/
|
|
1152
999
|
static async detectAssessments(content, client, instructions, tone, density, language, sourceLanguage) {
|
|
1153
1000
|
const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density, language, sourceLanguage);
|
|
1154
|
-
const response = await client.generateText(prompt, 3e3, 0.3);
|
|
1001
|
+
const response = await client.generateText(prompt, 3e3, 0.3, { format: "json" });
|
|
1155
1002
|
return MotivationParsers.parseAssessments(response, content);
|
|
1156
1003
|
}
|
|
1157
1004
|
/**
|
|
1158
1005
|
* Detect tags in content for a specific category.
|
|
1159
1006
|
*
|
|
1007
|
+
* The full `TagSchema` is supplied by the dispatcher (resolved against
|
|
1008
|
+
* the per-KB tag-schema projection at job-creation time) so the worker
|
|
1009
|
+
* is independent of the registry.
|
|
1010
|
+
*
|
|
1160
1011
|
* `sourceLanguage` is the locale of the content being analyzed. Body-locale
|
|
1161
1012
|
* (`language`) doesn't influence the tag prompt — categories are schema
|
|
1162
1013
|
* identifiers, not LLM-generated text — so it's consumed at the body-stamp
|
|
1163
1014
|
* site, not here.
|
|
1164
1015
|
*/
|
|
1165
|
-
static async detectTags(content, client,
|
|
1166
|
-
const
|
|
1167
|
-
if (!schema) {
|
|
1168
|
-
throw new Error(`Invalid tag schema: ${schemaId}`);
|
|
1169
|
-
}
|
|
1170
|
-
const categoryInfo = getSchemaCategory(schemaId, category);
|
|
1016
|
+
static async detectTags(content, client, schema, category, sourceLanguage) {
|
|
1017
|
+
const categoryInfo = schema.tags.find((t) => t.name === category);
|
|
1171
1018
|
if (!categoryInfo) {
|
|
1172
|
-
throw new Error(`Invalid category "${category}" for schema ${
|
|
1019
|
+
throw new Error(`Invalid category "${category}" for schema ${schema.id}`);
|
|
1173
1020
|
}
|
|
1174
1021
|
const prompt = MotivationPrompts.buildTagPrompt(
|
|
1175
1022
|
content,
|
|
@@ -1181,12 +1028,12 @@ var AnnotationDetection = class {
|
|
|
1181
1028
|
categoryInfo.examples,
|
|
1182
1029
|
sourceLanguage
|
|
1183
1030
|
);
|
|
1184
|
-
const response = await client.generateText(prompt, 4e3, 0.2);
|
|
1031
|
+
const response = await client.generateText(prompt, 4e3, 0.2, { format: "json" });
|
|
1185
1032
|
const parsedTags = MotivationParsers.parseTags(response);
|
|
1186
1033
|
return MotivationParsers.validateTagOffsets(parsedTags, content, category);
|
|
1187
1034
|
}
|
|
1188
1035
|
};
|
|
1189
|
-
async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences
|
|
1036
|
+
async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences, logger, sourceLanguage) {
|
|
1190
1037
|
const entityTypesDescription = entityTypes.map((et) => {
|
|
1191
1038
|
if (typeof et === "string") {
|
|
1192
1039
|
return et;
|
|
@@ -1237,50 +1084,57 @@ If no entities are found, respond with an empty array [].
|
|
|
1237
1084
|
|
|
1238
1085
|
Example output:
|
|
1239
1086
|
[{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
|
|
1240
|
-
logger
|
|
1087
|
+
logger.debug("Sending entity extraction request", { entityTypes: entityTypesDescription });
|
|
1241
1088
|
const response = await client.generateTextWithMetadata(
|
|
1242
1089
|
prompt,
|
|
1243
1090
|
4e3,
|
|
1244
1091
|
// Increased to handle many entities without truncation
|
|
1245
|
-
0.3
|
|
1092
|
+
0.3,
|
|
1246
1093
|
// Lower temperature for more consistent extraction
|
|
1094
|
+
// Force grammar-constrained JSON output. Without this, Ollama models
|
|
1095
|
+
// periodically emit malformed JSON (truncated brackets, mid-token
|
|
1096
|
+
// breaks at higher token counts) which silently parse-fails into
|
|
1097
|
+
// [] downstream. The prompt's schema (which keys, what types) still
|
|
1098
|
+
// governs *what* the JSON contains; `format: 'json'` governs that
|
|
1099
|
+
// it's syntactically valid.
|
|
1100
|
+
{ format: "json" }
|
|
1247
1101
|
);
|
|
1248
|
-
logger
|
|
1102
|
+
logger.debug("Got entity extraction response", { responseLength: response.text.length });
|
|
1249
1103
|
try {
|
|
1250
1104
|
let jsonStr = response.text.trim();
|
|
1251
1105
|
if (jsonStr.startsWith("```")) {
|
|
1252
1106
|
jsonStr = jsonStr.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
|
|
1253
1107
|
}
|
|
1254
1108
|
const entities = JSON.parse(jsonStr);
|
|
1255
|
-
logger
|
|
1109
|
+
logger.debug("Parsed entities from AI response", { count: entities.length });
|
|
1256
1110
|
if (response.stopReason === "max_tokens") {
|
|
1257
1111
|
const errorMsg = `AI response truncated: Found ${entities.length} entities but response hit max_tokens limit. Increase max_tokens or reduce resource size.`;
|
|
1258
|
-
logger
|
|
1112
|
+
logger.error(errorMsg);
|
|
1259
1113
|
throw new Error(errorMsg);
|
|
1260
1114
|
}
|
|
1261
1115
|
return entities.map((entity, idx) => {
|
|
1262
|
-
let
|
|
1263
|
-
let
|
|
1264
|
-
logger
|
|
1116
|
+
let start = entity.startOffset;
|
|
1117
|
+
let end = entity.endOffset;
|
|
1118
|
+
logger.debug("Processing entity", {
|
|
1265
1119
|
index: idx + 1,
|
|
1266
1120
|
total: entities.length,
|
|
1267
1121
|
type: entity.entityType,
|
|
1268
1122
|
text: entity.exact,
|
|
1269
|
-
offsetsFromAI: `[${
|
|
1123
|
+
offsetsFromAI: `[${start}:${end}]`
|
|
1270
1124
|
});
|
|
1271
|
-
const extractedText = exact.substring(
|
|
1125
|
+
const extractedText = exact.substring(start, end);
|
|
1272
1126
|
let anchorMethod;
|
|
1273
1127
|
if (extractedText === entity.exact) {
|
|
1274
1128
|
anchorMethod = "llm-exact";
|
|
1275
|
-
logger
|
|
1129
|
+
logger.debug("Entity anchored", {
|
|
1276
1130
|
text: entity.exact,
|
|
1277
1131
|
entityType: entity.entityType,
|
|
1278
1132
|
anchorMethod
|
|
1279
1133
|
});
|
|
1280
1134
|
} else {
|
|
1281
|
-
logger
|
|
1135
|
+
logger.debug("LLM offsets mismatch \u2014 attempting re-anchor", {
|
|
1282
1136
|
expected: entity.exact,
|
|
1283
|
-
llmOffsets: `[${
|
|
1137
|
+
llmOffsets: `[${start}:${end}]`,
|
|
1284
1138
|
foundAtLlmOffsets: extractedText
|
|
1285
1139
|
});
|
|
1286
1140
|
let occurrenceCount = 0;
|
|
@@ -1293,10 +1147,10 @@ Example output:
|
|
|
1293
1147
|
}
|
|
1294
1148
|
if (occurrenceCount === 0) {
|
|
1295
1149
|
anchorMethod = "dropped";
|
|
1296
|
-
logger
|
|
1150
|
+
logger.error("Entity text not found in resource \u2014 dropping", {
|
|
1297
1151
|
text: entity.exact,
|
|
1298
1152
|
entityType: entity.entityType,
|
|
1299
|
-
llmOffsets: `[${
|
|
1153
|
+
llmOffsets: `[${start}:${end}]`,
|
|
1300
1154
|
anchorMethod,
|
|
1301
1155
|
resourceStart: exact.substring(0, 200)
|
|
1302
1156
|
});
|
|
@@ -1322,9 +1176,9 @@ Example output:
|
|
|
1322
1176
|
}
|
|
1323
1177
|
if (recoveredOffset !== -1) {
|
|
1324
1178
|
anchorMethod = "context-recovered";
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
logger
|
|
1179
|
+
start = recoveredOffset;
|
|
1180
|
+
end = recoveredOffset + entity.exact.length;
|
|
1181
|
+
logger.debug("Entity anchored", {
|
|
1328
1182
|
text: entity.exact,
|
|
1329
1183
|
entityType: entity.entityType,
|
|
1330
1184
|
anchorMethod,
|
|
@@ -1332,9 +1186,9 @@ Example output:
|
|
|
1332
1186
|
});
|
|
1333
1187
|
} else if (occurrenceCount === 1) {
|
|
1334
1188
|
anchorMethod = "unique-match";
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
logger
|
|
1189
|
+
start = firstOccurrence;
|
|
1190
|
+
end = firstOccurrence + entity.exact.length;
|
|
1191
|
+
logger.debug("Entity anchored", {
|
|
1338
1192
|
text: entity.exact,
|
|
1339
1193
|
entityType: entity.entityType,
|
|
1340
1194
|
anchorMethod,
|
|
@@ -1342,9 +1196,9 @@ Example output:
|
|
|
1342
1196
|
});
|
|
1343
1197
|
} else {
|
|
1344
1198
|
anchorMethod = "first-of-many";
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
logger
|
|
1199
|
+
start = firstOccurrence;
|
|
1200
|
+
end = firstOccurrence + entity.exact.length;
|
|
1201
|
+
logger.warn("Entity anchored at first of multiple occurrences \u2014 may be wrong", {
|
|
1348
1202
|
text: entity.exact,
|
|
1349
1203
|
entityType: entity.entityType,
|
|
1350
1204
|
anchorMethod,
|
|
@@ -1359,52 +1213,52 @@ Example output:
|
|
|
1359
1213
|
return {
|
|
1360
1214
|
exact: entity.exact,
|
|
1361
1215
|
entityType: entity.entityType,
|
|
1362
|
-
|
|
1363
|
-
|
|
1216
|
+
start,
|
|
1217
|
+
end,
|
|
1364
1218
|
prefix: entity.prefix,
|
|
1365
1219
|
suffix: entity.suffix
|
|
1366
1220
|
};
|
|
1367
1221
|
}).filter((entity) => {
|
|
1368
1222
|
if (entity === null) {
|
|
1369
|
-
logger
|
|
1223
|
+
logger.debug("Filtered entity: null");
|
|
1370
1224
|
return false;
|
|
1371
1225
|
}
|
|
1372
|
-
if (entity.
|
|
1373
|
-
logger
|
|
1226
|
+
if (entity.start === void 0 || entity.end === void 0) {
|
|
1227
|
+
logger.warn("Filtered entity: missing offsets", { text: entity.exact });
|
|
1374
1228
|
return false;
|
|
1375
1229
|
}
|
|
1376
|
-
if (entity.
|
|
1377
|
-
logger
|
|
1230
|
+
if (entity.start < 0) {
|
|
1231
|
+
logger.warn("Filtered entity: negative start", {
|
|
1378
1232
|
text: entity.exact,
|
|
1379
|
-
|
|
1233
|
+
start: entity.start
|
|
1380
1234
|
});
|
|
1381
1235
|
return false;
|
|
1382
1236
|
}
|
|
1383
|
-
if (entity.
|
|
1384
|
-
logger
|
|
1237
|
+
if (entity.end > exact.length) {
|
|
1238
|
+
logger.warn("Filtered entity: end exceeds text length", {
|
|
1385
1239
|
text: entity.exact,
|
|
1386
|
-
|
|
1240
|
+
end: entity.end,
|
|
1387
1241
|
textLength: exact.length
|
|
1388
1242
|
});
|
|
1389
1243
|
return false;
|
|
1390
1244
|
}
|
|
1391
|
-
const extractedText = exact.substring(entity.
|
|
1245
|
+
const extractedText = exact.substring(entity.start, entity.end);
|
|
1392
1246
|
if (extractedText !== entity.exact) {
|
|
1393
|
-
logger
|
|
1247
|
+
logger.warn("Filtered entity: offset mismatch", {
|
|
1394
1248
|
expected: entity.exact,
|
|
1395
1249
|
got: extractedText,
|
|
1396
|
-
offsets: `[${entity.
|
|
1250
|
+
offsets: `[${entity.start}:${entity.end}]`
|
|
1397
1251
|
});
|
|
1398
1252
|
return false;
|
|
1399
1253
|
}
|
|
1400
|
-
logger
|
|
1254
|
+
logger.debug("Accepted entity", {
|
|
1401
1255
|
text: entity.exact,
|
|
1402
|
-
offsets: `[${entity.
|
|
1256
|
+
offsets: `[${entity.start}:${entity.end}]`
|
|
1403
1257
|
});
|
|
1404
1258
|
return true;
|
|
1405
1259
|
});
|
|
1406
1260
|
} catch (error) {
|
|
1407
|
-
logger
|
|
1261
|
+
logger.error("Failed to parse entity extraction response", {
|
|
1408
1262
|
error: error instanceof Error ? error.message : String(error)
|
|
1409
1263
|
});
|
|
1410
1264
|
return [];
|
|
@@ -1413,8 +1267,8 @@ Example output:
|
|
|
1413
1267
|
function getLanguageName(locale) {
|
|
1414
1268
|
return getLocaleEnglishName(locale) || locale;
|
|
1415
1269
|
}
|
|
1416
|
-
async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens,
|
|
1417
|
-
logger
|
|
1270
|
+
async function generateResourceFromTopic(topic, entityTypes, client, logger, userPrompt, locale, context, temperature, maxTokens, sourceLanguage) {
|
|
1271
|
+
logger.debug("Generating resource from topic", {
|
|
1418
1272
|
topicPreview: topic.substring(0, 100),
|
|
1419
1273
|
entityTypes,
|
|
1420
1274
|
hasUserPrompt: !!userPrompt,
|
|
@@ -1522,15 +1376,15 @@ Requirements:
|
|
|
1522
1376
|
content
|
|
1523
1377
|
};
|
|
1524
1378
|
};
|
|
1525
|
-
logger
|
|
1379
|
+
logger.debug("Sending prompt to inference", {
|
|
1526
1380
|
promptLength: prompt.length,
|
|
1527
1381
|
temperature: finalTemperature,
|
|
1528
1382
|
maxTokens: finalMaxTokens
|
|
1529
1383
|
});
|
|
1530
1384
|
const response = await client.generateText(prompt, finalMaxTokens, finalTemperature);
|
|
1531
|
-
logger
|
|
1385
|
+
logger.debug("Got response from inference", { responseLength: response.length });
|
|
1532
1386
|
const result = parseResponse(response);
|
|
1533
|
-
logger
|
|
1387
|
+
logger.debug("Parsed response", {
|
|
1534
1388
|
hasTitle: !!result.title,
|
|
1535
1389
|
titleLength: result.title?.length,
|
|
1536
1390
|
hasContent: !!result.content,
|
|
@@ -1539,13 +1393,16 @@ Requirements:
|
|
|
1539
1393
|
return result;
|
|
1540
1394
|
}
|
|
1541
1395
|
function buildTextAnnotation(resourceId, userId, generator, motivation, match, body) {
|
|
1396
|
+
const creator = didToAgent(userId);
|
|
1397
|
+
const wasAttributedTo = creator["@id"] === generator["@id"] ? [generator] : [creator, generator];
|
|
1542
1398
|
return {
|
|
1543
1399
|
"@context": "http://www.w3.org/ns/anno.jsonld",
|
|
1544
1400
|
"type": "Annotation",
|
|
1545
1401
|
"id": generateAnnotationId(),
|
|
1546
1402
|
motivation,
|
|
1547
|
-
creator
|
|
1403
|
+
creator,
|
|
1548
1404
|
generator,
|
|
1405
|
+
wasAttributedTo,
|
|
1549
1406
|
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1550
1407
|
target: {
|
|
1551
1408
|
type: "SpecificResource",
|
|
@@ -1688,7 +1545,7 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
|
|
|
1688
1545
|
];
|
|
1689
1546
|
for (const entity of extractedEntities) {
|
|
1690
1547
|
try {
|
|
1691
|
-
const validated = validateAndCorrectOffsets(content, entity.
|
|
1548
|
+
const validated = validateAndCorrectOffsets(content, entity.start, entity.end, entity.exact);
|
|
1692
1549
|
const ann = buildTextAnnotation(
|
|
1693
1550
|
params.resourceId,
|
|
1694
1551
|
userId,
|
|
@@ -1718,7 +1575,7 @@ async function processTagJob(content, inferenceClient, params, userId, generator
|
|
|
1718
1575
|
const categoryTags = await AnnotationDetection.detectTags(
|
|
1719
1576
|
content,
|
|
1720
1577
|
inferenceClient,
|
|
1721
|
-
params.
|
|
1578
|
+
params.schema,
|
|
1722
1579
|
category,
|
|
1723
1580
|
params.sourceLanguage
|
|
1724
1581
|
);
|
|
@@ -1733,7 +1590,7 @@ async function processTagJob(content, inferenceClient, params, userId, generator
|
|
|
1733
1590
|
byCategory[category] = (byCategory[category] ?? 0) + 1;
|
|
1734
1591
|
return buildTextAnnotation(params.resourceId, userId, generator, "tagging", t, [
|
|
1735
1592
|
{ type: "TextualBody", value: category, purpose: "tagging", format: "text/plain", language: bodyLanguage },
|
|
1736
|
-
{ type: "TextualBody", value: params.
|
|
1593
|
+
{ type: "TextualBody", value: params.schema.id, purpose: "classifying", format: "text/plain" }
|
|
1737
1594
|
]);
|
|
1738
1595
|
});
|
|
1739
1596
|
onProgress(100, `Complete! Created ${annotations.length} tags`, "creating");
|
|
@@ -1742,7 +1599,7 @@ async function processTagJob(content, inferenceClient, params, userId, generator
|
|
|
1742
1599
|
result: { tagsFound: tags.length, tagsCreated: annotations.length, byCategory }
|
|
1743
1600
|
};
|
|
1744
1601
|
}
|
|
1745
|
-
async function processGenerationJob(inferenceClient, params, onProgress) {
|
|
1602
|
+
async function processGenerationJob(inferenceClient, params, onProgress, logger) {
|
|
1746
1603
|
onProgress(20, "Fetching context...", "fetching");
|
|
1747
1604
|
const title = params.title ?? "Untitled";
|
|
1748
1605
|
const entityTypes = (params.entityTypes ?? []).map(String);
|
|
@@ -1751,13 +1608,12 @@ async function processGenerationJob(inferenceClient, params, onProgress) {
|
|
|
1751
1608
|
title,
|
|
1752
1609
|
entityTypes,
|
|
1753
1610
|
inferenceClient,
|
|
1611
|
+
logger,
|
|
1754
1612
|
params.prompt,
|
|
1755
1613
|
params.language,
|
|
1756
1614
|
params.context,
|
|
1757
1615
|
params.temperature,
|
|
1758
1616
|
params.maxTokens,
|
|
1759
|
-
void 0,
|
|
1760
|
-
// logger
|
|
1761
1617
|
params.sourceLanguage
|
|
1762
1618
|
);
|
|
1763
1619
|
onProgress(85, "Creating resource...", "creating");
|