@semiont/make-meaning 0.2.30-build.61 → 0.2.30-build.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -10
- package/dist/index.d.ts +471 -159
- package/dist/index.js +2912 -1599
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,727 +1,1230 @@
|
|
|
1
|
-
// src/
|
|
2
|
-
import
|
|
1
|
+
// src/service.ts
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import { JobQueue } from "@semiont/jobs";
|
|
4
|
+
import { createEventStore as createEventStoreCore } from "@semiont/event-sourcing";
|
|
5
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore3 } from "@semiont/content";
|
|
6
|
+
import { resourceId as makeResourceId2 } from "@semiont/core";
|
|
7
|
+
import { getInferenceClient } from "@semiont/inference";
|
|
8
|
+
import { getGraphDatabase } from "@semiont/graph";
|
|
9
|
+
|
|
10
|
+
// src/jobs/reference-detection-worker.ts
|
|
11
|
+
import { JobWorker } from "@semiont/jobs";
|
|
12
|
+
import { generateAnnotationId } from "@semiont/event-sourcing";
|
|
13
|
+
import { resourceIdToURI } from "@semiont/core";
|
|
14
|
+
import {
|
|
15
|
+
getPrimaryRepresentation,
|
|
16
|
+
decodeRepresentation,
|
|
17
|
+
validateAndCorrectOffsets
|
|
18
|
+
} from "@semiont/api-client";
|
|
19
|
+
|
|
20
|
+
// src/detection/entity-extractor.ts
|
|
21
|
+
async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences = false) {
|
|
22
|
+
console.log("extractEntities called with:", {
|
|
23
|
+
textLength: exact.length,
|
|
24
|
+
entityTypes: Array.isArray(entityTypes) ? entityTypes.map((et) => typeof et === "string" ? et : et.type) : []
|
|
25
|
+
});
|
|
26
|
+
const entityTypesDescription = entityTypes.map((et) => {
|
|
27
|
+
if (typeof et === "string") {
|
|
28
|
+
return et;
|
|
29
|
+
}
|
|
30
|
+
return et.examples && et.examples.length > 0 ? `${et.type} (examples: ${et.examples.slice(0, 3).join(", ")})` : et.type;
|
|
31
|
+
}).join(", ");
|
|
32
|
+
const descriptiveReferenceGuidance = includeDescriptiveReferences ? `
|
|
33
|
+
Include both:
|
|
34
|
+
- Direct mentions (names, proper nouns)
|
|
35
|
+
- Descriptive references (substantive phrases that refer to entities)
|
|
36
|
+
|
|
37
|
+
For descriptive references, include:
|
|
38
|
+
- Definite descriptions: "the Nobel laureate", "the tech giant", "the former president"
|
|
39
|
+
- Role-based references: "the CEO", "the physicist", "the author", "the owner", "the contractor"
|
|
40
|
+
- Epithets with context: "the Cupertino-based company", "the iPhone maker"
|
|
41
|
+
- References to entities even when identity is unknown or unspecified
|
|
42
|
+
|
|
43
|
+
Do NOT include:
|
|
44
|
+
- Simple pronouns alone: he, she, it, they, him, her, them
|
|
45
|
+
- Generic determiners alone: this, that, these, those
|
|
46
|
+
- Possessives without substance: his, her, their, its
|
|
47
|
+
|
|
48
|
+
Examples:
|
|
49
|
+
- For "Marie Curie", include "the Nobel laureate" and "the physicist" but NOT "she"
|
|
50
|
+
- For an unknown person, include "the owner" or "the contractor" (role-based references count even when identity is unspecified)
|
|
51
|
+
` : `
|
|
52
|
+
Find direct mentions only (names, proper nouns). Do not include pronouns or descriptive references.
|
|
53
|
+
`;
|
|
54
|
+
const prompt = `Identify entity references in the following text. Look for mentions of: ${entityTypesDescription}.
|
|
55
|
+
${descriptiveReferenceGuidance}
|
|
56
|
+
Text to analyze:
|
|
57
|
+
"""
|
|
58
|
+
${exact}
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
Return ONLY a JSON array of entities found. Each entity should have:
|
|
62
|
+
- exact: the exact text span from the input
|
|
63
|
+
- entityType: one of the provided entity types
|
|
64
|
+
- startOffset: character position where the entity starts (0-indexed)
|
|
65
|
+
- endOffset: character position where the entity ends
|
|
66
|
+
- prefix: up to 32 characters of text immediately before the entity (helps identify correct occurrence)
|
|
67
|
+
- suffix: up to 32 characters of text immediately after the entity (helps identify correct occurrence)
|
|
68
|
+
|
|
69
|
+
Return empty array [] if no entities found.
|
|
70
|
+
Do not include markdown formatting or code fences, just the raw JSON array.
|
|
71
|
+
|
|
72
|
+
Example output:
|
|
73
|
+
[{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
|
|
74
|
+
console.log("Sending entity extraction request");
|
|
75
|
+
const response = await client.generateTextWithMetadata(
|
|
76
|
+
prompt,
|
|
77
|
+
4e3,
|
|
78
|
+
// Increased to handle many entities without truncation
|
|
79
|
+
0.3
|
|
80
|
+
// Lower temperature for more consistent extraction
|
|
81
|
+
);
|
|
82
|
+
console.log("Got entity extraction response");
|
|
83
|
+
console.log("Entity extraction raw response length:", response.text.length);
|
|
84
|
+
try {
|
|
85
|
+
let jsonStr = response.text.trim();
|
|
86
|
+
if (jsonStr.startsWith("```")) {
|
|
87
|
+
jsonStr = jsonStr.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
|
|
88
|
+
}
|
|
89
|
+
const entities = JSON.parse(jsonStr);
|
|
90
|
+
console.log("Parsed", entities.length, "entities from response");
|
|
91
|
+
if (response.stopReason === "max_tokens") {
|
|
92
|
+
const errorMsg = `AI response truncated: Found ${entities.length} entities but response hit max_tokens limit. Increase max_tokens or reduce resource size.`;
|
|
93
|
+
console.error(`\u274C ${errorMsg}`);
|
|
94
|
+
throw new Error(errorMsg);
|
|
95
|
+
}
|
|
96
|
+
return entities.map((entity, idx) => {
|
|
97
|
+
let startOffset = entity.startOffset;
|
|
98
|
+
let endOffset = entity.endOffset;
|
|
99
|
+
console.log(`
|
|
100
|
+
[Entity ${idx + 1}/${entities.length}]`);
|
|
101
|
+
console.log(` Type: ${entity.entityType}`);
|
|
102
|
+
console.log(` Text: "${entity.exact}"`);
|
|
103
|
+
console.log(` Offsets from AI: [${startOffset}, ${endOffset}]`);
|
|
104
|
+
const extractedText = exact.substring(startOffset, endOffset);
|
|
105
|
+
if (extractedText !== entity.exact) {
|
|
106
|
+
console.log(` \u26A0\uFE0F Offset mismatch!`);
|
|
107
|
+
console.log(` Expected: "${entity.exact}"`);
|
|
108
|
+
console.log(` Found at AI offsets [${startOffset}:${endOffset}]: "${extractedText}"`);
|
|
109
|
+
const contextStart = Math.max(0, startOffset - 50);
|
|
110
|
+
const contextEnd = Math.min(exact.length, endOffset + 50);
|
|
111
|
+
const contextBefore = exact.substring(contextStart, startOffset);
|
|
112
|
+
const contextAfter = exact.substring(endOffset, contextEnd);
|
|
113
|
+
console.log(` Context: "...${contextBefore}[${extractedText}]${contextAfter}..."`);
|
|
114
|
+
console.log(` Searching for exact match in resource...`);
|
|
115
|
+
let found = false;
|
|
116
|
+
if (entity.prefix || entity.suffix) {
|
|
117
|
+
console.log(` Using LLM-provided context for disambiguation:`);
|
|
118
|
+
if (entity.prefix) console.log(` Prefix: "${entity.prefix}"`);
|
|
119
|
+
if (entity.suffix) console.log(` Suffix: "${entity.suffix}"`);
|
|
120
|
+
let searchPos = 0;
|
|
121
|
+
while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
|
|
122
|
+
const candidatePrefix = exact.substring(Math.max(0, searchPos - 32), searchPos);
|
|
123
|
+
const candidateSuffix = exact.substring(
|
|
124
|
+
searchPos + entity.exact.length,
|
|
125
|
+
Math.min(exact.length, searchPos + entity.exact.length + 32)
|
|
126
|
+
);
|
|
127
|
+
const prefixMatch = !entity.prefix || candidatePrefix.endsWith(entity.prefix);
|
|
128
|
+
const suffixMatch = !entity.suffix || candidateSuffix.startsWith(entity.suffix);
|
|
129
|
+
if (prefixMatch && suffixMatch) {
|
|
130
|
+
console.log(` \u2705 Found match using context at offset ${searchPos} (diff: ${searchPos - startOffset})`);
|
|
131
|
+
console.log(` Candidate prefix: "${candidatePrefix}"`);
|
|
132
|
+
console.log(` Candidate suffix: "${candidateSuffix}"`);
|
|
133
|
+
startOffset = searchPos;
|
|
134
|
+
endOffset = searchPos + entity.exact.length;
|
|
135
|
+
found = true;
|
|
136
|
+
break;
|
|
137
|
+
}
|
|
138
|
+
searchPos++;
|
|
139
|
+
}
|
|
140
|
+
if (!found) {
|
|
141
|
+
console.log(` \u26A0\uFE0F No occurrence found with matching context`);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
if (!found) {
|
|
145
|
+
const index = exact.indexOf(entity.exact);
|
|
146
|
+
if (index !== -1) {
|
|
147
|
+
console.log(` \u26A0\uFE0F Using first occurrence at offset ${index} (diff: ${index - startOffset})`);
|
|
148
|
+
startOffset = index;
|
|
149
|
+
endOffset = index + entity.exact.length;
|
|
150
|
+
} else {
|
|
151
|
+
console.log(` \u274C Cannot find "${entity.exact}" anywhere in resource`);
|
|
152
|
+
console.log(` Resource starts with: "${exact.substring(0, 200)}..."`);
|
|
153
|
+
return null;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
} else {
|
|
157
|
+
console.log(` \u2705 Offsets correct`);
|
|
158
|
+
}
|
|
159
|
+
return {
|
|
160
|
+
exact: entity.exact,
|
|
161
|
+
entityType: entity.entityType,
|
|
162
|
+
startOffset,
|
|
163
|
+
endOffset,
|
|
164
|
+
prefix: entity.prefix,
|
|
165
|
+
suffix: entity.suffix
|
|
166
|
+
};
|
|
167
|
+
}).filter((entity) => {
|
|
168
|
+
if (entity === null) {
|
|
169
|
+
console.log("\u274C Filtered entity: null");
|
|
170
|
+
return false;
|
|
171
|
+
}
|
|
172
|
+
if (entity.startOffset === void 0 || entity.endOffset === void 0) {
|
|
173
|
+
console.log(`\u274C Filtered entity "${entity.exact}": missing offsets`);
|
|
174
|
+
return false;
|
|
175
|
+
}
|
|
176
|
+
if (entity.startOffset < 0) {
|
|
177
|
+
console.log(`\u274C Filtered entity "${entity.exact}": negative startOffset (${entity.startOffset})`);
|
|
178
|
+
return false;
|
|
179
|
+
}
|
|
180
|
+
if (entity.endOffset > exact.length) {
|
|
181
|
+
console.log(`\u274C Filtered entity "${entity.exact}": endOffset (${entity.endOffset}) > text length (${exact.length})`);
|
|
182
|
+
return false;
|
|
183
|
+
}
|
|
184
|
+
const extractedText = exact.substring(entity.startOffset, entity.endOffset);
|
|
185
|
+
if (extractedText !== entity.exact) {
|
|
186
|
+
console.log(`\u274C Filtered entity "${entity.exact}": offset mismatch`);
|
|
187
|
+
console.log(` Expected: "${entity.exact}"`);
|
|
188
|
+
console.log(` Got at [${entity.startOffset}:${entity.endOffset}]: "${extractedText}"`);
|
|
189
|
+
return false;
|
|
190
|
+
}
|
|
191
|
+
console.log(`\u2705 Accepted entity "${entity.exact}" at [${entity.startOffset}:${entity.endOffset}]`);
|
|
192
|
+
return true;
|
|
193
|
+
});
|
|
194
|
+
} catch (error) {
|
|
195
|
+
console.error("Failed to parse entity extraction response:", error);
|
|
196
|
+
return [];
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// src/jobs/reference-detection-worker.ts
|
|
3
201
|
import { FilesystemRepresentationStore } from "@semiont/content";
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
202
|
+
var ReferenceDetectionWorker = class extends JobWorker {
|
|
203
|
+
constructor(jobQueue, config, eventStore, inferenceClient) {
|
|
204
|
+
super(jobQueue);
|
|
205
|
+
this.config = config;
|
|
206
|
+
this.eventStore = eventStore;
|
|
207
|
+
this.inferenceClient = inferenceClient;
|
|
208
|
+
}
|
|
209
|
+
getWorkerName() {
|
|
210
|
+
return "ReferenceDetectionWorker";
|
|
211
|
+
}
|
|
212
|
+
canProcessJob(job) {
|
|
213
|
+
return job.metadata.type === "detection";
|
|
214
|
+
}
|
|
215
|
+
async executeJob(job) {
|
|
216
|
+
if (job.metadata.type !== "detection") {
|
|
217
|
+
throw new Error(`Invalid job type: ${job.metadata.type}`);
|
|
16
218
|
}
|
|
17
|
-
|
|
219
|
+
if (job.status !== "running") {
|
|
220
|
+
throw new Error(`Job must be in running state to execute, got: ${job.status}`);
|
|
221
|
+
}
|
|
222
|
+
await this.processDetectionJob(job);
|
|
18
223
|
}
|
|
19
224
|
/**
|
|
20
|
-
*
|
|
225
|
+
* Detect entity references in resource using AI
|
|
226
|
+
* Self-contained implementation for reference detection
|
|
227
|
+
*
|
|
228
|
+
* Public for testing charset handling - see entity-detection-charset.test.ts
|
|
21
229
|
*/
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const
|
|
25
|
-
const
|
|
26
|
-
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
if (
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
230
|
+
async detectReferences(resource, entityTypes, includeDescriptiveReferences = false) {
|
|
231
|
+
console.log(`Detecting entities of types: ${entityTypes.join(", ")}${includeDescriptiveReferences ? " (including descriptive references)" : ""}`);
|
|
232
|
+
const detectedAnnotations = [];
|
|
233
|
+
const primaryRep = getPrimaryRepresentation(resource);
|
|
234
|
+
if (!primaryRep) return detectedAnnotations;
|
|
235
|
+
const mediaType = primaryRep.mediaType;
|
|
236
|
+
const baseMediaType = mediaType?.split(";")[0]?.trim() || "";
|
|
237
|
+
if (baseMediaType === "text/plain" || baseMediaType === "text/markdown") {
|
|
238
|
+
if (!primaryRep.checksum || !primaryRep.mediaType) return detectedAnnotations;
|
|
239
|
+
const basePath = this.config.services.filesystem.path;
|
|
240
|
+
const projectRoot = this.config._metadata?.projectRoot;
|
|
241
|
+
const repStore = new FilesystemRepresentationStore({ basePath }, projectRoot);
|
|
242
|
+
const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
243
|
+
const content = decodeRepresentation(contentBuffer, primaryRep.mediaType);
|
|
244
|
+
const extractedEntities = await extractEntities(content, entityTypes, this.inferenceClient, includeDescriptiveReferences);
|
|
245
|
+
for (const entity of extractedEntities) {
|
|
246
|
+
try {
|
|
247
|
+
const validated = validateAndCorrectOffsets(
|
|
248
|
+
content,
|
|
249
|
+
entity.startOffset,
|
|
250
|
+
entity.endOffset,
|
|
251
|
+
entity.exact
|
|
252
|
+
);
|
|
253
|
+
const annotation = {
|
|
254
|
+
annotation: {
|
|
255
|
+
selector: {
|
|
256
|
+
start: validated.start,
|
|
257
|
+
end: validated.end,
|
|
258
|
+
exact: validated.exact,
|
|
259
|
+
prefix: validated.prefix,
|
|
260
|
+
suffix: validated.suffix
|
|
261
|
+
},
|
|
262
|
+
entityTypes: [entity.entityType]
|
|
263
|
+
}
|
|
264
|
+
};
|
|
265
|
+
detectedAnnotations.push(annotation);
|
|
266
|
+
} catch (error) {
|
|
267
|
+
console.warn(`[ReferenceDetectionWorker] Skipping invalid entity "${entity.exact}":`, error);
|
|
37
268
|
}
|
|
38
269
|
}
|
|
39
|
-
resources.push(doc);
|
|
40
270
|
}
|
|
41
|
-
|
|
42
|
-
const aTime = a.dateCreated ? new Date(a.dateCreated).getTime() : 0;
|
|
43
|
-
const bTime = b.dateCreated ? new Date(b.dateCreated).getTime() : 0;
|
|
44
|
-
return bTime - aTime;
|
|
45
|
-
});
|
|
46
|
-
return resources;
|
|
271
|
+
return detectedAnnotations;
|
|
47
272
|
}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
273
|
+
async processDetectionJob(job) {
|
|
274
|
+
console.log(`[ReferenceDetectionWorker] Processing detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
|
|
275
|
+
console.log(`[ReferenceDetectionWorker] \u{1F50D} Entity types: ${job.params.entityTypes.join(", ")}`);
|
|
276
|
+
const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
|
|
277
|
+
if (!resource) {
|
|
278
|
+
throw new Error(`Resource ${job.params.resourceId} not found`);
|
|
279
|
+
}
|
|
280
|
+
let totalFound = 0;
|
|
281
|
+
let totalEmitted = 0;
|
|
282
|
+
let totalErrors = 0;
|
|
283
|
+
let updatedJob = {
|
|
284
|
+
...job,
|
|
285
|
+
progress: {
|
|
286
|
+
totalEntityTypes: job.params.entityTypes.length,
|
|
287
|
+
processedEntityTypes: 0,
|
|
288
|
+
entitiesFound: 0,
|
|
289
|
+
entitiesEmitted: 0
|
|
290
|
+
}
|
|
291
|
+
};
|
|
292
|
+
await this.updateJobProgress(updatedJob);
|
|
293
|
+
for (let i = 0; i < job.params.entityTypes.length; i++) {
|
|
294
|
+
const entityType = job.params.entityTypes[i];
|
|
295
|
+
if (!entityType) continue;
|
|
296
|
+
console.log(`[ReferenceDetectionWorker] \u{1F916} [${i + 1}/${job.params.entityTypes.length}] Detecting ${entityType}...`);
|
|
297
|
+
const detectedAnnotations = await this.detectReferences(resource, [entityType], job.params.includeDescriptiveReferences);
|
|
298
|
+
totalFound += detectedAnnotations.length;
|
|
299
|
+
console.log(`[ReferenceDetectionWorker] \u2705 Found ${detectedAnnotations.length} ${entityType} entities`);
|
|
300
|
+
for (let idx = 0; idx < detectedAnnotations.length; idx++) {
|
|
301
|
+
const detected = detectedAnnotations[idx];
|
|
302
|
+
if (!detected) {
|
|
303
|
+
console.warn(`[ReferenceDetectionWorker] Skipping undefined entity at index ${idx}`);
|
|
304
|
+
continue;
|
|
305
|
+
}
|
|
306
|
+
let referenceId;
|
|
58
307
|
try {
|
|
59
|
-
const
|
|
60
|
-
if (
|
|
61
|
-
|
|
62
|
-
const contentPreview = decodeRepresentation(contentBuffer, primaryRep.mediaType).slice(0, 200);
|
|
63
|
-
return { ...doc, content: contentPreview };
|
|
308
|
+
const backendUrl = this.config.services.backend?.publicURL;
|
|
309
|
+
if (!backendUrl) {
|
|
310
|
+
throw new Error("Backend publicURL not configured");
|
|
64
311
|
}
|
|
65
|
-
|
|
66
|
-
} catch {
|
|
67
|
-
|
|
312
|
+
referenceId = generateAnnotationId(backendUrl);
|
|
313
|
+
} catch (error) {
|
|
314
|
+
console.error(`[ReferenceDetectionWorker] Failed to generate annotation ID:`, error);
|
|
315
|
+
throw new Error("Configuration error: Backend publicURL not set");
|
|
68
316
|
}
|
|
69
|
-
|
|
70
|
-
|
|
317
|
+
try {
|
|
318
|
+
await this.eventStore.appendEvent({
|
|
319
|
+
type: "annotation.added",
|
|
320
|
+
resourceId: job.params.resourceId,
|
|
321
|
+
userId: job.metadata.userId,
|
|
322
|
+
version: 1,
|
|
323
|
+
payload: {
|
|
324
|
+
annotation: {
|
|
325
|
+
"@context": "http://www.w3.org/ns/anno.jsonld",
|
|
326
|
+
"type": "Annotation",
|
|
327
|
+
id: referenceId,
|
|
328
|
+
motivation: "linking",
|
|
329
|
+
target: {
|
|
330
|
+
source: resourceIdToURI(job.params.resourceId, this.config.services.backend.publicURL),
|
|
331
|
+
// Convert to full URI
|
|
332
|
+
selector: [
|
|
333
|
+
{
|
|
334
|
+
type: "TextPositionSelector",
|
|
335
|
+
start: detected.annotation.selector.start,
|
|
336
|
+
end: detected.annotation.selector.end
|
|
337
|
+
},
|
|
338
|
+
{
|
|
339
|
+
type: "TextQuoteSelector",
|
|
340
|
+
exact: detected.annotation.selector.exact,
|
|
341
|
+
...detected.annotation.selector.prefix && { prefix: detected.annotation.selector.prefix },
|
|
342
|
+
...detected.annotation.selector.suffix && { suffix: detected.annotation.selector.suffix }
|
|
343
|
+
}
|
|
344
|
+
]
|
|
345
|
+
},
|
|
346
|
+
body: (detected.annotation.entityTypes || []).map((et) => ({
|
|
347
|
+
type: "TextualBody",
|
|
348
|
+
value: et,
|
|
349
|
+
purpose: "tagging"
|
|
350
|
+
})),
|
|
351
|
+
modified: (/* @__PURE__ */ new Date()).toISOString()
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
});
|
|
355
|
+
totalEmitted++;
|
|
356
|
+
if ((idx + 1) % 10 === 0 || idx === detectedAnnotations.length - 1) {
|
|
357
|
+
console.log(`[ReferenceDetectionWorker] \u{1F4E4} Emitted ${idx + 1}/${detectedAnnotations.length} events for ${entityType}`);
|
|
358
|
+
}
|
|
359
|
+
} catch (error) {
|
|
360
|
+
totalErrors++;
|
|
361
|
+
console.error(`[ReferenceDetectionWorker] \u274C Failed to emit event for ${referenceId}:`, error);
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
console.log(`[ReferenceDetectionWorker] \u2705 Completed ${entityType}: ${detectedAnnotations.length} found, ${detectedAnnotations.length - (totalErrors - (totalFound - totalEmitted))} emitted`);
|
|
365
|
+
updatedJob = {
|
|
366
|
+
...updatedJob,
|
|
367
|
+
progress: {
|
|
368
|
+
totalEntityTypes: job.params.entityTypes.length,
|
|
369
|
+
processedEntityTypes: i + 1,
|
|
370
|
+
currentEntityType: entityType,
|
|
371
|
+
entitiesFound: totalFound,
|
|
372
|
+
entitiesEmitted: totalEmitted
|
|
373
|
+
}
|
|
374
|
+
};
|
|
375
|
+
await this.updateJobProgress(updatedJob);
|
|
376
|
+
}
|
|
377
|
+
console.log(`[ReferenceDetectionWorker] \u2705 Detection complete: ${totalFound} entities found, ${totalEmitted} events emitted, ${totalErrors} errors`);
|
|
378
|
+
}
|
|
379
|
+
async handleJobFailure(job, error) {
|
|
380
|
+
await super.handleJobFailure(job, error);
|
|
381
|
+
if (job.status === "failed" && job.metadata.type === "detection") {
|
|
382
|
+
const detJob = job;
|
|
383
|
+
await this.eventStore.appendEvent({
|
|
384
|
+
type: "job.failed",
|
|
385
|
+
resourceId: detJob.params.resourceId,
|
|
386
|
+
userId: detJob.metadata.userId,
|
|
387
|
+
version: 1,
|
|
388
|
+
payload: {
|
|
389
|
+
jobId: detJob.metadata.id,
|
|
390
|
+
jobType: detJob.metadata.type,
|
|
391
|
+
error: "Entity detection failed. Please try again later."
|
|
392
|
+
}
|
|
393
|
+
});
|
|
394
|
+
}
|
|
71
395
|
}
|
|
72
|
-
};
|
|
73
|
-
|
|
74
|
-
// src/annotation-context.ts
|
|
75
|
-
import { generateResourceSummary, generateText } from "@semiont/inference";
|
|
76
|
-
import {
|
|
77
|
-
getBodySource,
|
|
78
|
-
getTargetSource,
|
|
79
|
-
getTargetSelector,
|
|
80
|
-
getResourceEntityTypes,
|
|
81
|
-
getTextPositionSelector,
|
|
82
|
-
getPrimaryRepresentation as getPrimaryRepresentation2,
|
|
83
|
-
decodeRepresentation as decodeRepresentation2
|
|
84
|
-
} from "@semiont/api-client";
|
|
85
|
-
import { FilesystemRepresentationStore as FilesystemRepresentationStore2 } from "@semiont/content";
|
|
86
|
-
import { FilesystemViewStorage as FilesystemViewStorage2 } from "@semiont/event-sourcing";
|
|
87
|
-
import { resourceId as createResourceId, uriToResourceId } from "@semiont/core";
|
|
88
|
-
import { getEntityTypes } from "@semiont/ontology";
|
|
89
|
-
var AnnotationContext = class {
|
|
90
396
|
/**
|
|
91
|
-
*
|
|
92
|
-
*
|
|
93
|
-
* @param annotationUri - Full annotation URI (e.g., http://localhost:4000/annotations/abc123)
|
|
94
|
-
* @param resourceId - Source resource ID
|
|
95
|
-
* @param config - Application configuration
|
|
96
|
-
* @param options - Context building options
|
|
97
|
-
* @returns Rich context for LLM processing
|
|
98
|
-
* @throws Error if annotation or resource not found
|
|
397
|
+
* Update job progress and emit events to Event Store
|
|
398
|
+
* Overrides base class to also emit job progress events
|
|
99
399
|
*/
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
contextWindow = 1e3
|
|
105
|
-
} = options;
|
|
106
|
-
if (contextWindow < 100 || contextWindow > 5e3) {
|
|
107
|
-
throw new Error("contextWindow must be between 100 and 5000");
|
|
108
|
-
}
|
|
109
|
-
console.log(`[AnnotationContext] buildLLMContext called with annotationUri=${annotationUri2}, resourceId=${resourceId2}`);
|
|
110
|
-
const basePath = config.services.filesystem.path;
|
|
111
|
-
console.log(`[AnnotationContext] basePath=${basePath}`);
|
|
112
|
-
const projectRoot = config._metadata?.projectRoot;
|
|
113
|
-
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
114
|
-
const repStore = new FilesystemRepresentationStore2({ basePath }, projectRoot);
|
|
115
|
-
console.log(`[AnnotationContext] Getting view for resourceId=${resourceId2}`);
|
|
116
|
-
let sourceView;
|
|
117
|
-
try {
|
|
118
|
-
sourceView = await viewStorage.get(resourceId2);
|
|
119
|
-
console.log(`[AnnotationContext] Got view:`, !!sourceView);
|
|
120
|
-
if (!sourceView) {
|
|
121
|
-
throw new Error("Source resource not found");
|
|
122
|
-
}
|
|
123
|
-
} catch (error) {
|
|
124
|
-
console.error(`[AnnotationContext] Error getting view:`, error);
|
|
125
|
-
throw error;
|
|
126
|
-
}
|
|
127
|
-
console.log(`[AnnotationContext] Looking for annotation ${annotationUri2} in resource ${resourceId2}`);
|
|
128
|
-
console.log(`[AnnotationContext] View has ${sourceView.annotations.annotations.length} annotations`);
|
|
129
|
-
console.log(`[AnnotationContext] First 5 annotation IDs:`, sourceView.annotations.annotations.slice(0, 5).map((a) => a.id));
|
|
130
|
-
const annotation = sourceView.annotations.annotations.find((a) => a.id === annotationUri2);
|
|
131
|
-
console.log(`[AnnotationContext] Found annotation:`, !!annotation);
|
|
132
|
-
if (!annotation) {
|
|
133
|
-
throw new Error("Annotation not found in view");
|
|
134
|
-
}
|
|
135
|
-
const targetSource = getTargetSource(annotation.target);
|
|
136
|
-
const targetResourceId = targetSource.split("/").pop();
|
|
137
|
-
console.log(`[AnnotationContext] Target source: ${targetSource}, Expected resource ID: ${resourceId2}, Extracted ID: ${targetResourceId}`);
|
|
138
|
-
if (targetResourceId !== resourceId2) {
|
|
139
|
-
throw new Error(`Annotation target resource ID (${targetResourceId}) does not match expected resource ID (${resourceId2})`);
|
|
400
|
+
async updateJobProgress(job) {
|
|
401
|
+
await super.updateJobProgress(job);
|
|
402
|
+
if (job.metadata.type !== "detection") {
|
|
403
|
+
return;
|
|
140
404
|
}
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
let targetDoc = null;
|
|
144
|
-
if (bodySource) {
|
|
145
|
-
const parts = bodySource.split("/");
|
|
146
|
-
const lastPart = parts[parts.length - 1];
|
|
147
|
-
if (!lastPart) {
|
|
148
|
-
throw new Error(`Invalid body source URI: ${bodySource}`);
|
|
149
|
-
}
|
|
150
|
-
const targetResourceId2 = createResourceId(lastPart);
|
|
151
|
-
const targetView = await viewStorage.get(targetResourceId2);
|
|
152
|
-
targetDoc = targetView?.resource || null;
|
|
405
|
+
if (job.status !== "running") {
|
|
406
|
+
return;
|
|
153
407
|
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
const end = selector.end;
|
|
171
|
-
const before = contentStr.slice(Math.max(0, start - contextWindow), start);
|
|
172
|
-
const selected = contentStr.slice(start, end);
|
|
173
|
-
const after = contentStr.slice(end, Math.min(contentStr.length, end + contextWindow));
|
|
174
|
-
sourceContext = { before, selected, after };
|
|
175
|
-
console.log(`[AnnotationContext] Built source context using TextPositionSelector (${start}-${end})`);
|
|
176
|
-
} else if (targetSelector.type === "TextQuoteSelector") {
|
|
177
|
-
const selector = targetSelector;
|
|
178
|
-
const exact = selector.exact;
|
|
179
|
-
const index = contentStr.indexOf(exact);
|
|
180
|
-
if (index !== -1) {
|
|
181
|
-
const start = index;
|
|
182
|
-
const end = index + exact.length;
|
|
183
|
-
const before = contentStr.slice(Math.max(0, start - contextWindow), start);
|
|
184
|
-
const selected = exact;
|
|
185
|
-
const after = contentStr.slice(end, Math.min(contentStr.length, end + contextWindow));
|
|
186
|
-
sourceContext = { before, selected, after };
|
|
187
|
-
console.log(`[AnnotationContext] Built source context using TextQuoteSelector (found at ${index})`);
|
|
188
|
-
} else {
|
|
189
|
-
console.warn(`[AnnotationContext] TextQuoteSelector exact text not found in content: "${exact.substring(0, 50)}..."`);
|
|
408
|
+
const detJob = job;
|
|
409
|
+
const baseEvent = {
|
|
410
|
+
resourceId: detJob.params.resourceId,
|
|
411
|
+
userId: detJob.metadata.userId,
|
|
412
|
+
version: 1
|
|
413
|
+
};
|
|
414
|
+
const isFirstUpdate = detJob.progress.processedEntityTypes === 0;
|
|
415
|
+
const isFinalUpdate = detJob.progress.processedEntityTypes === detJob.progress.totalEntityTypes && detJob.progress.totalEntityTypes > 0;
|
|
416
|
+
if (isFirstUpdate) {
|
|
417
|
+
await this.eventStore.appendEvent({
|
|
418
|
+
type: "job.started",
|
|
419
|
+
...baseEvent,
|
|
420
|
+
payload: {
|
|
421
|
+
jobId: detJob.metadata.id,
|
|
422
|
+
jobType: detJob.metadata.type,
|
|
423
|
+
totalSteps: detJob.params.entityTypes.length
|
|
190
424
|
}
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
|
|
425
|
+
});
|
|
426
|
+
} else if (isFinalUpdate) {
|
|
427
|
+
await this.eventStore.appendEvent({
|
|
428
|
+
type: "job.completed",
|
|
429
|
+
...baseEvent,
|
|
430
|
+
payload: {
|
|
431
|
+
jobId: detJob.metadata.id,
|
|
432
|
+
jobType: detJob.metadata.type,
|
|
433
|
+
foundCount: detJob.progress.entitiesFound
|
|
434
|
+
}
|
|
435
|
+
});
|
|
436
|
+
} else {
|
|
437
|
+
const percentage = Math.round(detJob.progress.processedEntityTypes / detJob.progress.totalEntityTypes * 100);
|
|
438
|
+
await this.eventStore.appendEvent({
|
|
439
|
+
type: "job.progress",
|
|
440
|
+
...baseEvent,
|
|
441
|
+
payload: {
|
|
442
|
+
jobId: detJob.metadata.id,
|
|
443
|
+
jobType: detJob.metadata.type,
|
|
444
|
+
percentage,
|
|
445
|
+
currentStep: detJob.progress.currentEntityType,
|
|
446
|
+
processedSteps: detJob.progress.processedEntityTypes,
|
|
447
|
+
totalSteps: detJob.progress.totalEntityTypes,
|
|
448
|
+
foundCount: detJob.progress.entitiesFound
|
|
449
|
+
}
|
|
450
|
+
});
|
|
194
451
|
}
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
452
|
+
}
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
// src/jobs/generation-worker.ts
|
|
456
|
+
import { JobWorker as JobWorker2 } from "@semiont/jobs";
|
|
457
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore2 } from "@semiont/content";
|
|
458
|
+
|
|
459
|
+
// src/generation/resource-generation.ts
|
|
460
|
+
import { getLocaleEnglishName } from "@semiont/api-client";
|
|
461
|
+
function getLanguageName(locale) {
|
|
462
|
+
return getLocaleEnglishName(locale) || locale;
|
|
463
|
+
}
|
|
464
|
+
async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens) {
|
|
465
|
+
console.log("generateResourceFromTopic called with:", {
|
|
466
|
+
topic: topic.substring(0, 100),
|
|
467
|
+
entityTypes,
|
|
468
|
+
hasUserPrompt: !!userPrompt,
|
|
469
|
+
locale,
|
|
470
|
+
hasContext: !!context,
|
|
471
|
+
temperature,
|
|
472
|
+
maxTokens
|
|
473
|
+
});
|
|
474
|
+
const finalTemperature = temperature ?? 0.7;
|
|
475
|
+
const finalMaxTokens = maxTokens ?? 500;
|
|
476
|
+
const languageInstruction = locale && locale !== "en" ? `
|
|
477
|
+
|
|
478
|
+
IMPORTANT: Write the entire resource in ${getLanguageName(locale)}.` : "";
|
|
479
|
+
let contextSection = "";
|
|
480
|
+
if (context?.sourceContext) {
|
|
481
|
+
const { before, selected, after } = context.sourceContext;
|
|
482
|
+
contextSection = `
|
|
483
|
+
|
|
484
|
+
Source document context:
|
|
485
|
+
---
|
|
486
|
+
${before ? `...${before}` : ""}
|
|
487
|
+
**[${selected}]**
|
|
488
|
+
${after ? `${after}...` : ""}
|
|
489
|
+
---
|
|
490
|
+
`;
|
|
491
|
+
}
|
|
492
|
+
const prompt = `Generate a concise, informative resource about "${topic}".
|
|
493
|
+
${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
|
|
494
|
+
${userPrompt ? `Additional context: ${userPrompt}` : ""}${contextSection}${languageInstruction}
|
|
495
|
+
|
|
496
|
+
Requirements:
|
|
497
|
+
- Start with a clear heading (# Title)
|
|
498
|
+
- Write 2-3 paragraphs of substantive content
|
|
499
|
+
- Be factual and informative
|
|
500
|
+
- Use markdown formatting
|
|
501
|
+
- Return ONLY the markdown content, no JSON, no code fences, no additional wrapper`;
|
|
502
|
+
const parseResponse = (response2) => {
|
|
503
|
+
let content = response2.trim();
|
|
504
|
+
if (content.startsWith("```markdown") || content.startsWith("```md")) {
|
|
505
|
+
content = content.slice(content.indexOf("\n") + 1);
|
|
506
|
+
const endIndex = content.lastIndexOf("```");
|
|
507
|
+
if (endIndex !== -1) {
|
|
508
|
+
content = content.slice(0, endIndex);
|
|
205
509
|
}
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
selected: sourceContext.selected,
|
|
212
|
-
after: sourceContext.after || ""
|
|
213
|
-
},
|
|
214
|
-
metadata: {
|
|
215
|
-
resourceType: "document",
|
|
216
|
-
language: sourceDoc.language,
|
|
217
|
-
entityTypes: getEntityTypes(annotation)
|
|
510
|
+
} else if (content.startsWith("```")) {
|
|
511
|
+
content = content.slice(3);
|
|
512
|
+
const endIndex = content.lastIndexOf("```");
|
|
513
|
+
if (endIndex !== -1) {
|
|
514
|
+
content = content.slice(0, endIndex);
|
|
218
515
|
}
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
...generationContext ? { context: generationContext } : {},
|
|
225
|
-
...sourceContext ? { sourceContext } : {},
|
|
226
|
-
// Keep for backward compatibility
|
|
227
|
-
...targetContext ? { targetContext } : {},
|
|
228
|
-
...suggestedResolution ? { suggestedResolution } : {}
|
|
516
|
+
}
|
|
517
|
+
content = content.trim();
|
|
518
|
+
return {
|
|
519
|
+
title: topic,
|
|
520
|
+
content
|
|
229
521
|
};
|
|
230
|
-
|
|
522
|
+
};
|
|
523
|
+
console.log("Sending prompt to inference (length:", prompt.length, "chars)", "temp:", finalTemperature, "maxTokens:", finalMaxTokens);
|
|
524
|
+
const response = await client.generateText(prompt, finalMaxTokens, finalTemperature);
|
|
525
|
+
console.log("Got raw response (length:", response.length, "chars)");
|
|
526
|
+
const result = parseResponse(response);
|
|
527
|
+
console.log("Parsed result:", {
|
|
528
|
+
hasTitle: !!result.title,
|
|
529
|
+
titleLength: result.title?.length,
|
|
530
|
+
hasContent: !!result.content,
|
|
531
|
+
contentLength: result.content?.length
|
|
532
|
+
});
|
|
533
|
+
return result;
|
|
534
|
+
}
|
|
535
|
+
async function generateResourceSummary(resourceName, content, entityTypes, client) {
|
|
536
|
+
const truncatedContent = content.length > 2e3 ? content.substring(0, 2e3) + "..." : content;
|
|
537
|
+
const prompt = `Create a brief, intelligent summary of this resource titled "${resourceName}".
|
|
538
|
+
${entityTypes.length > 0 ? `Key entity types: ${entityTypes.join(", ")}` : ""}
|
|
539
|
+
|
|
540
|
+
Resource content:
|
|
541
|
+
${truncatedContent}
|
|
542
|
+
|
|
543
|
+
Write a 2-3 sentence summary that captures the key points and would help someone understand what this resource contains.`;
|
|
544
|
+
return await client.generateText(prompt, 150, 0.5);
|
|
545
|
+
}
|
|
546
|
+
async function generateReferenceSuggestions(referenceTitle, client, entityType, currentContent) {
|
|
547
|
+
const prompt = `For a reference titled "${referenceTitle}"${entityType ? ` (type: ${entityType})` : ""}${currentContent ? ` with current stub: "${currentContent}"` : ""}, suggest 3 specific, actionable next steps or related topics to explore.
|
|
548
|
+
|
|
549
|
+
Format as a simple list, one suggestion per line.`;
|
|
550
|
+
const response = await client.generateText(prompt, 200, 0.8);
|
|
551
|
+
if (!response) {
|
|
552
|
+
return null;
|
|
231
553
|
}
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
554
|
+
return response.split("\n").map((line) => line.replace(/^[-*•]\s*/, "").trim()).filter((line) => line.length > 0).slice(0, 3);
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
// src/jobs/generation-worker.ts
|
|
558
|
+
import {
|
|
559
|
+
getTargetSelector,
|
|
560
|
+
getExactText,
|
|
561
|
+
resourceUri,
|
|
562
|
+
annotationUri
|
|
563
|
+
} from "@semiont/api-client";
|
|
564
|
+
import { getEntityTypes } from "@semiont/ontology";
|
|
565
|
+
import {
|
|
566
|
+
CREATION_METHODS,
|
|
567
|
+
generateUuid,
|
|
568
|
+
resourceId,
|
|
569
|
+
annotationId
|
|
570
|
+
} from "@semiont/core";
|
|
571
|
+
var GenerationWorker = class extends JobWorker2 {
|
|
572
|
+
constructor(jobQueue, config, eventStore, inferenceClient) {
|
|
573
|
+
super(jobQueue);
|
|
574
|
+
this.config = config;
|
|
575
|
+
this.eventStore = eventStore;
|
|
576
|
+
this.inferenceClient = inferenceClient;
|
|
248
577
|
}
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
* @returns Array of all annotation objects
|
|
252
|
-
*/
|
|
253
|
-
static async getAllAnnotations(resourceId2, config) {
|
|
254
|
-
const annotations = await this.getResourceAnnotations(resourceId2, config);
|
|
255
|
-
return await this.enrichResolvedReferences(annotations.annotations, config);
|
|
578
|
+
getWorkerName() {
|
|
579
|
+
return "GenerationWorker";
|
|
256
580
|
}
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
if (!config.services?.filesystem?.path) {
|
|
264
|
-
return annotations;
|
|
581
|
+
canProcessJob(job) {
|
|
582
|
+
return job.metadata.type === "generation";
|
|
583
|
+
}
|
|
584
|
+
async executeJob(job) {
|
|
585
|
+
if (job.metadata.type !== "generation") {
|
|
586
|
+
throw new Error(`Invalid job type: ${job.metadata.type}`);
|
|
265
587
|
}
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
588
|
+
if (job.status !== "running") {
|
|
589
|
+
throw new Error(`Job must be in running state to execute, got: ${job.status}`);
|
|
590
|
+
}
|
|
591
|
+
await this.processGenerationJob(job);
|
|
592
|
+
}
|
|
593
|
+
async processGenerationJob(job) {
|
|
594
|
+
console.log(`[GenerationWorker] Processing generation for reference ${job.params.referenceId} (job: ${job.metadata.id})`);
|
|
595
|
+
const basePath = this.config.services.filesystem.path;
|
|
596
|
+
const projectRoot = this.config._metadata?.projectRoot;
|
|
597
|
+
const repStore = new FilesystemRepresentationStore2({ basePath }, projectRoot);
|
|
598
|
+
let updatedJob = {
|
|
599
|
+
...job,
|
|
600
|
+
progress: {
|
|
601
|
+
stage: "fetching",
|
|
602
|
+
percentage: 20,
|
|
603
|
+
message: "Fetching source resource..."
|
|
275
604
|
}
|
|
605
|
+
};
|
|
606
|
+
console.log(`[GenerationWorker] \u{1F4E5} ${updatedJob.progress.message}`);
|
|
607
|
+
await this.updateJobProgress(updatedJob);
|
|
608
|
+
const { FilesystemViewStorage: FilesystemViewStorage3 } = await import("@semiont/event-sourcing");
|
|
609
|
+
const viewStorage = new FilesystemViewStorage3(basePath, projectRoot);
|
|
610
|
+
const view = await viewStorage.get(job.params.sourceResourceId);
|
|
611
|
+
if (!view) {
|
|
612
|
+
throw new Error(`Resource ${job.params.sourceResourceId} not found`);
|
|
276
613
|
}
|
|
277
|
-
|
|
278
|
-
|
|
614
|
+
const projection = view.annotations;
|
|
615
|
+
const expectedAnnotationUri = `${this.config.services.backend.publicURL}/annotations/${job.params.referenceId}`;
|
|
616
|
+
const annotation = projection.annotations.find(
|
|
617
|
+
(a) => a.id === expectedAnnotationUri && a.motivation === "linking"
|
|
618
|
+
);
|
|
619
|
+
if (!annotation) {
|
|
620
|
+
throw new Error(`Annotation ${job.params.referenceId} not found in resource ${job.params.sourceResourceId}`);
|
|
279
621
|
}
|
|
280
|
-
const
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
} catch (e) {
|
|
622
|
+
const sourceResource = await ResourceContext.getResourceMetadata(job.params.sourceResourceId, this.config);
|
|
623
|
+
if (!sourceResource) {
|
|
624
|
+
throw new Error(`Source resource ${job.params.sourceResourceId} not found`);
|
|
625
|
+
}
|
|
626
|
+
const targetSelector = getTargetSelector(annotation.target);
|
|
627
|
+
const resourceName = job.params.title || (targetSelector ? getExactText(targetSelector) : "") || "New Resource";
|
|
628
|
+
console.log(`[GenerationWorker] Generating resource: "${resourceName}"`);
|
|
629
|
+
if (!job.params.context) {
|
|
630
|
+
throw new Error("Generation context is required but was not provided in job");
|
|
631
|
+
}
|
|
632
|
+
console.log(`[GenerationWorker] Using pre-fetched context: ${job.params.context.sourceContext?.before?.length || 0} chars before, ${job.params.context.sourceContext?.selected?.length || 0} chars selected, ${job.params.context.sourceContext?.after?.length || 0} chars after`);
|
|
633
|
+
updatedJob = {
|
|
634
|
+
...updatedJob,
|
|
635
|
+
progress: {
|
|
636
|
+
stage: "generating",
|
|
637
|
+
percentage: 40,
|
|
638
|
+
message: "Creating content with AI..."
|
|
298
639
|
}
|
|
299
|
-
|
|
640
|
+
};
|
|
641
|
+
console.log(`[GenerationWorker] \u{1F916} ${updatedJob.progress.message}`);
|
|
642
|
+
await this.updateJobProgress(updatedJob);
|
|
643
|
+
const prompt = job.params.prompt || `Create a comprehensive resource about "${resourceName}"`;
|
|
644
|
+
const annotationEntityTypes = getEntityTypes({ body: annotation.body });
|
|
645
|
+
const generatedContent = await generateResourceFromTopic(
|
|
646
|
+
resourceName,
|
|
647
|
+
job.params.entityTypes || annotationEntityTypes,
|
|
648
|
+
this.inferenceClient,
|
|
649
|
+
prompt,
|
|
650
|
+
job.params.language,
|
|
651
|
+
job.params.context,
|
|
652
|
+
// NEW - context from job (passed from modal)
|
|
653
|
+
job.params.temperature,
|
|
654
|
+
// NEW - from job
|
|
655
|
+
job.params.maxTokens
|
|
656
|
+
// NEW - from job
|
|
657
|
+
);
|
|
658
|
+
console.log(`[GenerationWorker] \u2705 Generated ${generatedContent.content.length} bytes of content`);
|
|
659
|
+
updatedJob = {
|
|
660
|
+
...updatedJob,
|
|
661
|
+
progress: {
|
|
662
|
+
stage: "generating",
|
|
663
|
+
percentage: 70,
|
|
664
|
+
message: "Content ready, creating resource..."
|
|
665
|
+
}
|
|
666
|
+
};
|
|
667
|
+
await this.updateJobProgress(updatedJob);
|
|
668
|
+
const rId = resourceId(generateUuid());
|
|
669
|
+
updatedJob = {
|
|
670
|
+
...updatedJob,
|
|
671
|
+
progress: {
|
|
672
|
+
stage: "creating",
|
|
673
|
+
percentage: 85,
|
|
674
|
+
message: "Saving resource..."
|
|
675
|
+
}
|
|
676
|
+
};
|
|
677
|
+
console.log(`[GenerationWorker] \u{1F4BE} ${updatedJob.progress.message}`);
|
|
678
|
+
await this.updateJobProgress(updatedJob);
|
|
679
|
+
const storedRep = await repStore.store(Buffer.from(generatedContent.content), {
|
|
680
|
+
mediaType: "text/markdown",
|
|
681
|
+
rel: "original"
|
|
300
682
|
});
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
683
|
+
console.log(`[GenerationWorker] \u2705 Saved resource representation to filesystem: ${rId}`);
|
|
684
|
+
await this.eventStore.appendEvent({
|
|
685
|
+
type: "resource.created",
|
|
686
|
+
resourceId: rId,
|
|
687
|
+
userId: job.metadata.userId,
|
|
688
|
+
version: 1,
|
|
689
|
+
payload: {
|
|
690
|
+
name: resourceName,
|
|
691
|
+
format: "text/markdown",
|
|
692
|
+
contentChecksum: storedRep.checksum,
|
|
693
|
+
creationMethod: CREATION_METHODS.GENERATED,
|
|
694
|
+
entityTypes: job.params.entityTypes || annotationEntityTypes,
|
|
695
|
+
language: job.params.language,
|
|
696
|
+
isDraft: true,
|
|
697
|
+
generatedFrom: job.params.referenceId,
|
|
698
|
+
generationPrompt: void 0
|
|
699
|
+
// Could be added if we track the prompt
|
|
306
700
|
}
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
701
|
+
});
|
|
702
|
+
console.log(`[GenerationWorker] Emitted resource.created event for ${rId}`);
|
|
703
|
+
updatedJob = {
|
|
704
|
+
...updatedJob,
|
|
705
|
+
progress: {
|
|
706
|
+
stage: "linking",
|
|
707
|
+
percentage: 95,
|
|
708
|
+
message: "Linking reference...",
|
|
709
|
+
resultResourceId: rId
|
|
710
|
+
// Store for job.completed event
|
|
711
|
+
}
|
|
712
|
+
};
|
|
713
|
+
console.log(`[GenerationWorker] \u{1F517} ${updatedJob.progress.message}`);
|
|
714
|
+
await this.updateJobProgress(updatedJob);
|
|
715
|
+
const newResourceUri = resourceUri(`${this.config.services.backend.publicURL}/resources/${rId}`);
|
|
716
|
+
const operations = [{
|
|
717
|
+
op: "add",
|
|
718
|
+
item: {
|
|
719
|
+
type: "SpecificResource",
|
|
720
|
+
source: newResourceUri,
|
|
721
|
+
purpose: "linking"
|
|
722
|
+
}
|
|
723
|
+
}];
|
|
724
|
+
const annotationIdSegment = job.params.referenceId.split("/").pop();
|
|
725
|
+
await this.eventStore.appendEvent({
|
|
726
|
+
type: "annotation.body.updated",
|
|
727
|
+
resourceId: job.params.sourceResourceId,
|
|
728
|
+
userId: job.metadata.userId,
|
|
729
|
+
version: 1,
|
|
730
|
+
payload: {
|
|
731
|
+
annotationId: annotationId(annotationIdSegment),
|
|
732
|
+
operations
|
|
323
733
|
}
|
|
324
|
-
return ann;
|
|
325
734
|
});
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
updatedAt: annotations.updatedAt
|
|
735
|
+
console.log(`[GenerationWorker] \u2705 Emitted annotation.body.updated event linking ${job.params.referenceId} \u2192 ${rId}`);
|
|
736
|
+
updatedJob = {
|
|
737
|
+
...updatedJob,
|
|
738
|
+
progress: {
|
|
739
|
+
stage: "linking",
|
|
740
|
+
percentage: 100,
|
|
741
|
+
message: "Complete!",
|
|
742
|
+
resultResourceId: rId
|
|
743
|
+
// Store for job.completed event
|
|
744
|
+
}
|
|
337
745
|
};
|
|
746
|
+
await this.updateJobProgress(updatedJob);
|
|
747
|
+
console.log(`[GenerationWorker] \u2705 Generation complete: created resource ${rId}`);
|
|
338
748
|
}
|
|
339
749
|
/**
|
|
340
|
-
*
|
|
750
|
+
* Update job progress and emit events to Event Store
|
|
751
|
+
* Overrides base class to also emit job progress events
|
|
341
752
|
*/
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
753
|
+
async updateJobProgress(job) {
|
|
754
|
+
await super.updateJobProgress(job);
|
|
755
|
+
if (job.metadata.type !== "generation") {
|
|
756
|
+
return;
|
|
757
|
+
}
|
|
758
|
+
if (job.status !== "running") {
|
|
759
|
+
return;
|
|
760
|
+
}
|
|
761
|
+
const genJob = job;
|
|
762
|
+
const baseEvent = {
|
|
763
|
+
resourceId: genJob.params.sourceResourceId,
|
|
764
|
+
userId: genJob.metadata.userId,
|
|
765
|
+
version: 1
|
|
766
|
+
};
|
|
767
|
+
if (genJob.progress.stage === "fetching" && genJob.progress.percentage === 20) {
|
|
768
|
+
await this.eventStore.appendEvent({
|
|
769
|
+
type: "job.started",
|
|
770
|
+
...baseEvent,
|
|
771
|
+
payload: {
|
|
772
|
+
jobId: genJob.metadata.id,
|
|
773
|
+
jobType: genJob.metadata.type,
|
|
774
|
+
totalSteps: 5
|
|
775
|
+
// fetching, generating, creating, linking, complete
|
|
776
|
+
}
|
|
777
|
+
});
|
|
778
|
+
} else if (genJob.progress.stage === "linking" && genJob.progress.percentage === 100) {
|
|
779
|
+
await this.eventStore.appendEvent({
|
|
780
|
+
type: "job.completed",
|
|
781
|
+
...baseEvent,
|
|
782
|
+
payload: {
|
|
783
|
+
jobId: genJob.metadata.id,
|
|
784
|
+
jobType: genJob.metadata.type,
|
|
785
|
+
resultResourceId: genJob.progress.resultResourceId,
|
|
786
|
+
annotationUri: annotationUri(`${this.config.services.backend.publicURL}/annotations/${genJob.params.referenceId}`)
|
|
787
|
+
}
|
|
788
|
+
});
|
|
789
|
+
} else {
|
|
790
|
+
await this.eventStore.appendEvent({
|
|
791
|
+
type: "job.progress",
|
|
792
|
+
...baseEvent,
|
|
793
|
+
payload: {
|
|
794
|
+
jobId: genJob.metadata.id,
|
|
795
|
+
jobType: genJob.metadata.type,
|
|
796
|
+
currentStep: genJob.progress.stage,
|
|
797
|
+
percentage: genJob.progress.percentage,
|
|
798
|
+
message: genJob.progress.message
|
|
799
|
+
}
|
|
800
|
+
});
|
|
345
801
|
}
|
|
346
|
-
const basePath = config.services.filesystem.path;
|
|
347
|
-
const projectRoot = config._metadata?.projectRoot;
|
|
348
|
-
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
349
|
-
return await viewStorage.exists(resourceId2);
|
|
350
802
|
}
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
803
|
+
};
|
|
804
|
+
|
|
805
|
+
// src/jobs/highlight-detection-worker.ts
|
|
806
|
+
import { JobWorker as JobWorker3 } from "@semiont/jobs";
|
|
807
|
+
import { generateAnnotationId as generateAnnotationId2 } from "@semiont/event-sourcing";
|
|
808
|
+
import { resourceIdToURI as resourceIdToURI2 } from "@semiont/core";
|
|
809
|
+
import { userId } from "@semiont/core";
|
|
810
|
+
var HighlightDetectionWorker = class extends JobWorker3 {
|
|
811
|
+
constructor(jobQueue, config, eventStore, inferenceClient) {
|
|
812
|
+
super(jobQueue);
|
|
813
|
+
this.config = config;
|
|
814
|
+
this.eventStore = eventStore;
|
|
815
|
+
this.inferenceClient = inferenceClient;
|
|
361
816
|
}
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
817
|
+
isFirstProgress = true;
|
|
818
|
+
getWorkerName() {
|
|
819
|
+
return "HighlightDetectionWorker";
|
|
820
|
+
}
|
|
821
|
+
canProcessJob(job) {
|
|
822
|
+
return job.metadata.type === "highlight-detection";
|
|
823
|
+
}
|
|
824
|
+
async executeJob(job) {
|
|
825
|
+
if (job.metadata.type !== "highlight-detection") {
|
|
826
|
+
throw new Error(`Invalid job type: ${job.metadata.type}`);
|
|
370
827
|
}
|
|
371
|
-
|
|
828
|
+
if (job.status !== "running") {
|
|
829
|
+
throw new Error(`Job must be in running state to execute, got: ${job.status}`);
|
|
830
|
+
}
|
|
831
|
+
this.isFirstProgress = true;
|
|
832
|
+
await this.processHighlightDetectionJob(job);
|
|
372
833
|
}
|
|
373
834
|
/**
|
|
374
|
-
*
|
|
835
|
+
* Override updateJobProgress to emit events to Event Store
|
|
375
836
|
*/
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
if (!annotation) {
|
|
382
|
-
throw new Error("Annotation not found");
|
|
837
|
+
async updateJobProgress(job) {
|
|
838
|
+
await super.updateJobProgress(job);
|
|
839
|
+
if (job.metadata.type !== "highlight-detection") return;
|
|
840
|
+
if (job.status !== "running") {
|
|
841
|
+
return;
|
|
383
842
|
}
|
|
384
|
-
const
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
843
|
+
const hlJob = job;
|
|
844
|
+
const baseEvent = {
|
|
845
|
+
resourceId: hlJob.params.resourceId,
|
|
846
|
+
userId: hlJob.metadata.userId,
|
|
847
|
+
version: 1
|
|
848
|
+
};
|
|
849
|
+
const isComplete = hlJob.progress.percentage === 100;
|
|
850
|
+
if (this.isFirstProgress) {
|
|
851
|
+
this.isFirstProgress = false;
|
|
852
|
+
await this.eventStore.appendEvent({
|
|
853
|
+
type: "job.started",
|
|
854
|
+
...baseEvent,
|
|
855
|
+
payload: {
|
|
856
|
+
jobId: hlJob.metadata.id,
|
|
857
|
+
jobType: hlJob.metadata.type
|
|
858
|
+
}
|
|
859
|
+
});
|
|
860
|
+
} else if (isComplete) {
|
|
861
|
+
await this.eventStore.appendEvent({
|
|
862
|
+
type: "job.completed",
|
|
863
|
+
...baseEvent,
|
|
864
|
+
payload: {
|
|
865
|
+
jobId: hlJob.metadata.id,
|
|
866
|
+
jobType: hlJob.metadata.type
|
|
867
|
+
// Note: result would come from job.result, but that's handled by base class
|
|
868
|
+
}
|
|
869
|
+
});
|
|
870
|
+
} else {
|
|
871
|
+
await this.eventStore.appendEvent({
|
|
872
|
+
type: "job.progress",
|
|
873
|
+
...baseEvent,
|
|
874
|
+
payload: {
|
|
875
|
+
jobId: hlJob.metadata.id,
|
|
876
|
+
jobType: hlJob.metadata.type,
|
|
877
|
+
progress: hlJob.progress
|
|
878
|
+
}
|
|
879
|
+
});
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
async handleJobFailure(job, error) {
|
|
883
|
+
await super.handleJobFailure(job, error);
|
|
884
|
+
if (job.status === "failed" && job.metadata.type === "highlight-detection") {
|
|
885
|
+
const hlJob = job;
|
|
886
|
+
await this.eventStore.appendEvent({
|
|
887
|
+
type: "job.failed",
|
|
888
|
+
resourceId: hlJob.params.resourceId,
|
|
889
|
+
userId: hlJob.metadata.userId,
|
|
890
|
+
version: 1,
|
|
891
|
+
payload: {
|
|
892
|
+
jobId: hlJob.metadata.id,
|
|
893
|
+
jobType: hlJob.metadata.type,
|
|
894
|
+
error: "Highlight detection failed. Please try again later."
|
|
895
|
+
}
|
|
896
|
+
});
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
async processHighlightDetectionJob(job) {
|
|
900
|
+
console.log(`[HighlightDetectionWorker] Processing highlight detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
|
|
901
|
+
const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
|
|
388
902
|
if (!resource) {
|
|
389
|
-
throw new Error(
|
|
903
|
+
throw new Error(`Resource ${job.params.resourceId} not found`);
|
|
390
904
|
}
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
"@context": resource["@context"],
|
|
398
|
-
"@id": resource["@id"],
|
|
399
|
-
name: resource.name,
|
|
400
|
-
entityTypes: resource.entityTypes,
|
|
401
|
-
representations: resource.representations,
|
|
402
|
-
archived: resource.archived,
|
|
403
|
-
creationMethod: resource.creationMethod,
|
|
404
|
-
wasAttributedTo: resource.wasAttributedTo,
|
|
405
|
-
dateCreated: resource.dateCreated
|
|
905
|
+
let updatedJob = {
|
|
906
|
+
...job,
|
|
907
|
+
progress: {
|
|
908
|
+
stage: "analyzing",
|
|
909
|
+
percentage: 10,
|
|
910
|
+
message: "Loading resource..."
|
|
406
911
|
}
|
|
407
912
|
};
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
913
|
+
await this.updateJobProgress(updatedJob);
|
|
914
|
+
updatedJob = {
|
|
915
|
+
...updatedJob,
|
|
916
|
+
progress: {
|
|
917
|
+
stage: "analyzing",
|
|
918
|
+
percentage: 30,
|
|
919
|
+
message: "Analyzing text..."
|
|
920
|
+
}
|
|
921
|
+
};
|
|
922
|
+
await this.updateJobProgress(updatedJob);
|
|
923
|
+
const highlights = await AnnotationDetection.detectHighlights(
|
|
924
|
+
job.params.resourceId,
|
|
925
|
+
this.config,
|
|
926
|
+
this.inferenceClient,
|
|
927
|
+
job.params.instructions,
|
|
928
|
+
job.params.density
|
|
423
929
|
);
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
const summary = await this.generateSummary(resource, context, annotationEntityTypes, config);
|
|
432
|
-
return {
|
|
433
|
-
summary,
|
|
434
|
-
relevantFields: {
|
|
435
|
-
resourceId: resource.id,
|
|
436
|
-
resourceName: resource.name,
|
|
437
|
-
entityTypes: annotationEntityTypes
|
|
438
|
-
},
|
|
439
|
-
context: {
|
|
440
|
-
before: context.before.substring(Math.max(0, context.before.length - 200)),
|
|
441
|
-
// Last 200 chars
|
|
442
|
-
selected: context.selected,
|
|
443
|
-
after: context.after.substring(0, 200)
|
|
444
|
-
// First 200 chars
|
|
930
|
+
console.log(`[HighlightDetectionWorker] Found ${highlights.length} highlights to create`);
|
|
931
|
+
updatedJob = {
|
|
932
|
+
...updatedJob,
|
|
933
|
+
progress: {
|
|
934
|
+
stage: "creating",
|
|
935
|
+
percentage: 60,
|
|
936
|
+
message: `Creating ${highlights.length} annotations...`
|
|
445
937
|
}
|
|
446
938
|
};
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
const content = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
457
|
-
return decodeRepresentation2(content, primaryRep.mediaType);
|
|
458
|
-
}
|
|
459
|
-
/**
|
|
460
|
-
* Extract annotation context from resource content
|
|
461
|
-
*/
|
|
462
|
-
static extractAnnotationContext(annotation, contentStr, contextBefore, contextAfter) {
|
|
463
|
-
const targetSelector = getTargetSelector(annotation.target);
|
|
464
|
-
const posSelector = targetSelector ? getTextPositionSelector(targetSelector) : null;
|
|
465
|
-
if (!posSelector) {
|
|
466
|
-
throw new Error("TextPositionSelector required for context");
|
|
939
|
+
await this.updateJobProgress(updatedJob);
|
|
940
|
+
let created = 0;
|
|
941
|
+
for (const highlight of highlights) {
|
|
942
|
+
try {
|
|
943
|
+
await this.createHighlightAnnotation(job.params.resourceId, job.metadata.userId, highlight);
|
|
944
|
+
created++;
|
|
945
|
+
} catch (error) {
|
|
946
|
+
console.error(`[HighlightDetectionWorker] Failed to create highlight:`, error);
|
|
947
|
+
}
|
|
467
948
|
}
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
after: contentStr.substring(selEnd, end)
|
|
949
|
+
updatedJob = {
|
|
950
|
+
...updatedJob,
|
|
951
|
+
progress: {
|
|
952
|
+
stage: "creating",
|
|
953
|
+
percentage: 100,
|
|
954
|
+
message: `Complete! Created ${created} highlights`
|
|
955
|
+
}
|
|
476
956
|
};
|
|
957
|
+
await this.updateJobProgress(updatedJob);
|
|
958
|
+
console.log(`[HighlightDetectionWorker] \u2705 Created ${created}/${highlights.length} highlights`);
|
|
477
959
|
}
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
const
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
960
|
+
async createHighlightAnnotation(resourceId2, creatorUserId, highlight) {
|
|
961
|
+
const backendUrl = this.config.services.backend?.publicURL;
|
|
962
|
+
if (!backendUrl) throw new Error("Backend publicURL not configured");
|
|
963
|
+
const annotationId2 = generateAnnotationId2(backendUrl);
|
|
964
|
+
const resourceUri3 = resourceIdToURI2(resourceId2, backendUrl);
|
|
965
|
+
const annotation = {
|
|
966
|
+
"@context": "http://www.w3.org/ns/anno.jsonld",
|
|
967
|
+
"type": "Annotation",
|
|
968
|
+
"id": annotationId2,
|
|
969
|
+
"motivation": "highlighting",
|
|
970
|
+
"creator": userId(creatorUserId),
|
|
971
|
+
"created": (/* @__PURE__ */ new Date()).toISOString(),
|
|
972
|
+
"target": {
|
|
973
|
+
type: "SpecificResource",
|
|
974
|
+
source: resourceUri3,
|
|
975
|
+
selector: [
|
|
976
|
+
{
|
|
977
|
+
type: "TextPositionSelector",
|
|
978
|
+
start: highlight.start,
|
|
979
|
+
end: highlight.end
|
|
980
|
+
},
|
|
981
|
+
{
|
|
982
|
+
type: "TextQuoteSelector",
|
|
983
|
+
exact: highlight.exact,
|
|
984
|
+
...highlight.prefix && { prefix: highlight.prefix },
|
|
985
|
+
...highlight.suffix && { suffix: highlight.suffix }
|
|
986
|
+
}
|
|
987
|
+
]
|
|
988
|
+
},
|
|
989
|
+
"body": []
|
|
990
|
+
// Empty body for highlights
|
|
991
|
+
};
|
|
992
|
+
await this.eventStore.appendEvent({
|
|
993
|
+
type: "annotation.added",
|
|
994
|
+
resourceId: resourceId2,
|
|
995
|
+
userId: userId(creatorUserId),
|
|
996
|
+
version: 1,
|
|
997
|
+
payload: { annotation }
|
|
998
|
+
});
|
|
491
999
|
}
|
|
492
1000
|
};
|
|
493
1001
|
|
|
494
|
-
// src/
|
|
495
|
-
import {
|
|
496
|
-
import {
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
return await graphDb.getResourceReferencedBy(resourceUri2);
|
|
506
|
-
}
|
|
507
|
-
/**
|
|
508
|
-
* Find shortest path between two resources
|
|
509
|
-
* Requires graph traversal - must use graph database
|
|
510
|
-
*/
|
|
511
|
-
static async findPath(fromResourceId, toResourceId, config, maxDepth) {
|
|
512
|
-
const graphDb = await getGraphDatabase(config);
|
|
513
|
-
return await graphDb.findPath(fromResourceId, toResourceId, maxDepth);
|
|
1002
|
+
// src/jobs/assessment-detection-worker.ts
|
|
1003
|
+
import { JobWorker as JobWorker4 } from "@semiont/jobs";
|
|
1004
|
+
import { generateAnnotationId as generateAnnotationId3 } from "@semiont/event-sourcing";
|
|
1005
|
+
import { resourceIdToURI as resourceIdToURI3 } from "@semiont/core";
|
|
1006
|
+
import { userId as userId2 } from "@semiont/core";
|
|
1007
|
+
var AssessmentDetectionWorker = class extends JobWorker4 {
|
|
1008
|
+
constructor(jobQueue, config, eventStore, inferenceClient) {
|
|
1009
|
+
super(jobQueue);
|
|
1010
|
+
this.config = config;
|
|
1011
|
+
this.eventStore = eventStore;
|
|
1012
|
+
this.inferenceClient = inferenceClient;
|
|
514
1013
|
}
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
*/
|
|
519
|
-
static async getResourceConnections(resourceId2, config) {
|
|
520
|
-
const graphDb = await getGraphDatabase(config);
|
|
521
|
-
return await graphDb.getResourceConnections(resourceId2);
|
|
1014
|
+
isFirstProgress = true;
|
|
1015
|
+
getWorkerName() {
|
|
1016
|
+
return "AssessmentDetectionWorker";
|
|
522
1017
|
}
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
* Requires full-text search - must use graph database
|
|
526
|
-
*/
|
|
527
|
-
static async searchResources(query, config, limit) {
|
|
528
|
-
const graphDb = await getGraphDatabase(config);
|
|
529
|
-
return await graphDb.searchResources(query, limit);
|
|
1018
|
+
canProcessJob(job) {
|
|
1019
|
+
return job.metadata.type === "assessment-detection";
|
|
530
1020
|
}
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
import { FilesystemRepresentationStore as FilesystemRepresentationStore3 } from "@semiont/content";
|
|
535
|
-
import { getPrimaryRepresentation as getPrimaryRepresentation3, decodeRepresentation as decodeRepresentation3 } from "@semiont/api-client";
|
|
536
|
-
import {
|
|
537
|
-
MotivationPrompts,
|
|
538
|
-
MotivationParsers,
|
|
539
|
-
generateText as generateText2
|
|
540
|
-
} from "@semiont/inference";
|
|
541
|
-
import { getTagSchema, getSchemaCategory } from "@semiont/ontology";
|
|
542
|
-
var AnnotationDetection = class {
|
|
543
|
-
/**
|
|
544
|
-
* Detect comments in a resource
|
|
545
|
-
*
|
|
546
|
-
* @param resourceId - The resource to analyze
|
|
547
|
-
* @param config - Environment configuration
|
|
548
|
-
* @param instructions - Optional user instructions for comment generation
|
|
549
|
-
* @param tone - Optional tone guidance (e.g., "academic", "conversational")
|
|
550
|
-
* @param density - Optional target number of comments per 2000 words
|
|
551
|
-
* @returns Array of validated comment matches
|
|
552
|
-
*/
|
|
553
|
-
static async detectComments(resourceId2, config, instructions, tone, density) {
|
|
554
|
-
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
555
|
-
if (!resource) {
|
|
556
|
-
throw new Error(`Resource ${resourceId2} not found`);
|
|
1021
|
+
async executeJob(job) {
|
|
1022
|
+
if (job.metadata.type !== "assessment-detection") {
|
|
1023
|
+
throw new Error(`Invalid job type: ${job.metadata.type}`);
|
|
557
1024
|
}
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
1025
|
+
if (job.status !== "running") {
|
|
1026
|
+
throw new Error(`Job must be in running state to execute, got: ${job.status}`);
|
|
561
1027
|
}
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
prompt,
|
|
565
|
-
config,
|
|
566
|
-
3e3,
|
|
567
|
-
// maxTokens: Higher than highlights/assessments due to comment text
|
|
568
|
-
0.4
|
|
569
|
-
// temperature: Slightly higher to allow creative context
|
|
570
|
-
);
|
|
571
|
-
return MotivationParsers.parseComments(response, content);
|
|
1028
|
+
this.isFirstProgress = true;
|
|
1029
|
+
await this.processAssessmentDetectionJob(job);
|
|
572
1030
|
}
|
|
573
1031
|
/**
|
|
574
|
-
*
|
|
575
|
-
*
|
|
576
|
-
* @param resourceId - The resource to analyze
|
|
577
|
-
* @param config - Environment configuration
|
|
578
|
-
* @param instructions - Optional user instructions for highlight selection
|
|
579
|
-
* @param density - Optional target number of highlights per 2000 words
|
|
580
|
-
* @returns Array of validated highlight matches
|
|
1032
|
+
* Override updateJobProgress to emit events to Event Store
|
|
581
1033
|
*/
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
if (
|
|
585
|
-
|
|
1034
|
+
async updateJobProgress(job) {
|
|
1035
|
+
await super.updateJobProgress(job);
|
|
1036
|
+
if (job.metadata.type !== "assessment-detection") return;
|
|
1037
|
+
if (job.status !== "running") {
|
|
1038
|
+
return;
|
|
586
1039
|
}
|
|
587
|
-
const
|
|
588
|
-
|
|
589
|
-
|
|
1040
|
+
const assJob = job;
|
|
1041
|
+
const baseEvent = {
|
|
1042
|
+
resourceId: assJob.params.resourceId,
|
|
1043
|
+
userId: assJob.metadata.userId,
|
|
1044
|
+
version: 1
|
|
1045
|
+
};
|
|
1046
|
+
const isComplete = assJob.progress.percentage === 100;
|
|
1047
|
+
if (this.isFirstProgress) {
|
|
1048
|
+
this.isFirstProgress = false;
|
|
1049
|
+
await this.eventStore.appendEvent({
|
|
1050
|
+
type: "job.started",
|
|
1051
|
+
...baseEvent,
|
|
1052
|
+
payload: {
|
|
1053
|
+
jobId: assJob.metadata.id,
|
|
1054
|
+
jobType: assJob.metadata.type
|
|
1055
|
+
}
|
|
1056
|
+
});
|
|
1057
|
+
} else if (isComplete) {
|
|
1058
|
+
await this.eventStore.appendEvent({
|
|
1059
|
+
type: "job.completed",
|
|
1060
|
+
...baseEvent,
|
|
1061
|
+
payload: {
|
|
1062
|
+
jobId: assJob.metadata.id,
|
|
1063
|
+
jobType: assJob.metadata.type
|
|
1064
|
+
// Note: result would come from job.result, but that's handled by base class
|
|
1065
|
+
}
|
|
1066
|
+
});
|
|
1067
|
+
} else {
|
|
1068
|
+
await this.eventStore.appendEvent({
|
|
1069
|
+
type: "job.progress",
|
|
1070
|
+
...baseEvent,
|
|
1071
|
+
payload: {
|
|
1072
|
+
jobId: assJob.metadata.id,
|
|
1073
|
+
jobType: assJob.metadata.type,
|
|
1074
|
+
progress: assJob.progress
|
|
1075
|
+
}
|
|
1076
|
+
});
|
|
590
1077
|
}
|
|
591
|
-
const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density);
|
|
592
|
-
const response = await generateText2(
|
|
593
|
-
prompt,
|
|
594
|
-
config,
|
|
595
|
-
2e3,
|
|
596
|
-
// maxTokens: Lower than comments/assessments (no body text)
|
|
597
|
-
0.3
|
|
598
|
-
// temperature: Low for consistent importance judgments
|
|
599
|
-
);
|
|
600
|
-
return MotivationParsers.parseHighlights(response, content);
|
|
601
1078
|
}
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
const content = await this.loadResourceContent(resourceId2, config);
|
|
618
|
-
if (!content) {
|
|
619
|
-
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
1079
|
+
async handleJobFailure(job, error) {
|
|
1080
|
+
await super.handleJobFailure(job, error);
|
|
1081
|
+
if (job.status === "failed" && job.metadata.type === "assessment-detection") {
|
|
1082
|
+
const aJob = job;
|
|
1083
|
+
await this.eventStore.appendEvent({
|
|
1084
|
+
type: "job.failed",
|
|
1085
|
+
resourceId: aJob.params.resourceId,
|
|
1086
|
+
userId: aJob.metadata.userId,
|
|
1087
|
+
version: 1,
|
|
1088
|
+
payload: {
|
|
1089
|
+
jobId: aJob.metadata.id,
|
|
1090
|
+
jobType: aJob.metadata.type,
|
|
1091
|
+
error: "Assessment detection failed. Please try again later."
|
|
1092
|
+
}
|
|
1093
|
+
});
|
|
620
1094
|
}
|
|
621
|
-
const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density);
|
|
622
|
-
const response = await generateText2(
|
|
623
|
-
prompt,
|
|
624
|
-
config,
|
|
625
|
-
3e3,
|
|
626
|
-
// maxTokens: Higher for assessment text
|
|
627
|
-
0.3
|
|
628
|
-
// temperature: Lower for analytical consistency
|
|
629
|
-
);
|
|
630
|
-
return MotivationParsers.parseAssessments(response, content);
|
|
631
1095
|
}
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
* @param resourceId - The resource to analyze
|
|
636
|
-
* @param config - Environment configuration
|
|
637
|
-
* @param schemaId - The tag schema identifier (e.g., "irac", "imrad")
|
|
638
|
-
* @param category - The specific category to detect
|
|
639
|
-
* @returns Array of validated tag matches
|
|
640
|
-
*/
|
|
641
|
-
static async detectTags(resourceId2, config, schemaId, category) {
|
|
642
|
-
const schema = getTagSchema(schemaId);
|
|
643
|
-
if (!schema) {
|
|
644
|
-
throw new Error(`Invalid tag schema: ${schemaId}`);
|
|
645
|
-
}
|
|
646
|
-
const categoryInfo = getSchemaCategory(schemaId, category);
|
|
647
|
-
if (!categoryInfo) {
|
|
648
|
-
throw new Error(`Invalid category "${category}" for schema ${schemaId}`);
|
|
649
|
-
}
|
|
650
|
-
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
1096
|
+
async processAssessmentDetectionJob(job) {
|
|
1097
|
+
console.log(`[AssessmentDetectionWorker] Processing assessment detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
|
|
1098
|
+
const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
|
|
651
1099
|
if (!resource) {
|
|
652
|
-
throw new Error(`Resource ${
|
|
653
|
-
}
|
|
654
|
-
const content = await this.loadResourceContent(resourceId2, config);
|
|
655
|
-
if (!content) {
|
|
656
|
-
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
1100
|
+
throw new Error(`Resource ${job.params.resourceId} not found`);
|
|
657
1101
|
}
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
);
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
1102
|
+
let updatedJob = {
|
|
1103
|
+
...job,
|
|
1104
|
+
progress: {
|
|
1105
|
+
stage: "analyzing",
|
|
1106
|
+
percentage: 10,
|
|
1107
|
+
message: "Loading resource..."
|
|
1108
|
+
}
|
|
1109
|
+
};
|
|
1110
|
+
await this.updateJobProgress(updatedJob);
|
|
1111
|
+
updatedJob = {
|
|
1112
|
+
...updatedJob,
|
|
1113
|
+
progress: {
|
|
1114
|
+
stage: "analyzing",
|
|
1115
|
+
percentage: 30,
|
|
1116
|
+
message: "Analyzing text..."
|
|
1117
|
+
}
|
|
1118
|
+
};
|
|
1119
|
+
await this.updateJobProgress(updatedJob);
|
|
1120
|
+
const assessments = await AnnotationDetection.detectAssessments(
|
|
1121
|
+
job.params.resourceId,
|
|
1122
|
+
this.config,
|
|
1123
|
+
this.inferenceClient,
|
|
1124
|
+
job.params.instructions,
|
|
1125
|
+
job.params.tone,
|
|
1126
|
+
job.params.density
|
|
674
1127
|
);
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
return null;
|
|
1128
|
+
console.log(`[AssessmentDetectionWorker] Found ${assessments.length} assessments to create`);
|
|
1129
|
+
updatedJob = {
|
|
1130
|
+
...updatedJob,
|
|
1131
|
+
progress: {
|
|
1132
|
+
stage: "creating",
|
|
1133
|
+
percentage: 60,
|
|
1134
|
+
message: `Creating ${assessments.length} annotations...`
|
|
1135
|
+
}
|
|
1136
|
+
};
|
|
1137
|
+
await this.updateJobProgress(updatedJob);
|
|
1138
|
+
let created = 0;
|
|
1139
|
+
for (const assessment of assessments) {
|
|
1140
|
+
try {
|
|
1141
|
+
await this.createAssessmentAnnotation(job.params.resourceId, job.metadata.userId, assessment);
|
|
1142
|
+
created++;
|
|
1143
|
+
} catch (error) {
|
|
1144
|
+
console.error(`[AssessmentDetectionWorker] Failed to create assessment:`, error);
|
|
1145
|
+
}
|
|
694
1146
|
}
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
1147
|
+
updatedJob = {
|
|
1148
|
+
...updatedJob,
|
|
1149
|
+
progress: {
|
|
1150
|
+
stage: "creating",
|
|
1151
|
+
percentage: 100,
|
|
1152
|
+
message: `Complete! Created ${created} assessments`
|
|
1153
|
+
}
|
|
1154
|
+
};
|
|
1155
|
+
await this.updateJobProgress(updatedJob);
|
|
1156
|
+
console.log(`[AssessmentDetectionWorker] \u2705 Created ${created}/${assessments.length} assessments`);
|
|
1157
|
+
}
|
|
1158
|
+
async createAssessmentAnnotation(resourceId2, creatorUserId, assessment) {
|
|
1159
|
+
const backendUrl = this.config.services.backend?.publicURL;
|
|
1160
|
+
if (!backendUrl) throw new Error("Backend publicURL not configured");
|
|
1161
|
+
const annotationId2 = generateAnnotationId3(backendUrl);
|
|
1162
|
+
const resourceUri3 = resourceIdToURI3(resourceId2, backendUrl);
|
|
1163
|
+
const annotation = {
|
|
1164
|
+
"@context": "http://www.w3.org/ns/anno.jsonld",
|
|
1165
|
+
"type": "Annotation",
|
|
1166
|
+
"id": annotationId2,
|
|
1167
|
+
"motivation": "assessing",
|
|
1168
|
+
"creator": userId2(creatorUserId),
|
|
1169
|
+
"created": (/* @__PURE__ */ new Date()).toISOString(),
|
|
1170
|
+
"target": {
|
|
1171
|
+
type: "SpecificResource",
|
|
1172
|
+
source: resourceUri3,
|
|
1173
|
+
selector: [
|
|
1174
|
+
{
|
|
1175
|
+
type: "TextPositionSelector",
|
|
1176
|
+
start: assessment.start,
|
|
1177
|
+
end: assessment.end
|
|
1178
|
+
},
|
|
1179
|
+
{
|
|
1180
|
+
type: "TextQuoteSelector",
|
|
1181
|
+
exact: assessment.exact,
|
|
1182
|
+
...assessment.prefix && { prefix: assessment.prefix },
|
|
1183
|
+
...assessment.suffix && { suffix: assessment.suffix }
|
|
1184
|
+
}
|
|
1185
|
+
]
|
|
1186
|
+
},
|
|
1187
|
+
"body": {
|
|
1188
|
+
type: "TextualBody",
|
|
1189
|
+
value: assessment.assessment,
|
|
1190
|
+
format: "text/plain"
|
|
1191
|
+
}
|
|
1192
|
+
};
|
|
1193
|
+
await this.eventStore.appendEvent({
|
|
1194
|
+
type: "annotation.added",
|
|
1195
|
+
resourceId: resourceId2,
|
|
1196
|
+
userId: userId2(creatorUserId),
|
|
1197
|
+
version: 1,
|
|
1198
|
+
payload: { annotation }
|
|
1199
|
+
});
|
|
701
1200
|
}
|
|
702
1201
|
};
|
|
703
1202
|
|
|
704
|
-
// src/jobs/
|
|
705
|
-
import { JobWorker } from "@semiont/jobs";
|
|
706
|
-
import { generateAnnotationId } from "@semiont/event-sourcing";
|
|
707
|
-
import { resourceIdToURI as
|
|
708
|
-
import { userId } from "@semiont/core";
|
|
709
|
-
var CommentDetectionWorker = class extends
|
|
710
|
-
constructor(jobQueue, config, eventStore) {
|
|
1203
|
+
// src/jobs/comment-detection-worker.ts
|
|
1204
|
+
import { JobWorker as JobWorker5 } from "@semiont/jobs";
|
|
1205
|
+
import { generateAnnotationId as generateAnnotationId4 } from "@semiont/event-sourcing";
|
|
1206
|
+
import { resourceIdToURI as resourceIdToURI4 } from "@semiont/core";
|
|
1207
|
+
import { userId as userId3 } from "@semiont/core";
|
|
1208
|
+
var CommentDetectionWorker = class extends JobWorker5 {
|
|
1209
|
+
constructor(jobQueue, config, eventStore, inferenceClient) {
|
|
711
1210
|
super(jobQueue);
|
|
712
1211
|
this.config = config;
|
|
713
1212
|
this.eventStore = eventStore;
|
|
1213
|
+
this.inferenceClient = inferenceClient;
|
|
714
1214
|
}
|
|
715
1215
|
isFirstProgress = true;
|
|
716
1216
|
getWorkerName() {
|
|
717
1217
|
return "CommentDetectionWorker";
|
|
718
1218
|
}
|
|
719
1219
|
canProcessJob(job) {
|
|
720
|
-
return job.type === "comment-detection";
|
|
1220
|
+
return job.metadata.type === "comment-detection";
|
|
721
1221
|
}
|
|
722
1222
|
async executeJob(job) {
|
|
723
|
-
if (job.type !== "comment-detection") {
|
|
724
|
-
throw new Error(`Invalid job type: ${job.type}`);
|
|
1223
|
+
if (job.metadata.type !== "comment-detection") {
|
|
1224
|
+
throw new Error(`Invalid job type: ${job.metadata.type}`);
|
|
1225
|
+
}
|
|
1226
|
+
if (job.status !== "running") {
|
|
1227
|
+
throw new Error(`Job must be in running state to execute, got: ${job.status}`);
|
|
725
1228
|
}
|
|
726
1229
|
this.isFirstProgress = true;
|
|
727
1230
|
await this.processCommentDetectionJob(job);
|
|
@@ -731,23 +1234,25 @@ var CommentDetectionWorker = class extends JobWorker {
|
|
|
731
1234
|
*/
|
|
732
1235
|
async updateJobProgress(job) {
|
|
733
1236
|
await super.updateJobProgress(job);
|
|
734
|
-
if (job.type !== "comment-detection") return;
|
|
1237
|
+
if (job.metadata.type !== "comment-detection") return;
|
|
1238
|
+
if (job.status !== "running") {
|
|
1239
|
+
return;
|
|
1240
|
+
}
|
|
735
1241
|
const cdJob = job;
|
|
736
|
-
if (!cdJob.progress) return;
|
|
737
1242
|
const baseEvent = {
|
|
738
|
-
resourceId: cdJob.resourceId,
|
|
739
|
-
userId: cdJob.userId,
|
|
1243
|
+
resourceId: cdJob.params.resourceId,
|
|
1244
|
+
userId: cdJob.metadata.userId,
|
|
740
1245
|
version: 1
|
|
741
1246
|
};
|
|
742
|
-
const isComplete = cdJob.progress.percentage === 100
|
|
1247
|
+
const isComplete = cdJob.progress.percentage === 100;
|
|
743
1248
|
if (this.isFirstProgress) {
|
|
744
1249
|
this.isFirstProgress = false;
|
|
745
1250
|
await this.eventStore.appendEvent({
|
|
746
1251
|
type: "job.started",
|
|
747
1252
|
...baseEvent,
|
|
748
1253
|
payload: {
|
|
749
|
-
jobId: cdJob.id,
|
|
750
|
-
jobType: cdJob.type
|
|
1254
|
+
jobId: cdJob.metadata.id,
|
|
1255
|
+
jobType: cdJob.metadata.type
|
|
751
1256
|
}
|
|
752
1257
|
});
|
|
753
1258
|
} else if (isComplete) {
|
|
@@ -755,9 +1260,9 @@ var CommentDetectionWorker = class extends JobWorker {
|
|
|
755
1260
|
type: "job.completed",
|
|
756
1261
|
...baseEvent,
|
|
757
1262
|
payload: {
|
|
758
|
-
jobId: cdJob.id,
|
|
759
|
-
jobType: cdJob.type
|
|
760
|
-
|
|
1263
|
+
jobId: cdJob.metadata.id,
|
|
1264
|
+
jobType: cdJob.metadata.type
|
|
1265
|
+
// Note: result would come from job.result, but that's handled by base class
|
|
761
1266
|
}
|
|
762
1267
|
});
|
|
763
1268
|
} else {
|
|
@@ -765,8 +1270,8 @@ var CommentDetectionWorker = class extends JobWorker {
|
|
|
765
1270
|
type: "job.progress",
|
|
766
1271
|
...baseEvent,
|
|
767
1272
|
payload: {
|
|
768
|
-
jobId: cdJob.id,
|
|
769
|
-
jobType: cdJob.type,
|
|
1273
|
+
jobId: cdJob.metadata.id,
|
|
1274
|
+
jobType: cdJob.metadata.type,
|
|
770
1275
|
progress: cdJob.progress
|
|
771
1276
|
}
|
|
772
1277
|
});
|
|
@@ -774,72 +1279,81 @@ var CommentDetectionWorker = class extends JobWorker {
|
|
|
774
1279
|
}
|
|
775
1280
|
async handleJobFailure(job, error) {
|
|
776
1281
|
await super.handleJobFailure(job, error);
|
|
777
|
-
if (job.status === "failed" && job.type === "comment-detection") {
|
|
1282
|
+
if (job.status === "failed" && job.metadata.type === "comment-detection") {
|
|
778
1283
|
const cdJob = job;
|
|
779
1284
|
await this.eventStore.appendEvent({
|
|
780
1285
|
type: "job.failed",
|
|
781
|
-
resourceId: cdJob.resourceId,
|
|
782
|
-
userId: cdJob.userId,
|
|
1286
|
+
resourceId: cdJob.params.resourceId,
|
|
1287
|
+
userId: cdJob.metadata.userId,
|
|
783
1288
|
version: 1,
|
|
784
1289
|
payload: {
|
|
785
|
-
jobId: cdJob.id,
|
|
786
|
-
jobType: cdJob.type,
|
|
1290
|
+
jobId: cdJob.metadata.id,
|
|
1291
|
+
jobType: cdJob.metadata.type,
|
|
787
1292
|
error: "Comment detection failed. Please try again later."
|
|
788
1293
|
}
|
|
789
1294
|
});
|
|
790
1295
|
}
|
|
791
1296
|
}
|
|
792
1297
|
async processCommentDetectionJob(job) {
|
|
793
|
-
console.log(`[CommentDetectionWorker] Processing comment detection for resource ${job.resourceId} (job: ${job.id})`);
|
|
794
|
-
const resource = await ResourceContext.getResourceMetadata(job.resourceId, this.config);
|
|
1298
|
+
console.log(`[CommentDetectionWorker] Processing comment detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
|
|
1299
|
+
const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
|
|
795
1300
|
if (!resource) {
|
|
796
|
-
throw new Error(`Resource ${job.resourceId} not found`);
|
|
1301
|
+
throw new Error(`Resource ${job.params.resourceId} not found`);
|
|
797
1302
|
}
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
1303
|
+
let updatedJob = {
|
|
1304
|
+
...job,
|
|
1305
|
+
progress: {
|
|
1306
|
+
stage: "analyzing",
|
|
1307
|
+
percentage: 10,
|
|
1308
|
+
message: "Loading resource..."
|
|
1309
|
+
}
|
|
802
1310
|
};
|
|
803
|
-
await this.updateJobProgress(
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
1311
|
+
await this.updateJobProgress(updatedJob);
|
|
1312
|
+
updatedJob = {
|
|
1313
|
+
...updatedJob,
|
|
1314
|
+
progress: {
|
|
1315
|
+
stage: "analyzing",
|
|
1316
|
+
percentage: 30,
|
|
1317
|
+
message: "Analyzing text and generating comments..."
|
|
1318
|
+
}
|
|
808
1319
|
};
|
|
809
|
-
await this.updateJobProgress(
|
|
1320
|
+
await this.updateJobProgress(updatedJob);
|
|
810
1321
|
const comments = await AnnotationDetection.detectComments(
|
|
811
|
-
job.resourceId,
|
|
1322
|
+
job.params.resourceId,
|
|
812
1323
|
this.config,
|
|
813
|
-
|
|
814
|
-
job.
|
|
815
|
-
job.
|
|
1324
|
+
this.inferenceClient,
|
|
1325
|
+
job.params.instructions,
|
|
1326
|
+
job.params.tone,
|
|
1327
|
+
job.params.density
|
|
816
1328
|
);
|
|
817
1329
|
console.log(`[CommentDetectionWorker] Found ${comments.length} comments to create`);
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
1330
|
+
updatedJob = {
|
|
1331
|
+
...updatedJob,
|
|
1332
|
+
progress: {
|
|
1333
|
+
stage: "creating",
|
|
1334
|
+
percentage: 60,
|
|
1335
|
+
message: `Creating ${comments.length} annotations...`
|
|
1336
|
+
}
|
|
822
1337
|
};
|
|
823
|
-
await this.updateJobProgress(
|
|
1338
|
+
await this.updateJobProgress(updatedJob);
|
|
824
1339
|
let created = 0;
|
|
825
1340
|
for (const comment of comments) {
|
|
826
1341
|
try {
|
|
827
|
-
await this.createCommentAnnotation(job.resourceId, job.userId, comment);
|
|
1342
|
+
await this.createCommentAnnotation(job.params.resourceId, job.metadata.userId, comment);
|
|
828
1343
|
created++;
|
|
829
1344
|
} catch (error) {
|
|
830
1345
|
console.error(`[CommentDetectionWorker] Failed to create comment:`, error);
|
|
831
1346
|
}
|
|
832
1347
|
}
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
message: `Complete! Created ${created} comments`
|
|
1348
|
+
updatedJob = {
|
|
1349
|
+
...updatedJob,
|
|
1350
|
+
progress: {
|
|
1351
|
+
stage: "creating",
|
|
1352
|
+
percentage: 100,
|
|
1353
|
+
message: `Complete! Created ${created} comments`
|
|
1354
|
+
}
|
|
841
1355
|
};
|
|
842
|
-
await this.updateJobProgress(
|
|
1356
|
+
await this.updateJobProgress(updatedJob);
|
|
843
1357
|
console.log(`[CommentDetectionWorker] \u2705 Created ${created}/${comments.length} comments`);
|
|
844
1358
|
}
|
|
845
1359
|
async createCommentAnnotation(resourceId2, userId_, comment) {
|
|
@@ -847,8 +1361,8 @@ var CommentDetectionWorker = class extends JobWorker {
|
|
|
847
1361
|
if (!backendUrl) {
|
|
848
1362
|
throw new Error("Backend publicURL not configured");
|
|
849
1363
|
}
|
|
850
|
-
const
|
|
851
|
-
const annotationId2 =
|
|
1364
|
+
const resourceUri3 = resourceIdToURI4(resourceId2, backendUrl);
|
|
1365
|
+
const annotationId2 = generateAnnotationId4(backendUrl);
|
|
852
1366
|
const annotation = {
|
|
853
1367
|
"@context": "http://www.w3.org/ns/anno.jsonld",
|
|
854
1368
|
type: "Annotation",
|
|
@@ -856,7 +1370,7 @@ var CommentDetectionWorker = class extends JobWorker {
|
|
|
856
1370
|
motivation: "commenting",
|
|
857
1371
|
target: {
|
|
858
1372
|
type: "SpecificResource",
|
|
859
|
-
source:
|
|
1373
|
+
source: resourceUri3,
|
|
860
1374
|
selector: [
|
|
861
1375
|
{
|
|
862
1376
|
type: "TextPositionSelector",
|
|
@@ -884,7 +1398,7 @@ var CommentDetectionWorker = class extends JobWorker {
|
|
|
884
1398
|
await this.eventStore.appendEvent({
|
|
885
1399
|
type: "annotation.added",
|
|
886
1400
|
resourceId: resourceId2,
|
|
887
|
-
userId:
|
|
1401
|
+
userId: userId3(userId_),
|
|
888
1402
|
version: 1,
|
|
889
1403
|
payload: {
|
|
890
1404
|
annotation
|
|
@@ -894,53 +1408,60 @@ var CommentDetectionWorker = class extends JobWorker {
|
|
|
894
1408
|
}
|
|
895
1409
|
};
|
|
896
1410
|
|
|
897
|
-
// src/jobs/
|
|
898
|
-
import { JobWorker as
|
|
899
|
-
import { generateAnnotationId as
|
|
900
|
-
import { resourceIdToURI as
|
|
901
|
-
import {
|
|
902
|
-
|
|
903
|
-
|
|
1411
|
+
// src/jobs/tag-detection-worker.ts
|
|
1412
|
+
import { JobWorker as JobWorker6 } from "@semiont/jobs";
|
|
1413
|
+
import { generateAnnotationId as generateAnnotationId5 } from "@semiont/event-sourcing";
|
|
1414
|
+
import { resourceIdToURI as resourceIdToURI5 } from "@semiont/core";
|
|
1415
|
+
import { getTagSchema } from "@semiont/ontology";
|
|
1416
|
+
import { userId as userId4 } from "@semiont/core";
|
|
1417
|
+
var TagDetectionWorker = class extends JobWorker6 {
|
|
1418
|
+
constructor(jobQueue, config, eventStore, inferenceClient) {
|
|
904
1419
|
super(jobQueue);
|
|
905
1420
|
this.config = config;
|
|
906
1421
|
this.eventStore = eventStore;
|
|
1422
|
+
this.inferenceClient = inferenceClient;
|
|
907
1423
|
}
|
|
908
1424
|
isFirstProgress = true;
|
|
909
1425
|
getWorkerName() {
|
|
910
|
-
return "
|
|
1426
|
+
return "TagDetectionWorker";
|
|
911
1427
|
}
|
|
912
1428
|
canProcessJob(job) {
|
|
913
|
-
return job.type === "
|
|
1429
|
+
return job.metadata.type === "tag-detection";
|
|
914
1430
|
}
|
|
915
1431
|
async executeJob(job) {
|
|
916
|
-
if (job.type !== "
|
|
917
|
-
throw new Error(`Invalid job type: ${job.type}`);
|
|
1432
|
+
if (job.metadata.type !== "tag-detection") {
|
|
1433
|
+
throw new Error(`Invalid job type: ${job.metadata.type}`);
|
|
1434
|
+
}
|
|
1435
|
+
if (job.status !== "running") {
|
|
1436
|
+
throw new Error(`Job must be in running state to execute, got: ${job.status}`);
|
|
918
1437
|
}
|
|
919
1438
|
this.isFirstProgress = true;
|
|
920
|
-
await this.
|
|
1439
|
+
await this.processTagDetectionJob(job);
|
|
921
1440
|
}
|
|
922
1441
|
/**
|
|
923
1442
|
* Override updateJobProgress to emit events to Event Store
|
|
924
1443
|
*/
|
|
925
1444
|
async updateJobProgress(job) {
|
|
926
1445
|
await super.updateJobProgress(job);
|
|
927
|
-
if (job.type !== "
|
|
928
|
-
|
|
929
|
-
|
|
1446
|
+
if (job.metadata.type !== "tag-detection") return;
|
|
1447
|
+
if (job.status !== "running") {
|
|
1448
|
+
return;
|
|
1449
|
+
}
|
|
1450
|
+
const tdJob = job;
|
|
930
1451
|
const baseEvent = {
|
|
931
|
-
resourceId:
|
|
932
|
-
userId:
|
|
1452
|
+
resourceId: tdJob.params.resourceId,
|
|
1453
|
+
userId: tdJob.metadata.userId,
|
|
933
1454
|
version: 1
|
|
934
1455
|
};
|
|
935
|
-
const isComplete =
|
|
1456
|
+
const isComplete = tdJob.progress.percentage === 100;
|
|
936
1457
|
if (this.isFirstProgress) {
|
|
937
1458
|
this.isFirstProgress = false;
|
|
938
1459
|
await this.eventStore.appendEvent({
|
|
939
1460
|
type: "job.started",
|
|
940
1461
|
...baseEvent,
|
|
941
1462
|
payload: {
|
|
942
|
-
jobId:
|
|
943
|
-
jobType:
|
|
1463
|
+
jobId: tdJob.metadata.id,
|
|
1464
|
+
jobType: tdJob.metadata.type
|
|
944
1465
|
}
|
|
945
1466
|
});
|
|
946
1467
|
} else if (isComplete) {
|
|
@@ -948,9 +1469,9 @@ var HighlightDetectionWorker = class extends JobWorker2 {
|
|
|
948
1469
|
type: "job.completed",
|
|
949
1470
|
...baseEvent,
|
|
950
1471
|
payload: {
|
|
951
|
-
jobId:
|
|
952
|
-
jobType:
|
|
953
|
-
|
|
1472
|
+
jobId: tdJob.metadata.id,
|
|
1473
|
+
jobType: tdJob.metadata.type
|
|
1474
|
+
// Note: result would come from job.result, but that's handled by base class
|
|
954
1475
|
}
|
|
955
1476
|
});
|
|
956
1477
|
} else {
|
|
@@ -958,1020 +1479,1804 @@ var HighlightDetectionWorker = class extends JobWorker2 {
|
|
|
958
1479
|
type: "job.progress",
|
|
959
1480
|
...baseEvent,
|
|
960
1481
|
payload: {
|
|
961
|
-
jobId:
|
|
962
|
-
jobType:
|
|
963
|
-
progress:
|
|
1482
|
+
jobId: tdJob.metadata.id,
|
|
1483
|
+
jobType: tdJob.metadata.type,
|
|
1484
|
+
progress: tdJob.progress
|
|
964
1485
|
}
|
|
965
1486
|
});
|
|
966
1487
|
}
|
|
967
1488
|
}
|
|
968
1489
|
async handleJobFailure(job, error) {
|
|
969
1490
|
await super.handleJobFailure(job, error);
|
|
970
|
-
if (job.status === "failed" && job.type === "
|
|
971
|
-
const
|
|
1491
|
+
if (job.status === "failed" && job.metadata.type === "tag-detection") {
|
|
1492
|
+
const tdJob = job;
|
|
972
1493
|
await this.eventStore.appendEvent({
|
|
973
1494
|
type: "job.failed",
|
|
974
|
-
resourceId:
|
|
975
|
-
userId:
|
|
1495
|
+
resourceId: tdJob.params.resourceId,
|
|
1496
|
+
userId: tdJob.metadata.userId,
|
|
976
1497
|
version: 1,
|
|
977
1498
|
payload: {
|
|
978
|
-
jobId:
|
|
979
|
-
jobType:
|
|
980
|
-
error: "
|
|
1499
|
+
jobId: tdJob.metadata.id,
|
|
1500
|
+
jobType: tdJob.metadata.type,
|
|
1501
|
+
error: "Tag detection failed. Please try again later."
|
|
981
1502
|
}
|
|
982
1503
|
});
|
|
983
1504
|
}
|
|
984
1505
|
}
|
|
985
|
-
async
|
|
986
|
-
console.log(`[
|
|
987
|
-
const
|
|
1506
|
+
async processTagDetectionJob(job) {
|
|
1507
|
+
console.log(`[TagDetectionWorker] Processing tag detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
|
|
1508
|
+
const schema = getTagSchema(job.params.schemaId);
|
|
1509
|
+
if (!schema) {
|
|
1510
|
+
throw new Error(`Invalid tag schema: ${job.params.schemaId}`);
|
|
1511
|
+
}
|
|
1512
|
+
for (const category of job.params.categories) {
|
|
1513
|
+
if (!schema.tags.some((t) => t.name === category)) {
|
|
1514
|
+
throw new Error(`Invalid category "${category}" for schema ${job.params.schemaId}`);
|
|
1515
|
+
}
|
|
1516
|
+
}
|
|
1517
|
+
const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
|
|
988
1518
|
if (!resource) {
|
|
989
|
-
throw new Error(`Resource ${job.resourceId} not found`);
|
|
1519
|
+
throw new Error(`Resource ${job.params.resourceId} not found`);
|
|
990
1520
|
}
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
message: "Analyzing text..."
|
|
1521
|
+
let updatedJob = {
|
|
1522
|
+
...job,
|
|
1523
|
+
progress: {
|
|
1524
|
+
stage: "analyzing",
|
|
1525
|
+
percentage: 10,
|
|
1526
|
+
processedCategories: 0,
|
|
1527
|
+
totalCategories: job.params.categories.length,
|
|
1528
|
+
message: "Loading resource..."
|
|
1529
|
+
}
|
|
1001
1530
|
};
|
|
1002
|
-
await this.updateJobProgress(
|
|
1003
|
-
const
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
job.
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1531
|
+
await this.updateJobProgress(updatedJob);
|
|
1532
|
+
const allTags = [];
|
|
1533
|
+
const byCategory = {};
|
|
1534
|
+
for (let i = 0; i < job.params.categories.length; i++) {
|
|
1535
|
+
const category = job.params.categories[i];
|
|
1536
|
+
updatedJob = {
|
|
1537
|
+
...updatedJob,
|
|
1538
|
+
progress: {
|
|
1539
|
+
stage: "analyzing",
|
|
1540
|
+
percentage: 10 + Math.floor(i / job.params.categories.length * 50),
|
|
1541
|
+
currentCategory: category,
|
|
1542
|
+
processedCategories: i + 1,
|
|
1543
|
+
totalCategories: job.params.categories.length,
|
|
1544
|
+
message: `Analyzing ${category}...`
|
|
1545
|
+
}
|
|
1546
|
+
};
|
|
1547
|
+
await this.updateJobProgress(updatedJob);
|
|
1548
|
+
const tags = await AnnotationDetection.detectTags(
|
|
1549
|
+
job.params.resourceId,
|
|
1550
|
+
this.config,
|
|
1551
|
+
this.inferenceClient,
|
|
1552
|
+
job.params.schemaId,
|
|
1553
|
+
category
|
|
1554
|
+
);
|
|
1555
|
+
console.log(`[TagDetectionWorker] Found ${tags.length} tags for category "${category}"`);
|
|
1556
|
+
allTags.push(...tags);
|
|
1557
|
+
byCategory[category] = tags.length;
|
|
1558
|
+
}
|
|
1559
|
+
updatedJob = {
|
|
1560
|
+
...updatedJob,
|
|
1561
|
+
progress: {
|
|
1562
|
+
stage: "creating",
|
|
1563
|
+
percentage: 60,
|
|
1564
|
+
processedCategories: job.params.categories.length,
|
|
1565
|
+
totalCategories: job.params.categories.length,
|
|
1566
|
+
message: `Creating ${allTags.length} tag annotations...`
|
|
1567
|
+
}
|
|
1014
1568
|
};
|
|
1015
|
-
await this.updateJobProgress(
|
|
1569
|
+
await this.updateJobProgress(updatedJob);
|
|
1016
1570
|
let created = 0;
|
|
1017
|
-
for (const
|
|
1571
|
+
for (const tag of allTags) {
|
|
1018
1572
|
try {
|
|
1019
|
-
await this.
|
|
1573
|
+
await this.createTagAnnotation(job.params.resourceId, job.metadata.userId, job.params.schemaId, tag);
|
|
1020
1574
|
created++;
|
|
1021
1575
|
} catch (error) {
|
|
1022
|
-
console.error(`[
|
|
1576
|
+
console.error(`[TagDetectionWorker] Failed to create tag:`, error);
|
|
1023
1577
|
}
|
|
1024
1578
|
}
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1579
|
+
updatedJob = {
|
|
1580
|
+
...updatedJob,
|
|
1581
|
+
progress: {
|
|
1582
|
+
stage: "creating",
|
|
1583
|
+
percentage: 100,
|
|
1584
|
+
processedCategories: job.params.categories.length,
|
|
1585
|
+
totalCategories: job.params.categories.length,
|
|
1586
|
+
message: `Complete! Created ${created} tags`
|
|
1587
|
+
}
|
|
1033
1588
|
};
|
|
1034
|
-
await this.updateJobProgress(
|
|
1035
|
-
console.log(`[
|
|
1589
|
+
await this.updateJobProgress(updatedJob);
|
|
1590
|
+
console.log(`[TagDetectionWorker] \u2705 Created ${created}/${allTags.length} tags across ${job.params.categories.length} categories`);
|
|
1036
1591
|
}
|
|
1037
|
-
async
|
|
1592
|
+
async createTagAnnotation(resourceId2, userId_, schemaId, tag) {
|
|
1038
1593
|
const backendUrl = this.config.services.backend?.publicURL;
|
|
1039
|
-
if (!backendUrl)
|
|
1040
|
-
|
|
1041
|
-
|
|
1594
|
+
if (!backendUrl) {
|
|
1595
|
+
throw new Error("Backend publicURL not configured");
|
|
1596
|
+
}
|
|
1597
|
+
const resourceUri3 = resourceIdToURI5(resourceId2, backendUrl);
|
|
1598
|
+
const annotationId2 = generateAnnotationId5(backendUrl);
|
|
1042
1599
|
const annotation = {
|
|
1043
1600
|
"@context": "http://www.w3.org/ns/anno.jsonld",
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
"created": (/* @__PURE__ */ new Date()).toISOString(),
|
|
1049
|
-
"target": {
|
|
1601
|
+
type: "Annotation",
|
|
1602
|
+
id: annotationId2,
|
|
1603
|
+
motivation: "tagging",
|
|
1604
|
+
target: {
|
|
1050
1605
|
type: "SpecificResource",
|
|
1051
|
-
source:
|
|
1606
|
+
source: resourceUri3,
|
|
1052
1607
|
selector: [
|
|
1053
1608
|
{
|
|
1054
1609
|
type: "TextPositionSelector",
|
|
1055
|
-
start:
|
|
1056
|
-
end:
|
|
1610
|
+
start: tag.start,
|
|
1611
|
+
end: tag.end
|
|
1057
1612
|
},
|
|
1058
1613
|
{
|
|
1059
1614
|
type: "TextQuoteSelector",
|
|
1060
|
-
exact:
|
|
1061
|
-
|
|
1062
|
-
|
|
1615
|
+
exact: tag.exact,
|
|
1616
|
+
prefix: tag.prefix || "",
|
|
1617
|
+
suffix: tag.suffix || ""
|
|
1063
1618
|
}
|
|
1064
1619
|
]
|
|
1065
1620
|
},
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1621
|
+
body: [
|
|
1622
|
+
{
|
|
1623
|
+
type: "TextualBody",
|
|
1624
|
+
value: tag.category,
|
|
1625
|
+
purpose: "tagging",
|
|
1626
|
+
format: "text/plain",
|
|
1627
|
+
language: "en"
|
|
1628
|
+
},
|
|
1629
|
+
{
|
|
1630
|
+
type: "TextualBody",
|
|
1631
|
+
value: schemaId,
|
|
1632
|
+
purpose: "classifying",
|
|
1633
|
+
format: "text/plain"
|
|
1634
|
+
}
|
|
1635
|
+
]
|
|
1636
|
+
};
|
|
1637
|
+
await this.eventStore.appendEvent({
|
|
1638
|
+
type: "annotation.added",
|
|
1639
|
+
resourceId: resourceId2,
|
|
1640
|
+
userId: userId4(userId_),
|
|
1641
|
+
version: 1,
|
|
1642
|
+
payload: {
|
|
1643
|
+
annotation
|
|
1644
|
+
}
|
|
1645
|
+
});
|
|
1646
|
+
console.log(`[TagDetectionWorker] Created tag annotation ${annotationId2} for "${tag.category}": "${tag.exact.substring(0, 50)}..."`);
|
|
1647
|
+
}
|
|
1648
|
+
};
|
|
1078
1649
|
|
|
1079
|
-
// src/
|
|
1080
|
-
import {
|
|
1081
|
-
import {
|
|
1082
|
-
import {
|
|
1083
|
-
import {
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1650
|
+
// src/graph/consumer.ts
|
|
1651
|
+
import { EventQuery } from "@semiont/event-sourcing";
|
|
1652
|
+
import { didToAgent } from "@semiont/core";
|
|
1653
|
+
import { resourceId as makeResourceId, findBodyItem } from "@semiont/core";
|
|
1654
|
+
import { toResourceUri, toAnnotationUri } from "@semiont/event-sourcing";
|
|
1655
|
+
import { resourceUri as resourceUri2 } from "@semiont/api-client";
|
|
1656
|
+
/**
 * Projects events from the event store into the graph database.
 *
 * The consumer subscribes to the event bus (globally and per resource) and
 * applies each received event to the graph through the injected client.
 * Events that carry a resourceId are applied strictly one at a time per
 * resource; events without one (system-level events) are applied directly.
 */
var GraphDBConsumer = class {
  /**
   * @param config - Application configuration; `services.backend.publicURL` is read to build URIs.
   * @param eventStore - Event store; `bus.subscriptions` and `log` are used.
   * @param graphDb - Graph database client that receives the projected changes.
   */
  constructor(config, eventStore, graphDb) {
    this.config = config;
    this.eventStore = eventStore;
    this.graphDb = graphDb;
  }
  // Active per-resource subscriptions, keyed by resource ID.
  subscriptions = /* @__PURE__ */ new Map();
  // Subscription to system-level events (kept for cleanup)
  _globalSubscription = null;
  // Tail of the per-resource processing chain, keyed by resource ID.
  processing = /* @__PURE__ */ new Map();
  // Sequence number of the last successfully applied event, keyed by resource ID.
  lastProcessed = /* @__PURE__ */ new Map();
  /**
   * Start the consumer by subscribing to global (system-level) events.
   */
  async initialize() {
    console.log("[GraphDBConsumer] Initialized");
    await this.subscribeToGlobalEvents();
  }
  /**
   * Subscribe to global system-level events (no resourceId)
   * This allows the consumer to react to events like entitytype.added
   */
  async subscribeToGlobalEvents() {
    this._globalSubscription = this.eventStore.bus.subscriptions.subscribeGlobal(async (storedEvent) => {
      console.log(`[GraphDBConsumer] Received global event: ${storedEvent.event.type}`);
      await this.processEvent(storedEvent);
    });
    console.log("[GraphDBConsumer] Subscribed to global system events");
  }
  /**
   * @returns The graph database client passed to the constructor.
   */
  ensureInitialized() {
    return this.graphDb;
  }
  /**
   * Build the graph URI of a resource from the configured public URL.
   */
  toGraphResourceUri(resourceId2) {
    return toResourceUri({ baseUrl: this.config.services.backend.publicURL }, resourceId2);
  }
  /**
   * Build the graph node for a resource.created / resource.cloned event.
   *
   * @param event - The creation event (resourceId, userId and payload are read).
   * @param creationMethod - Value stored as `creationMethod` ("api" or "clone").
   */
  buildGraphResource(event, creationMethod) {
    return {
      "@context": "https://schema.org/",
      "@id": this.toGraphResourceUri(event.resourceId),
      name: event.payload.name,
      entityTypes: event.payload.entityTypes || [],
      representations: [{
        mediaType: event.payload.format,
        checksum: event.payload.contentChecksum,
        rel: "original"
      }],
      archived: false,
      dateCreated: (/* @__PURE__ */ new Date()).toISOString(),
      wasAttributedTo: didToAgent(event.userId),
      creationMethod
    };
  }
  /**
   * Apply add / remove / replace operations to an annotation body.
   *
   * @param currentBody - Existing body: an array, a single item, or nothing.
   * @param operations - Operations from an annotation.body.updated payload.
   * @returns A new body array; the input body is not mutated.
   */
  applyBodyOperations(currentBody, operations) {
    const bodyArray = Array.isArray(currentBody) ? [...currentBody] : currentBody ? [currentBody] : [];
    for (const op of operations) {
      console.log(`[GraphDBConsumer] Applying operation:`, JSON.stringify(op));
      if (op.op === "add") {
        if (findBodyItem(bodyArray, op.item) === -1) {
          bodyArray.push(op.item);
          console.log(`[GraphDBConsumer] Added item to body`);
        } else {
          console.log(`[GraphDBConsumer] Item already exists, skipping`);
        }
      } else if (op.op === "remove") {
        const index = findBodyItem(bodyArray, op.item);
        if (index !== -1) {
          bodyArray.splice(index, 1);
          console.log(`[GraphDBConsumer] Removed item from body`);
        }
      } else if (op.op === "replace") {
        const index = findBodyItem(bodyArray, op.oldItem);
        if (index !== -1) {
          bodyArray[index] = op.newItem;
          console.log(`[GraphDBConsumer] Replaced item in body`);
        }
      }
    }
    return bodyArray;
  }
  /**
   * Subscribe to events for a resource
   * Apply each event to GraphDB
   *
   * Subscribing to an already-subscribed resource is a no-op, so the existing
   * subscription is neither leaked nor duplicated.
   */
  async subscribeToResource(resourceId2) {
    this.ensureInitialized();
    if (this.subscriptions.has(resourceId2)) {
      console.log(`[GraphDBConsumer] Already subscribed to ${resourceId2}`);
      return;
    }
    const publicURL = this.config.services.backend.publicURL;
    const rUri = resourceUri2(`${publicURL}/resources/${resourceId2}`);
    const subscription = this.eventStore.bus.subscriptions.subscribe(rUri, async (storedEvent) => {
      await this.processEvent(storedEvent);
    });
    this.subscriptions.set(resourceId2, subscription);
    console.log(`[GraphDBConsumer] Subscribed to ${resourceId2}`);
  }
  /**
   * Stop the consumer and unsubscribe from all events
   */
  async stop() {
    console.log("[GraphDBConsumer] Stopping...");
    for (const subscription of this.subscriptions.values()) {
      if (subscription && typeof subscription.unsubscribe === "function") {
        subscription.unsubscribe();
      }
    }
    this.subscriptions.clear();
    if (this._globalSubscription && typeof this._globalSubscription.unsubscribe === "function") {
      this._globalSubscription.unsubscribe();
    }
    this._globalSubscription = null;
    console.log("[GraphDBConsumer] Stopped");
  }
  /**
   * Process event with ordering guarantee (sequential per resource)
   *
   * Each event is chained onto the tail of its resource's queue, so any number
   * of events arriving together are still applied one after another.
   *
   * @param storedEvent - Event envelope (`event` and `metadata.sequenceNumber` are read).
   * @throws Rethrows whatever applyEventToGraph throws for this event.
   */
  async processEvent(storedEvent) {
    const { resourceId: resourceId2 } = storedEvent.event;
    if (!resourceId2) {
      // System-level events have no per-resource queue.
      await this.applyEventToGraph(storedEvent);
      return;
    }
    const previousProcessing = this.processing.get(resourceId2) ?? Promise.resolve();
    // A predecessor's failure was already reported to its own caller; it must
    // not prevent this event from being applied.
    const processingPromise = previousProcessing.catch(() => {
    }).then(() => this.applyEventToGraph(storedEvent));
    this.processing.set(resourceId2, processingPromise);
    try {
      await processingPromise;
      this.lastProcessed.set(resourceId2, storedEvent.metadata.sequenceNumber);
    } catch (error) {
      console.error(`[GraphDBConsumer] Failed to process event:`, error);
      throw error;
    } finally {
      // Only clear the queue when no later event has been chained behind us.
      if (this.processing.get(resourceId2) === processingPromise) {
        this.processing.delete(resourceId2);
      }
    }
  }
  /**
   * Apply event to GraphDB
   *
   * Unknown event types are logged and ignored. Errors raised while handling
   * annotation.body.updated are logged and swallowed; all other errors propagate.
   */
  async applyEventToGraph(storedEvent) {
    const graphDb = this.ensureInitialized();
    const event = storedEvent.event;
    console.log(`[GraphDBConsumer] Applying ${event.type} to GraphDB (seq=${storedEvent.metadata.sequenceNumber})`);
    switch (event.type) {
      case "resource.created": {
        if (!event.resourceId) throw new Error("resource.created requires resourceId");
        const resource = this.buildGraphResource(event, "api");
        console.log(`[GraphDBConsumer] Creating resource in graph: ${resource["@id"]}`);
        await graphDb.createResource(resource);
        console.log(`[GraphDBConsumer] \u2705 Resource created in graph: ${resource["@id"]}`);
        break;
      }
      case "resource.cloned": {
        if (!event.resourceId) throw new Error("resource.cloned requires resourceId");
        const resource = this.buildGraphResource(event, "clone");
        console.log(`[GraphDBConsumer] Creating cloned resource in graph: ${resource["@id"]}`);
        await graphDb.createResource(resource);
        console.log(`[GraphDBConsumer] \u2705 Cloned resource created in graph: ${resource["@id"]}`);
        break;
      }
      case "resource.archived": {
        if (!event.resourceId) throw new Error("resource.archived requires resourceId");
        await graphDb.updateResource(this.toGraphResourceUri(event.resourceId), {
          archived: true
        });
        break;
      }
      case "resource.unarchived": {
        if (!event.resourceId) throw new Error("resource.unarchived requires resourceId");
        await graphDb.updateResource(this.toGraphResourceUri(event.resourceId), {
          archived: false
        });
        break;
      }
      case "annotation.added": {
        console.log(`[GraphDBConsumer] \u{1F50D} ENTERED annotation.added case block`);
        console.log(`[GraphDBConsumer] Annotation ID: ${event.payload.annotation.id}`);
        await graphDb.createAnnotation({
          ...event.payload.annotation,
          creator: didToAgent(event.userId)
        });
        console.log(`[GraphDBConsumer] \u2705 Annotation created in graph: ${event.payload.annotation.id}`);
        break;
      }
      case "annotation.removed": {
        await graphDb.deleteAnnotation(toAnnotationUri({ baseUrl: this.config.services.backend.publicURL }, event.payload.annotationId));
        break;
      }
      case "annotation.body.updated": {
        console.log(`[GraphDBConsumer] \u{1F50D} ENTERED annotation.body.updated case block`);
        console.log(`[GraphDBConsumer] Event payload:`, JSON.stringify(event.payload));
        try {
          console.log(`[GraphDBConsumer] Creating annotation URI for: ${event.payload.annotationId}`);
          const annotationUri2 = toAnnotationUri({ baseUrl: this.config.services.backend.publicURL }, event.payload.annotationId);
          console.log(`[GraphDBConsumer] \u2705 Annotation URI created: ${annotationUri2}`);
          console.log(`[GraphDBConsumer] Processing annotation.body.updated for ${annotationUri2}`);
          console.log(`[GraphDBConsumer] Operations:`, JSON.stringify(event.payload.operations));
          const currentAnnotation = await graphDb.getAnnotation(annotationUri2);
          console.log(`[GraphDBConsumer] Current annotation in graph:`, currentAnnotation ? "FOUND" : "NOT FOUND");
          if (currentAnnotation) {
            console.log(`[GraphDBConsumer] Current body:`, JSON.stringify(currentAnnotation.body));
            const bodyArray = this.applyBodyOperations(currentAnnotation.body, event.payload.operations);
            console.log(`[GraphDBConsumer] New body array:`, JSON.stringify(bodyArray));
            console.log(`[GraphDBConsumer] Calling updateAnnotation...`);
            await graphDb.updateAnnotation(annotationUri2, {
              body: bodyArray
            });
            console.log(`[GraphDBConsumer] \u2705 updateAnnotation completed successfully`);
          } else {
            console.log(`[GraphDBConsumer] \u26A0\uFE0F Annotation not found in graph, skipping update`);
          }
        } catch (error) {
          // Failures here are logged but deliberately not rethrown.
          console.error(`[GraphDBConsumer] \u274C ERROR in annotation.body.updated handler`);
          console.error(`[GraphDBConsumer] Annotation ID: ${event.payload.annotationId}`);
          console.error(`[GraphDBConsumer] Error:`, error);
          console.error(`[GraphDBConsumer] Error stack:`, error instanceof Error ? error.stack : "N/A");
        }
        break;
      }
      case "entitytag.added": {
        if (!event.resourceId) throw new Error("entitytag.added requires resourceId");
        const uri = this.toGraphResourceUri(event.resourceId);
        const existing = await graphDb.getResource(uri);
        if (existing) {
          const currentTypes = existing.entityTypes || [];
          // Skip tags that are already present so re-applied events do not
          // accumulate duplicates (mirrors the "add" body operation).
          if (!currentTypes.includes(event.payload.entityType)) {
            await graphDb.updateResource(uri, {
              entityTypes: [...currentTypes, event.payload.entityType]
            });
          }
        }
        break;
      }
      case "entitytag.removed": {
        if (!event.resourceId) throw new Error("entitytag.removed requires resourceId");
        const uri = this.toGraphResourceUri(event.resourceId);
        const existing = await graphDb.getResource(uri);
        if (existing) {
          await graphDb.updateResource(uri, {
            entityTypes: (existing.entityTypes || []).filter((t) => t !== event.payload.entityType)
          });
        }
        break;
      }
      case "entitytype.added": {
        await graphDb.addEntityType(event.payload.entityType);
        break;
      }
      default:
        console.warn(`[GraphDBConsumer] Unknown event type: ${event.type}`);
    }
  }
  /**
   * Rebuild entire resource from events
   * Useful for recovery or initial sync
   */
  async rebuildResource(resourceId2) {
    const graphDb = this.ensureInitialized();
    console.log(`[GraphDBConsumer] Rebuilding resource ${resourceId2} from events`);
    try {
      await graphDb.deleteResource(this.toGraphResourceUri(makeResourceId(resourceId2)));
    } catch (error) {
      // Any delete failure is treated as "nothing to delete"; the replay still runs.
      console.log(`[GraphDBConsumer] No existing resource to delete: ${resourceId2}`);
    }
    const query = new EventQuery(this.eventStore.log.storage);
    const events = await query.getResourceEvents(resourceId2);
    for (const storedEvent of events) {
      await this.applyEventToGraph(storedEvent);
    }
    console.log(`[GraphDBConsumer] Rebuilt ${resourceId2} from ${events.length} events`);
  }
  /**
   * Rebuild entire GraphDB from all events
   * Uses two-pass approach to ensure all resources exist before creating REFERENCES edges
   */
  async rebuildAll() {
    const graphDb = this.ensureInitialized();
    console.log("[GraphDBConsumer] Rebuilding entire GraphDB from events...");
    console.log("[GraphDBConsumer] Using two-pass approach: nodes first, then edges\n");
    await graphDb.clearDatabase();
    const query = new EventQuery(this.eventStore.log.storage);
    const allResourceIds = await this.eventStore.log.getAllResourceIds();
    console.log(`[GraphDBConsumer] Found ${allResourceIds.length} resources to rebuild`);
    console.log("\n[GraphDBConsumer] === PASS 1: Creating all nodes (resources + annotations) ===");
    for (const resourceId2 of allResourceIds) {
      const events = await query.getResourceEvents(makeResourceId(resourceId2));
      for (const storedEvent of events) {
        // Body updates are deferred to pass 2.
        if (storedEvent.event.type === "annotation.body.updated") {
          continue;
        }
        await this.applyEventToGraph(storedEvent);
      }
    }
    console.log("[GraphDBConsumer] \u2705 Pass 1 complete - all nodes created\n");
    console.log("[GraphDBConsumer] === PASS 2: Creating all REFERENCES edges ===");
    for (const resourceId2 of allResourceIds) {
      const events = await query.getResourceEvents(makeResourceId(resourceId2));
      for (const storedEvent of events) {
        if (storedEvent.event.type === "annotation.body.updated") {
          await this.applyEventToGraph(storedEvent);
        }
      }
    }
    console.log("[GraphDBConsumer] \u2705 Pass 2 complete - all edges created\n");
    console.log("[GraphDBConsumer] Rebuild complete");
  }
  /**
   * Get consumer health metrics
   *
   * @returns Subscription count, last applied sequence number per resource,
   *          and the IDs of resources with events currently being applied.
   */
  getHealthMetrics() {
    return {
      subscriptions: this.subscriptions.size,
      lastProcessed: Object.fromEntries(this.lastProcessed),
      processing: Array.from(this.processing.keys())
    };
  }
  /**
   * Unsubscribe from resource
   */
  async unsubscribeFromResource(resourceId2) {
    const subscription = this.subscriptions.get(resourceId2);
    if (subscription) {
      subscription.unsubscribe();
      this.subscriptions.delete(resourceId2);
      console.log(`[GraphDBConsumer] Unsubscribed from ${resourceId2}`);
    }
  }
  /**
   * Unsubscribe from all resources
   */
  async unsubscribeAll() {
    for (const subscription of this.subscriptions.values()) {
      subscription.unsubscribe();
    }
    this.subscriptions.clear();
    console.log("[GraphDBConsumer] Unsubscribed from all resources");
  }
  /**
   * Shutdown consumer
   */
  async shutdown() {
    await this.unsubscribeAll();
    if (this._globalSubscription) {
      this._globalSubscription.unsubscribe();
      this._globalSubscription = null;
      console.log("[GraphDBConsumer] Unsubscribed from global events");
    }
    console.log("[GraphDBConsumer] Shut down");
  }
};
|
|
1996
|
+
|
|
1997
|
+
// src/service.ts
|
|
1998
|
+
/**
 * Start the Make-Meaning service: job queue, event store, representation
 * store, inference client, graph database, graph consumer and all workers.
 *
 * @param config - Application configuration. `services.filesystem.path` and
 *   `services.backend.publicURL` are required; `_metadata.projectRoot` is used
 *   to resolve a relative filesystem path when present.
 * @returns The created components plus an async `stop()` that shuts them down.
 * @throws Error when a required configuration value is missing.
 */
async function startMakeMeaning(config) {
  console.log("\u{1F9E0} Starting Make-Meaning service...");
  const configuredPath = config.services?.filesystem?.path;
  if (!configuredPath) {
    throw new Error("services.filesystem.path is required for make-meaning service");
  }
  const baseUrl = config.services?.backend?.publicURL;
  if (!baseUrl) {
    throw new Error("services.backend.publicURL is required for make-meaning service");
  }
  const projectRoot = config._metadata?.projectRoot;
  // Absolute paths are used as-is; relative ones resolve against the project
  // root when known, otherwise against the current working directory.
  let basePath;
  if (path.isAbsolute(configuredPath)) {
    basePath = configuredPath;
  } else if (projectRoot) {
    basePath = path.resolve(projectRoot, configuredPath);
  } else {
    basePath = path.resolve(configuredPath);
  }
  console.log("\u{1F4BC} Initializing job queue...");
  const jobQueue = new JobQueue({ dataDir: basePath });
  await jobQueue.initialize();
  console.log("\u2705 Job queue initialized");
  console.log("\u{1F4CA} Creating event store connection...");
  const eventStore = createEventStoreCore(basePath, baseUrl);
  console.log("\u{1F4E6} Creating representation store...");
  const repStore = new FilesystemRepresentationStore3({ basePath }, projectRoot);
  console.log("\u2705 Representation store created");
  console.log("\u{1F916} Creating inference client...");
  const inferenceClient = await getInferenceClient(config);
  console.log("\u2705 Inference client created");
  console.log("\u{1F4CA} Connecting to graph database...");
  const graphDb = await getGraphDatabase(config);
  console.log("\u2705 Graph database connected");
  console.log("\u{1F504} Starting graph consumer...");
  const graphConsumer = new GraphDBConsumer(config, eventStore, graphDb);
  await graphConsumer.initialize();
  const allResourceIds = await eventStore.log.getAllResourceIds();
  console.log(`[GraphDBConsumer] Subscribing to ${allResourceIds.length} resources`);
  for (const resourceId2 of allResourceIds) {
    await graphConsumer.subscribeToResource(makeResourceId2(resourceId2));
  }
  console.log("\u2705 Graph consumer started");
  console.log("\u{1F477} Creating workers...");
  const workers = {
    detection: new ReferenceDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    generation: new GenerationWorker(jobQueue, config, eventStore, inferenceClient),
    highlight: new HighlightDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    assessment: new AssessmentDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    comment: new CommentDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    tag: new TagDetectionWorker(jobQueue, config, eventStore, inferenceClient)
  };
  console.log("\u{1F680} Starting workers...");
  for (const [key, worker] of Object.entries(workers)) {
    const label = key.charAt(0).toUpperCase() + key.slice(1);
    // start() is intentionally not awaited; a rejection is only reported.
    worker.start().catch((error) => {
      console.error(`\u26A0\uFE0F ${label} worker stopped:`, error);
    });
  }
  console.log("\u2705 All workers started");
  console.log("\u2705 Make-Meaning service started");
  return {
    jobQueue,
    eventStore,
    repStore,
    inferenceClient,
    graphDb,
    workers,
    graphConsumer,
    stop: async () => {
      console.log("\u23F9\uFE0F Stopping Make-Meaning service...");
      // Let every worker finish stopping, even if one of them fails, so the
      // graph consumer and database connection are always released.
      const results = await Promise.allSettled(
        Object.values(workers).map((worker) => worker.stop())
      );
      try {
        await graphConsumer.stop();
      } finally {
        await graphDb.disconnect();
      }
      const failure = results.find((result) => result.status === "rejected");
      if (failure) {
        throw failure.reason;
      }
      console.log("\u2705 Make-Meaning service stopped");
    }
  };
}
|
|
2095
|
+
|
|
2096
|
+
// src/resource-context.ts
|
|
2097
|
+
import { FilesystemViewStorage } from "@semiont/event-sourcing";
|
|
2098
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore4 } from "@semiont/content";
|
|
2099
|
+
import { getPrimaryRepresentation as getPrimaryRepresentation2, decodeRepresentation as decodeRepresentation2 } from "@semiont/api-client";
|
|
2100
|
+
/**
 * Read-side helpers for resources, backed by filesystem view storage and the
 * filesystem representation store. All methods are static.
 */
var ResourceContext = class {
  /**
   * Get resource metadata from view storage
   *
   * @param resourceId2 - ID of the resource to look up.
   * @param config - Application configuration (`services.filesystem.path` is read).
   * @returns The view's `resource`, or null when no view exists.
   */
  static async getResourceMetadata(resourceId2, config) {
    const basePath = config.services.filesystem.path;
    const projectRoot = config._metadata?.projectRoot;
    const viewStorage = new FilesystemViewStorage(basePath, projectRoot);
    const view = await viewStorage.get(resourceId2);
    return view ? view.resource : null;
  }
  /**
   * List all resources by scanning view storage
   *
   * @param filters - Optional `{ archived, search }`. `archived` must match
   *   exactly; `search` is a case-insensitive substring match on the name.
   * @param config - Application configuration (`services.filesystem.path` is read).
   * @returns Matching resources, newest `dateCreated` first; resources without
   *   a date sort last.
   */
  static async listResources(filters, config) {
    const basePath = config.services.filesystem.path;
    const projectRoot = config._metadata?.projectRoot;
    const viewStorage = new FilesystemViewStorage(basePath, projectRoot);
    const allViews = await viewStorage.getAll();
    const archivedFilter = filters?.archived;
    // Lower-case the search term once instead of once per resource.
    const searchLower = filters?.search ? filters.search.toLowerCase() : null;
    const resources = [];
    for (const { resource } of allViews) {
      if (archivedFilter !== void 0 && resource.archived !== archivedFilter) {
        continue;
      }
      // A resource without a name can never match a search; treat the name as
      // empty instead of throwing on it.
      if (searchLower !== null && !String(resource.name ?? "").toLowerCase().includes(searchLower)) {
        continue;
      }
      resources.push(resource);
    }
    const createdAt = (resource) => resource.dateCreated ? new Date(resource.dateCreated).getTime() : 0;
    resources.sort((a, b) => createdAt(b) - createdAt(a));
    return resources;
  }
  /**
   * Add content previews to resources (for search results)
   * Retrieves and decodes the first 200 characters of each resource's primary representation
   *
   * @param resources - Resources to decorate.
   * @param config - Application configuration (`services.filesystem.path` is read).
   * @returns Copies of the resources with a `content` string; it is empty when
   *   there is no primary representation or the content cannot be loaded.
   */
  static async addContentPreviews(resources, config) {
    const basePath = config.services.filesystem.path;
    const projectRoot = config._metadata?.projectRoot;
    const repStore = new FilesystemRepresentationStore4({ basePath }, projectRoot);
    return await Promise.all(
      resources.map(async (resource) => {
        try {
          const primaryRep = getPrimaryRepresentation2(resource);
          if (!primaryRep?.checksum || !primaryRep?.mediaType) {
            return { ...resource, content: "" };
          }
          const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
          const content = decodeRepresentation2(contentBuffer, primaryRep.mediaType).slice(0, 200);
          return { ...resource, content };
        } catch {
          // Best effort: a preview failure must not fail the whole listing.
          return { ...resource, content: "" };
        }
      })
    );
  }
};
|
|
2168
|
+
|
|
2169
|
+
// src/annotation-context.ts
|
|
2170
|
+
import { getInferenceClient as getInferenceClient2 } from "@semiont/inference";
|
|
2171
|
+
import {
|
|
2172
|
+
getBodySource,
|
|
2173
|
+
getTargetSource,
|
|
2174
|
+
getTargetSelector as getTargetSelector2,
|
|
2175
|
+
getResourceEntityTypes,
|
|
2176
|
+
getTextPositionSelector,
|
|
2177
|
+
getPrimaryRepresentation as getPrimaryRepresentation3,
|
|
2178
|
+
decodeRepresentation as decodeRepresentation3
|
|
2179
|
+
} from "@semiont/api-client";
|
|
2180
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore5 } from "@semiont/content";
|
|
2181
|
+
import { FilesystemViewStorage as FilesystemViewStorage2 } from "@semiont/event-sourcing";
|
|
2182
|
+
import { resourceId as createResourceId, uriToResourceId } from "@semiont/core";
|
|
2183
|
+
import { getEntityTypes as getEntityTypes2 } from "@semiont/ontology";
|
|
2184
|
+
var AnnotationContext = class {
|
|
2185
|
+
/**
|
|
2186
|
+
* Build LLM context for an annotation
|
|
2187
|
+
*
|
|
2188
|
+
* @param annotationUri - Full annotation URI (e.g., http://localhost:4000/annotations/abc123)
|
|
2189
|
+
* @param resourceId - Source resource ID
|
|
2190
|
+
* @param config - Application configuration
|
|
2191
|
+
* @param options - Context building options
|
|
2192
|
+
* @returns Rich context for LLM processing
|
|
2193
|
+
* @throws Error if annotation or resource not found
|
|
2194
|
+
*/
|
|
2195
|
+
static async buildLLMContext(annotationUri2, resourceId2, config, options = {}) {
|
|
2196
|
+
const {
|
|
2197
|
+
includeSourceContext = true,
|
|
2198
|
+
includeTargetContext = true,
|
|
2199
|
+
contextWindow = 1e3
|
|
2200
|
+
} = options;
|
|
2201
|
+
if (contextWindow < 100 || contextWindow > 5e3) {
|
|
2202
|
+
throw new Error("contextWindow must be between 100 and 5000");
|
|
2203
|
+
}
|
|
2204
|
+
console.log(`[AnnotationContext] buildLLMContext called with annotationUri=${annotationUri2}, resourceId=${resourceId2}`);
|
|
2205
|
+
const basePath = config.services.filesystem.path;
|
|
2206
|
+
console.log(`[AnnotationContext] basePath=${basePath}`);
|
|
2207
|
+
const projectRoot = config._metadata?.projectRoot;
|
|
2208
|
+
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
2209
|
+
const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
|
|
2210
|
+
console.log(`[AnnotationContext] Getting view for resourceId=${resourceId2}`);
|
|
2211
|
+
let sourceView;
|
|
2212
|
+
try {
|
|
2213
|
+
sourceView = await viewStorage.get(resourceId2);
|
|
2214
|
+
console.log(`[AnnotationContext] Got view:`, !!sourceView);
|
|
2215
|
+
if (!sourceView) {
|
|
2216
|
+
throw new Error("Source resource not found");
|
|
2217
|
+
}
|
|
2218
|
+
} catch (error) {
|
|
2219
|
+
console.error(`[AnnotationContext] Error getting view:`, error);
|
|
2220
|
+
throw error;
|
|
2221
|
+
}
|
|
2222
|
+
console.log(`[AnnotationContext] Looking for annotation ${annotationUri2} in resource ${resourceId2}`);
|
|
2223
|
+
console.log(`[AnnotationContext] View has ${sourceView.annotations.annotations.length} annotations`);
|
|
2224
|
+
console.log(`[AnnotationContext] First 5 annotation IDs:`, sourceView.annotations.annotations.slice(0, 5).map((a) => a.id));
|
|
2225
|
+
const annotation = sourceView.annotations.annotations.find((a) => a.id === annotationUri2);
|
|
2226
|
+
console.log(`[AnnotationContext] Found annotation:`, !!annotation);
|
|
2227
|
+
if (!annotation) {
|
|
2228
|
+
throw new Error("Annotation not found in view");
|
|
2229
|
+
}
|
|
2230
|
+
const targetSource = getTargetSource(annotation.target);
|
|
2231
|
+
const targetResourceId = targetSource.split("/").pop();
|
|
2232
|
+
console.log(`[AnnotationContext] Target source: ${targetSource}, Expected resource ID: ${resourceId2}, Extracted ID: ${targetResourceId}`);
|
|
2233
|
+
if (targetResourceId !== resourceId2) {
|
|
2234
|
+
throw new Error(`Annotation target resource ID (${targetResourceId}) does not match expected resource ID (${resourceId2})`);
|
|
2235
|
+
}
|
|
2236
|
+
const sourceDoc = sourceView.resource;
|
|
2237
|
+
const bodySource = getBodySource(annotation.body);
|
|
2238
|
+
let targetDoc = null;
|
|
2239
|
+
if (bodySource) {
|
|
2240
|
+
const parts = bodySource.split("/");
|
|
2241
|
+
const lastPart = parts[parts.length - 1];
|
|
2242
|
+
if (!lastPart) {
|
|
2243
|
+
throw new Error(`Invalid body source URI: ${bodySource}`);
|
|
2244
|
+
}
|
|
2245
|
+
const targetResourceId2 = createResourceId(lastPart);
|
|
2246
|
+
const targetView = await viewStorage.get(targetResourceId2);
|
|
2247
|
+
targetDoc = targetView?.resource || null;
|
|
2248
|
+
}
|
|
2249
|
+
let sourceContext;
|
|
2250
|
+
if (includeSourceContext) {
|
|
2251
|
+
const primaryRep = getPrimaryRepresentation3(sourceDoc);
|
|
2252
|
+
if (!primaryRep?.checksum || !primaryRep?.mediaType) {
|
|
2253
|
+
throw new Error("Source content not found");
|
|
2254
|
+
}
|
|
2255
|
+
const sourceContent = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
2256
|
+
const contentStr = decodeRepresentation3(sourceContent, primaryRep.mediaType);
|
|
2257
|
+
const targetSelectorRaw = getTargetSelector2(annotation.target);
|
|
2258
|
+
const targetSelector = Array.isArray(targetSelectorRaw) ? targetSelectorRaw[0] : targetSelectorRaw;
|
|
2259
|
+
console.log(`[AnnotationContext] Target selector type:`, targetSelector?.type);
|
|
2260
|
+
if (!targetSelector) {
|
|
2261
|
+
console.warn(`[AnnotationContext] No target selector found`);
|
|
2262
|
+
} else if (targetSelector.type === "TextPositionSelector") {
|
|
2263
|
+
const selector = targetSelector;
|
|
2264
|
+
const start = selector.start;
|
|
2265
|
+
const end = selector.end;
|
|
2266
|
+
const before = contentStr.slice(Math.max(0, start - contextWindow), start);
|
|
2267
|
+
const selected = contentStr.slice(start, end);
|
|
2268
|
+
const after = contentStr.slice(end, Math.min(contentStr.length, end + contextWindow));
|
|
2269
|
+
sourceContext = { before, selected, after };
|
|
2270
|
+
console.log(`[AnnotationContext] Built source context using TextPositionSelector (${start}-${end})`);
|
|
2271
|
+
} else if (targetSelector.type === "TextQuoteSelector") {
|
|
2272
|
+
const selector = targetSelector;
|
|
2273
|
+
const exact = selector.exact;
|
|
2274
|
+
const index = contentStr.indexOf(exact);
|
|
2275
|
+
if (index !== -1) {
|
|
2276
|
+
const start = index;
|
|
2277
|
+
const end = index + exact.length;
|
|
2278
|
+
const before = contentStr.slice(Math.max(0, start - contextWindow), start);
|
|
2279
|
+
const selected = exact;
|
|
2280
|
+
const after = contentStr.slice(end, Math.min(contentStr.length, end + contextWindow));
|
|
2281
|
+
sourceContext = { before, selected, after };
|
|
2282
|
+
console.log(`[AnnotationContext] Built source context using TextQuoteSelector (found at ${index})`);
|
|
2283
|
+
} else {
|
|
2284
|
+
console.warn(`[AnnotationContext] TextQuoteSelector exact text not found in content: "${exact.substring(0, 50)}..."`);
|
|
2285
|
+
}
|
|
2286
|
+
} else {
|
|
2287
|
+
console.warn(`[AnnotationContext] Unknown selector type: ${targetSelector.type}`);
|
|
2288
|
+
}
|
|
2289
|
+
}
|
|
2290
|
+
let targetContext;
|
|
2291
|
+
if (includeTargetContext && targetDoc) {
|
|
2292
|
+
const targetRep = getPrimaryRepresentation3(targetDoc);
|
|
2293
|
+
if (targetRep?.checksum && targetRep?.mediaType) {
|
|
2294
|
+
const targetContent = await repStore.retrieve(targetRep.checksum, targetRep.mediaType);
|
|
2295
|
+
const contentStr = decodeRepresentation3(targetContent, targetRep.mediaType);
|
|
2296
|
+
const client = await getInferenceClient2(config);
|
|
2297
|
+
targetContext = {
|
|
2298
|
+
content: contentStr.slice(0, contextWindow * 2),
|
|
2299
|
+
summary: await generateResourceSummary(targetDoc.name, contentStr, getResourceEntityTypes(targetDoc), client)
|
|
2300
|
+
};
|
|
2301
|
+
}
|
|
2302
|
+
}
|
|
2303
|
+
const suggestedResolution = void 0;
|
|
2304
|
+
const generationContext = sourceContext ? {
|
|
2305
|
+
sourceContext: {
|
|
2306
|
+
before: sourceContext.before || "",
|
|
2307
|
+
selected: sourceContext.selected,
|
|
2308
|
+
after: sourceContext.after || ""
|
|
2309
|
+
},
|
|
2310
|
+
metadata: {
|
|
2311
|
+
resourceType: "document",
|
|
2312
|
+
language: sourceDoc.language,
|
|
2313
|
+
entityTypes: getEntityTypes2(annotation)
|
|
2314
|
+
}
|
|
2315
|
+
} : void 0;
|
|
2316
|
+
const response = {
|
|
2317
|
+
annotation,
|
|
2318
|
+
sourceResource: sourceDoc,
|
|
2319
|
+
targetResource: targetDoc,
|
|
2320
|
+
...generationContext ? { context: generationContext } : {},
|
|
2321
|
+
...sourceContext ? { sourceContext } : {},
|
|
2322
|
+
// Keep for backward compatibility
|
|
2323
|
+
...targetContext ? { targetContext } : {},
|
|
2324
|
+
...suggestedResolution ? { suggestedResolution } : {}
|
|
2325
|
+
};
|
|
2326
|
+
return response;
|
|
2327
|
+
}
|
|
2328
|
+
/**
|
|
2329
|
+
* Get resource annotations from view storage (fast path)
|
|
2330
|
+
* Throws if view missing
|
|
2331
|
+
*/
|
|
2332
|
+
static async getResourceAnnotations(resourceId2, config) {
|
|
2333
|
+
if (!config.services?.filesystem?.path) {
|
|
2334
|
+
throw new Error("Filesystem path not found in configuration");
|
|
1100
2335
|
}
|
|
1101
|
-
|
|
1102
|
-
|
|
2336
|
+
const basePath = config.services.filesystem.path;
|
|
2337
|
+
const projectRoot = config._metadata?.projectRoot;
|
|
2338
|
+
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
2339
|
+
const view = await viewStorage.get(resourceId2);
|
|
2340
|
+
if (!view) {
|
|
2341
|
+
throw new Error(`Resource ${resourceId2} not found in view storage`);
|
|
2342
|
+
}
|
|
2343
|
+
return view.annotations;
|
|
1103
2344
|
}
|
|
1104
2345
|
/**
|
|
1105
|
-
*
|
|
2346
|
+
* Get all annotations
|
|
2347
|
+
* @returns Array of all annotation objects
|
|
1106
2348
|
*/
|
|
1107
|
-
async
|
|
1108
|
-
await
|
|
1109
|
-
|
|
1110
|
-
const assJob = job;
|
|
1111
|
-
if (!assJob.progress) return;
|
|
1112
|
-
const baseEvent = {
|
|
1113
|
-
resourceId: assJob.resourceId,
|
|
1114
|
-
userId: assJob.userId,
|
|
1115
|
-
version: 1
|
|
1116
|
-
};
|
|
1117
|
-
const isComplete = assJob.progress.percentage === 100 && assJob.result;
|
|
1118
|
-
if (this.isFirstProgress) {
|
|
1119
|
-
this.isFirstProgress = false;
|
|
1120
|
-
await this.eventStore.appendEvent({
|
|
1121
|
-
type: "job.started",
|
|
1122
|
-
...baseEvent,
|
|
1123
|
-
payload: {
|
|
1124
|
-
jobId: assJob.id,
|
|
1125
|
-
jobType: assJob.type
|
|
1126
|
-
}
|
|
1127
|
-
});
|
|
1128
|
-
} else if (isComplete) {
|
|
1129
|
-
await this.eventStore.appendEvent({
|
|
1130
|
-
type: "job.completed",
|
|
1131
|
-
...baseEvent,
|
|
1132
|
-
payload: {
|
|
1133
|
-
jobId: assJob.id,
|
|
1134
|
-
jobType: assJob.type,
|
|
1135
|
-
result: assJob.result
|
|
1136
|
-
}
|
|
1137
|
-
});
|
|
1138
|
-
} else {
|
|
1139
|
-
await this.eventStore.appendEvent({
|
|
1140
|
-
type: "job.progress",
|
|
1141
|
-
...baseEvent,
|
|
1142
|
-
payload: {
|
|
1143
|
-
jobId: assJob.id,
|
|
1144
|
-
jobType: assJob.type,
|
|
1145
|
-
progress: assJob.progress
|
|
1146
|
-
}
|
|
1147
|
-
});
|
|
1148
|
-
}
|
|
2349
|
+
static async getAllAnnotations(resourceId2, config) {
|
|
2350
|
+
const annotations = await this.getResourceAnnotations(resourceId2, config);
|
|
2351
|
+
return await this.enrichResolvedReferences(annotations.annotations, config);
|
|
1149
2352
|
}
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
2353
|
+
/**
|
|
2354
|
+
* Enrich reference annotations with resolved document names
|
|
2355
|
+
* Adds _resolvedDocumentName and _resolvedDocumentMediaType properties to annotations that link to documents
|
|
2356
|
+
* @private
|
|
2357
|
+
*/
|
|
2358
|
+
static async enrichResolvedReferences(annotations, config) {
|
|
2359
|
+
if (!config.services?.filesystem?.path) {
|
|
2360
|
+
return annotations;
|
|
2361
|
+
}
|
|
2362
|
+
const resolvedUris = /* @__PURE__ */ new Set();
|
|
2363
|
+
for (const ann of annotations) {
|
|
2364
|
+
if (ann.motivation === "linking" && ann.body) {
|
|
2365
|
+
const body = Array.isArray(ann.body) ? ann.body : [ann.body];
|
|
2366
|
+
for (const item of body) {
|
|
2367
|
+
if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
|
|
2368
|
+
resolvedUris.add(item.source);
|
|
2369
|
+
}
|
|
1163
2370
|
}
|
|
1164
|
-
}
|
|
2371
|
+
}
|
|
1165
2372
|
}
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
console.log(`[AssessmentDetectionWorker] Processing assessment detection for resource ${job.resourceId} (job: ${job.id})`);
|
|
1169
|
-
const resource = await ResourceContext.getResourceMetadata(job.resourceId, this.config);
|
|
1170
|
-
if (!resource) {
|
|
1171
|
-
throw new Error(`Resource ${job.resourceId} not found`);
|
|
2373
|
+
if (resolvedUris.size === 0) {
|
|
2374
|
+
return annotations;
|
|
1172
2375
|
}
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
job.progress = {
|
|
1180
|
-
stage: "analyzing",
|
|
1181
|
-
percentage: 30,
|
|
1182
|
-
message: "Analyzing text..."
|
|
1183
|
-
};
|
|
1184
|
-
await this.updateJobProgress(job);
|
|
1185
|
-
const assessments = await AnnotationDetection.detectAssessments(
|
|
1186
|
-
job.resourceId,
|
|
1187
|
-
this.config,
|
|
1188
|
-
job.instructions,
|
|
1189
|
-
job.tone,
|
|
1190
|
-
job.density
|
|
1191
|
-
);
|
|
1192
|
-
console.log(`[AssessmentDetectionWorker] Found ${assessments.length} assessments to create`);
|
|
1193
|
-
job.progress = {
|
|
1194
|
-
stage: "creating",
|
|
1195
|
-
percentage: 60,
|
|
1196
|
-
message: `Creating ${assessments.length} annotations...`
|
|
1197
|
-
};
|
|
1198
|
-
await this.updateJobProgress(job);
|
|
1199
|
-
let created = 0;
|
|
1200
|
-
for (const assessment of assessments) {
|
|
2376
|
+
const basePath = config.services.filesystem.path;
|
|
2377
|
+
const projectRoot = config._metadata?.projectRoot;
|
|
2378
|
+
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
2379
|
+
const metadataPromises = Array.from(resolvedUris).map(async (uri) => {
|
|
2380
|
+
const docId = uri.split("/resources/")[1];
|
|
2381
|
+
if (!docId) return null;
|
|
1201
2382
|
try {
|
|
1202
|
-
await
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
2383
|
+
const view = await viewStorage.get(docId);
|
|
2384
|
+
if (view?.resource?.name) {
|
|
2385
|
+
return {
|
|
2386
|
+
uri,
|
|
2387
|
+
metadata: {
|
|
2388
|
+
name: view.resource.name,
|
|
2389
|
+
mediaType: view.resource.mediaType
|
|
2390
|
+
}
|
|
2391
|
+
};
|
|
2392
|
+
}
|
|
2393
|
+
} catch (e) {
|
|
2394
|
+
}
|
|
2395
|
+
return null;
|
|
2396
|
+
});
|
|
2397
|
+
const results = await Promise.all(metadataPromises);
|
|
2398
|
+
const uriToMetadata = /* @__PURE__ */ new Map();
|
|
2399
|
+
for (const result of results) {
|
|
2400
|
+
if (result) {
|
|
2401
|
+
uriToMetadata.set(result.uri, result.metadata);
|
|
1206
2402
|
}
|
|
1207
2403
|
}
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
const backendUrl = this.config.services.backend?.publicURL;
|
|
1222
|
-
if (!backendUrl) throw new Error("Backend publicURL not configured");
|
|
1223
|
-
const annotationId2 = generateAnnotationId3(backendUrl);
|
|
1224
|
-
const resourceUri2 = resourceIdToURI4(resourceId2, backendUrl);
|
|
1225
|
-
const annotation = {
|
|
1226
|
-
"@context": "http://www.w3.org/ns/anno.jsonld",
|
|
1227
|
-
"type": "Annotation",
|
|
1228
|
-
"id": annotationId2,
|
|
1229
|
-
"motivation": "assessing",
|
|
1230
|
-
"creator": userId3(creatorUserId),
|
|
1231
|
-
"created": (/* @__PURE__ */ new Date()).toISOString(),
|
|
1232
|
-
"target": {
|
|
1233
|
-
type: "SpecificResource",
|
|
1234
|
-
source: resourceUri2,
|
|
1235
|
-
selector: [
|
|
1236
|
-
{
|
|
1237
|
-
type: "TextPositionSelector",
|
|
1238
|
-
start: assessment.start,
|
|
1239
|
-
end: assessment.end
|
|
1240
|
-
},
|
|
1241
|
-
{
|
|
1242
|
-
type: "TextQuoteSelector",
|
|
1243
|
-
exact: assessment.exact,
|
|
1244
|
-
...assessment.prefix && { prefix: assessment.prefix },
|
|
1245
|
-
...assessment.suffix && { suffix: assessment.suffix }
|
|
2404
|
+
return annotations.map((ann) => {
|
|
2405
|
+
if (ann.motivation === "linking" && ann.body) {
|
|
2406
|
+
const body = Array.isArray(ann.body) ? ann.body : [ann.body];
|
|
2407
|
+
for (const item of body) {
|
|
2408
|
+
if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
|
|
2409
|
+
const metadata = uriToMetadata.get(item.source);
|
|
2410
|
+
if (metadata) {
|
|
2411
|
+
return {
|
|
2412
|
+
...ann,
|
|
2413
|
+
_resolvedDocumentName: metadata.name,
|
|
2414
|
+
_resolvedDocumentMediaType: metadata.mediaType
|
|
2415
|
+
};
|
|
2416
|
+
}
|
|
1246
2417
|
}
|
|
1247
|
-
|
|
1248
|
-
},
|
|
1249
|
-
"body": {
|
|
1250
|
-
type: "TextualBody",
|
|
1251
|
-
value: assessment.assessment,
|
|
1252
|
-
format: "text/plain"
|
|
2418
|
+
}
|
|
1253
2419
|
}
|
|
1254
|
-
|
|
1255
|
-
await this.eventStore.appendEvent({
|
|
1256
|
-
type: "annotation.added",
|
|
1257
|
-
resourceId: resourceId2,
|
|
1258
|
-
userId: userId3(creatorUserId),
|
|
1259
|
-
version: 1,
|
|
1260
|
-
payload: { annotation }
|
|
2420
|
+
return ann;
|
|
1261
2421
|
});
|
|
1262
2422
|
}
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
this.config = config;
|
|
1275
|
-
this.eventStore = eventStore;
|
|
1276
|
-
}
|
|
1277
|
-
isFirstProgress = true;
|
|
1278
|
-
getWorkerName() {
|
|
1279
|
-
return "TagDetectionWorker";
|
|
1280
|
-
}
|
|
1281
|
-
canProcessJob(job) {
|
|
1282
|
-
return job.type === "tag-detection";
|
|
2423
|
+
/**
|
|
2424
|
+
* Get resource stats (version info)
|
|
2425
|
+
* @returns Version and timestamp info for the annotations
|
|
2426
|
+
*/
|
|
2427
|
+
static async getResourceStats(resourceId2, config) {
|
|
2428
|
+
const annotations = await this.getResourceAnnotations(resourceId2, config);
|
|
2429
|
+
return {
|
|
2430
|
+
resourceId: annotations.resourceId,
|
|
2431
|
+
version: annotations.version,
|
|
2432
|
+
updatedAt: annotations.updatedAt
|
|
2433
|
+
};
|
|
1283
2434
|
}
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
2435
|
+
/**
|
|
2436
|
+
* Check if resource exists in view storage
|
|
2437
|
+
*/
|
|
2438
|
+
static async resourceExists(resourceId2, config) {
|
|
2439
|
+
if (!config.services?.filesystem?.path) {
|
|
2440
|
+
throw new Error("Filesystem path not found in configuration");
|
|
1287
2441
|
}
|
|
1288
|
-
|
|
1289
|
-
|
|
2442
|
+
const basePath = config.services.filesystem.path;
|
|
2443
|
+
const projectRoot = config._metadata?.projectRoot;
|
|
2444
|
+
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
2445
|
+
return await viewStorage.exists(resourceId2);
|
|
1290
2446
|
}
|
|
1291
2447
|
/**
|
|
1292
|
-
*
|
|
2448
|
+
* Get a single annotation by ID
|
|
2449
|
+
* Loads the resource's view by resource ID, then scans its annotations for a matching short ID
|
|
1293
2450
|
*/
|
|
1294
|
-
async
|
|
1295
|
-
await
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
...baseEvent,
|
|
1310
|
-
payload: {
|
|
1311
|
-
jobId: tdJob.id,
|
|
1312
|
-
jobType: tdJob.type
|
|
1313
|
-
}
|
|
1314
|
-
});
|
|
1315
|
-
} else if (isComplete) {
|
|
1316
|
-
await this.eventStore.appendEvent({
|
|
1317
|
-
type: "job.completed",
|
|
1318
|
-
...baseEvent,
|
|
1319
|
-
payload: {
|
|
1320
|
-
jobId: tdJob.id,
|
|
1321
|
-
jobType: tdJob.type,
|
|
1322
|
-
result: tdJob.result
|
|
1323
|
-
}
|
|
1324
|
-
});
|
|
1325
|
-
} else {
|
|
1326
|
-
await this.eventStore.appendEvent({
|
|
1327
|
-
type: "job.progress",
|
|
1328
|
-
...baseEvent,
|
|
1329
|
-
payload: {
|
|
1330
|
-
jobId: tdJob.id,
|
|
1331
|
-
jobType: tdJob.type,
|
|
1332
|
-
progress: tdJob.progress
|
|
1333
|
-
}
|
|
1334
|
-
});
|
|
2451
|
+
static async getAnnotation(annotationId2, resourceId2, config) {
|
|
2452
|
+
const annotations = await this.getResourceAnnotations(resourceId2, config);
|
|
2453
|
+
return annotations.annotations.find((a) => {
|
|
2454
|
+
const shortId = a.id.split("/").pop();
|
|
2455
|
+
return shortId === annotationId2;
|
|
2456
|
+
}) || null;
|
|
2457
|
+
}
|
|
2458
|
+
/**
|
|
2459
|
+
* List annotations with optional filtering
|
|
2460
|
+
* @param filters - Optional filters like resourceId and type
|
|
2461
|
+
* @throws Error if resourceId not provided (cross-resource queries not supported in view storage)
|
|
2462
|
+
*/
|
|
2463
|
+
static async listAnnotations(filters, config) {
|
|
2464
|
+
if (!filters?.resourceId) {
|
|
2465
|
+
throw new Error("resourceId is required for annotation listing - cross-resource queries not supported in view storage");
|
|
1335
2466
|
}
|
|
2467
|
+
return await this.getAllAnnotations(filters.resourceId, config);
|
|
1336
2468
|
}
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
jobId: tdJob.id,
|
|
1348
|
-
jobType: tdJob.type,
|
|
1349
|
-
error: "Tag detection failed. Please try again later."
|
|
1350
|
-
}
|
|
1351
|
-
});
|
|
2469
|
+
/**
|
|
2470
|
+
* Get annotation context (selected text with surrounding context)
|
|
2471
|
+
*/
|
|
2472
|
+
static async getAnnotationContext(annotationId2, resourceId2, contextBefore, contextAfter, config) {
|
|
2473
|
+
const basePath = config.services.filesystem.path;
|
|
2474
|
+
const projectRoot = config._metadata?.projectRoot;
|
|
2475
|
+
const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
|
|
2476
|
+
const annotation = await this.getAnnotation(annotationId2, resourceId2, config);
|
|
2477
|
+
if (!annotation) {
|
|
2478
|
+
throw new Error("Annotation not found");
|
|
1352
2479
|
}
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
if (!
|
|
1358
|
-
throw new Error(
|
|
2480
|
+
const resource = await ResourceContext.getResourceMetadata(
|
|
2481
|
+
uriToResourceId(getTargetSource(annotation.target)),
|
|
2482
|
+
config
|
|
2483
|
+
);
|
|
2484
|
+
if (!resource) {
|
|
2485
|
+
throw new Error("Resource not found");
|
|
1359
2486
|
}
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
2487
|
+
const contentStr = await this.getResourceContent(resource, repStore);
|
|
2488
|
+
const context = this.extractAnnotationContext(annotation, contentStr, contextBefore, contextAfter);
|
|
2489
|
+
return {
|
|
2490
|
+
annotation,
|
|
2491
|
+
context,
|
|
2492
|
+
resource: {
|
|
2493
|
+
"@context": resource["@context"],
|
|
2494
|
+
"@id": resource["@id"],
|
|
2495
|
+
name: resource.name,
|
|
2496
|
+
entityTypes: resource.entityTypes,
|
|
2497
|
+
representations: resource.representations,
|
|
2498
|
+
archived: resource.archived,
|
|
2499
|
+
creationMethod: resource.creationMethod,
|
|
2500
|
+
wasAttributedTo: resource.wasAttributedTo,
|
|
2501
|
+
dateCreated: resource.dateCreated
|
|
1363
2502
|
}
|
|
2503
|
+
};
|
|
2504
|
+
}
|
|
2505
|
+
/**
|
|
2506
|
+
* Generate AI summary of annotation in context
|
|
2507
|
+
*/
|
|
2508
|
+
static async generateAnnotationSummary(annotationId2, resourceId2, config) {
|
|
2509
|
+
const basePath = config.services.filesystem.path;
|
|
2510
|
+
const projectRoot = config._metadata?.projectRoot;
|
|
2511
|
+
const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
|
|
2512
|
+
const annotation = await this.getAnnotation(annotationId2, resourceId2, config);
|
|
2513
|
+
if (!annotation) {
|
|
2514
|
+
throw new Error("Annotation not found");
|
|
1364
2515
|
}
|
|
1365
|
-
const resource = await ResourceContext.getResourceMetadata(
|
|
2516
|
+
const resource = await ResourceContext.getResourceMetadata(
|
|
2517
|
+
uriToResourceId(getTargetSource(annotation.target)),
|
|
2518
|
+
config
|
|
2519
|
+
);
|
|
1366
2520
|
if (!resource) {
|
|
1367
|
-
throw new Error(
|
|
1368
|
-
}
|
|
1369
|
-
job.progress = {
|
|
1370
|
-
stage: "analyzing",
|
|
1371
|
-
percentage: 10,
|
|
1372
|
-
processedCategories: 0,
|
|
1373
|
-
totalCategories: job.categories.length,
|
|
1374
|
-
message: "Loading resource..."
|
|
1375
|
-
};
|
|
1376
|
-
await this.updateJobProgress(job);
|
|
1377
|
-
const allTags = [];
|
|
1378
|
-
const byCategory = {};
|
|
1379
|
-
for (let i = 0; i < job.categories.length; i++) {
|
|
1380
|
-
const category = job.categories[i];
|
|
1381
|
-
job.progress = {
|
|
1382
|
-
stage: "analyzing",
|
|
1383
|
-
percentage: 10 + Math.floor(i / job.categories.length * 50),
|
|
1384
|
-
currentCategory: category,
|
|
1385
|
-
processedCategories: i + 1,
|
|
1386
|
-
totalCategories: job.categories.length,
|
|
1387
|
-
message: `Analyzing ${category}...`
|
|
1388
|
-
};
|
|
1389
|
-
await this.updateJobProgress(job);
|
|
1390
|
-
const tags = await AnnotationDetection.detectTags(
|
|
1391
|
-
job.resourceId,
|
|
1392
|
-
this.config,
|
|
1393
|
-
job.schemaId,
|
|
1394
|
-
category
|
|
1395
|
-
);
|
|
1396
|
-
console.log(`[TagDetectionWorker] Found ${tags.length} tags for category "${category}"`);
|
|
1397
|
-
allTags.push(...tags);
|
|
1398
|
-
byCategory[category] = tags.length;
|
|
2521
|
+
throw new Error("Resource not found");
|
|
1399
2522
|
}
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
2523
|
+
const contentStr = await this.getResourceContent(resource, repStore);
|
|
2524
|
+
const contextSize = 500;
|
|
2525
|
+
const context = this.extractAnnotationContext(annotation, contentStr, contextSize, contextSize);
|
|
2526
|
+
const annotationEntityTypes = getEntityTypes2(annotation);
|
|
2527
|
+
const summary = await this.generateSummary(resource, context, annotationEntityTypes, config);
|
|
2528
|
+
return {
|
|
2529
|
+
summary,
|
|
2530
|
+
relevantFields: {
|
|
2531
|
+
resourceId: resource.id,
|
|
2532
|
+
resourceName: resource.name,
|
|
2533
|
+
entityTypes: annotationEntityTypes
|
|
2534
|
+
},
|
|
2535
|
+
context: {
|
|
2536
|
+
before: context.before.substring(Math.max(0, context.before.length - 200)),
|
|
2537
|
+
// Last 200 chars
|
|
2538
|
+
selected: context.selected,
|
|
2539
|
+
after: context.after.substring(0, 200)
|
|
2540
|
+
// First 200 chars
|
|
1415
2541
|
}
|
|
1416
|
-
}
|
|
1417
|
-
job.result = {
|
|
1418
|
-
tagsFound: allTags.length,
|
|
1419
|
-
tagsCreated: created,
|
|
1420
|
-
byCategory
|
|
1421
2542
|
};
|
|
1422
|
-
job.progress = {
|
|
1423
|
-
stage: "creating",
|
|
1424
|
-
percentage: 100,
|
|
1425
|
-
processedCategories: job.categories.length,
|
|
1426
|
-
totalCategories: job.categories.length,
|
|
1427
|
-
message: `Complete! Created ${created} tags`
|
|
1428
|
-
};
|
|
1429
|
-
await this.updateJobProgress(job);
|
|
1430
|
-
console.log(`[TagDetectionWorker] \u2705 Created ${created}/${allTags.length} tags across ${job.categories.length} categories`);
|
|
1431
2543
|
}
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
2544
|
+
/**
|
|
2545
|
+
* Get resource content as string
|
|
2546
|
+
*/
|
|
2547
|
+
static async getResourceContent(resource, repStore) {
|
|
2548
|
+
const primaryRep = getPrimaryRepresentation3(resource);
|
|
2549
|
+
if (!primaryRep?.checksum || !primaryRep?.mediaType) {
|
|
2550
|
+
throw new Error("Resource content not found");
|
|
1436
2551
|
}
|
|
1437
|
-
const
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
suffix: tag.suffix || ""
|
|
1458
|
-
}
|
|
1459
|
-
]
|
|
1460
|
-
},
|
|
1461
|
-
body: [
|
|
1462
|
-
{
|
|
1463
|
-
type: "TextualBody",
|
|
1464
|
-
value: tag.category,
|
|
1465
|
-
purpose: "tagging",
|
|
1466
|
-
format: "text/plain",
|
|
1467
|
-
language: "en"
|
|
1468
|
-
},
|
|
1469
|
-
{
|
|
1470
|
-
type: "TextualBody",
|
|
1471
|
-
value: schemaId,
|
|
1472
|
-
purpose: "classifying",
|
|
1473
|
-
format: "text/plain"
|
|
1474
|
-
}
|
|
1475
|
-
]
|
|
2552
|
+
const content = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
2553
|
+
return decodeRepresentation3(content, primaryRep.mediaType);
|
|
2554
|
+
}
|
|
2555
|
+
/**
|
|
2556
|
+
* Extract annotation context from resource content
|
|
2557
|
+
*/
|
|
2558
|
+
static extractAnnotationContext(annotation, contentStr, contextBefore, contextAfter) {
|
|
2559
|
+
const targetSelector = getTargetSelector2(annotation.target);
|
|
2560
|
+
const posSelector = targetSelector ? getTextPositionSelector(targetSelector) : null;
|
|
2561
|
+
if (!posSelector) {
|
|
2562
|
+
throw new Error("TextPositionSelector required for context");
|
|
2563
|
+
}
|
|
2564
|
+
const selStart = posSelector.start;
|
|
2565
|
+
const selEnd = posSelector.end;
|
|
2566
|
+
const start = Math.max(0, selStart - contextBefore);
|
|
2567
|
+
const end = Math.min(contentStr.length, selEnd + contextAfter);
|
|
2568
|
+
return {
|
|
2569
|
+
before: contentStr.substring(start, selStart),
|
|
2570
|
+
selected: contentStr.substring(selStart, selEnd),
|
|
2571
|
+
after: contentStr.substring(selEnd, end)
|
|
1476
2572
|
};
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
2573
|
+
}
|
|
2574
|
+
/**
|
|
2575
|
+
* Generate LLM summary of annotation in context
|
|
2576
|
+
* Creates inference client per-request (HTTP handler context)
|
|
2577
|
+
*/
|
|
2578
|
+
static async generateSummary(resource, context, entityTypes, config) {
|
|
2579
|
+
const summaryPrompt = `Summarize this text in context:
|
|
2580
|
+
|
|
2581
|
+
Context before: "${context.before.substring(Math.max(0, context.before.length - 200))}"
|
|
2582
|
+
Selected exact: "${context.selected}"
|
|
2583
|
+
Context after: "${context.after.substring(0, 200)}"
|
|
2584
|
+
|
|
2585
|
+
Resource: ${resource.name}
|
|
2586
|
+
Entity types: ${entityTypes.join(", ")}`;
|
|
2587
|
+
const client = await getInferenceClient2(config);
|
|
2588
|
+
return await client.generateText(summaryPrompt, 500, 0.5);
|
|
1487
2589
|
}
|
|
1488
2590
|
};
|
|
1489
2591
|
|
|
1490
|
-
// src/
|
|
1491
|
-
import {
|
|
1492
|
-
import { generateAnnotationId as generateAnnotationId5 } from "@semiont/event-sourcing";
|
|
2592
|
+
// src/graph-context.ts
|
|
2593
|
+
import { getGraphDatabase as getGraphDatabase2 } from "@semiont/graph";
|
|
1493
2594
|
import { resourceIdToURI as resourceIdToURI6 } from "@semiont/core";
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
super(jobQueue);
|
|
1504
|
-
this.config = config;
|
|
1505
|
-
this.eventStore = eventStore;
|
|
2595
|
+
var GraphContext = class {
  /**
   * Get all resources referencing this resource (backlinks).
   * Requires graph traversal - must use graph database.
   *
   * @param resourceId2 - Id of the resource whose backlinks are wanted
   * @param config - Configuration; `services.backend.publicURL` is needed to build the resource URI
   * @returns Whatever the graph database's `getResourceReferencedBy` resolves to
   * @throws Error if the backend public URL is not configured
   */
  static async getBacklinks(resourceId2, config) {
    // Validate before touching the graph database so a misconfiguration
    // surfaces as a clear error instead of a TypeError on property access.
    const backendUrl = config.services?.backend?.publicURL;
    if (!backendUrl) {
      throw new Error("Backend publicURL not configured");
    }
    const graphDb = await getGraphDatabase2(config);
    const resourceUri3 = resourceIdToURI6(resourceId2, backendUrl);
    return await graphDb.getResourceReferencedBy(resourceUri3);
  }
  /**
   * Find shortest path between two resources.
   * Requires graph traversal - must use graph database.
   *
   * @param fromResourceId - Start resource id, passed through to the graph database unchanged
   * @param toResourceId - End resource id, passed through to the graph database unchanged
   * @param config - Configuration used to obtain the graph database
   * @param maxDepth - Optional depth limit forwarded to the graph database
   */
  static async findPath(fromResourceId, toResourceId, config, maxDepth) {
    const graphDb = await getGraphDatabase2(config);
    return await graphDb.findPath(fromResourceId, toResourceId, maxDepth);
  }
  /**
   * Get resource connections (graph edges).
   * Requires graph traversal - must use graph database.
   *
   * @param resourceId2 - Resource id, passed through to the graph database unchanged
   * @param config - Configuration used to obtain the graph database
   */
  static async getResourceConnections(resourceId2, config) {
    const graphDb = await getGraphDatabase2(config);
    return await graphDb.getResourceConnections(resourceId2);
  }
  /**
   * Search resources by name (cross-resource query).
   * Requires full-text search - must use graph database.
   *
   * @param query - Search text forwarded to the graph database
   * @param config - Configuration used to obtain the graph database
   * @param limit - Optional result limit forwarded to the graph database
   */
  static async searchResources(query, config, limit) {
    const graphDb = await getGraphDatabase2(config);
    return await graphDb.searchResources(query, limit);
  }
};
|
|
2630
|
+
|
|
2631
|
+
// src/annotation-detection.ts
|
|
2632
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore6 } from "@semiont/content";
|
|
2633
|
+
import { getPrimaryRepresentation as getPrimaryRepresentation4, decodeRepresentation as decodeRepresentation4 } from "@semiont/api-client";
|
|
2634
|
+
|
|
2635
|
+
// src/detection/motivation-prompts.ts
|
|
2636
|
+
var MotivationPrompts = class {
|
|
2637
|
+
/**
|
|
2638
|
+
* Build a prompt for detecting comment-worthy passages
|
|
2639
|
+
*
|
|
2640
|
+
* @param content - The text content to analyze (will be truncated to 8000 chars)
|
|
2641
|
+
* @param instructions - Optional user-provided instructions
|
|
2642
|
+
* @param tone - Optional tone guidance (e.g., "academic", "conversational")
|
|
2643
|
+
* @param density - Optional target number of comments per 2000 words
|
|
2644
|
+
* @returns Formatted prompt string
|
|
2645
|
+
*/
|
|
2646
|
+
static buildCommentPrompt(content, instructions, tone, density) {
|
|
2647
|
+
let prompt;
|
|
2648
|
+
if (instructions) {
|
|
2649
|
+
const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
|
|
2650
|
+
const densityGuidance = density ? `
|
|
2651
|
+
|
|
2652
|
+
Aim for approximately ${density} comments per 2000 words of text.` : "";
|
|
2653
|
+
prompt = `Add comments to passages in this text following these instructions:
|
|
2654
|
+
|
|
2655
|
+
${instructions}${toneGuidance}${densityGuidance}
|
|
2656
|
+
|
|
2657
|
+
Text to analyze:
|
|
2658
|
+
---
|
|
2659
|
+
${content.substring(0, 8e3)}
|
|
2660
|
+
---
|
|
2661
|
+
|
|
2662
|
+
Return a JSON array of comments. Each comment must have:
|
|
2663
|
+
- "exact": the exact text passage being commented on (quoted verbatim from source)
|
|
2664
|
+
- "start": character offset where the passage starts
|
|
2665
|
+
- "end": character offset where the passage ends
|
|
2666
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2667
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2668
|
+
- "comment": your comment following the instructions above
|
|
2669
|
+
|
|
2670
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2671
|
+
|
|
2672
|
+
Example:
|
|
2673
|
+
[
|
|
2674
|
+
{"exact": "the quarterly review meeting", "start": 142, "end": 169, "prefix": "We need to schedule ", "suffix": " for next month.", "comment": "Who will lead this? Should we invite the external auditors?"}
|
|
2675
|
+
]`;
|
|
2676
|
+
} else {
|
|
2677
|
+
const toneGuidance = tone ? `
|
|
2678
|
+
|
|
2679
|
+
Tone: Use a ${tone} style in your comments.` : "";
|
|
2680
|
+
const densityGuidance = density ? `
|
|
2681
|
+
- Aim for approximately ${density} comments per 2000 words` : `
|
|
2682
|
+
- Aim for 3-8 comments per 2000 words (not too sparse or dense)`;
|
|
2683
|
+
prompt = `Identify passages in this text that would benefit from explanatory comments.
|
|
2684
|
+
For each passage, provide contextual information, clarification, or background.${toneGuidance}
|
|
2685
|
+
|
|
2686
|
+
Guidelines:
|
|
2687
|
+
- Select passages that reference technical terms, historical figures, complex concepts, or unclear references
|
|
2688
|
+
- Provide comments that ADD VALUE beyond restating the text
|
|
2689
|
+
- Focus on explanation, background, or connections to other ideas
|
|
2690
|
+
- Avoid obvious or trivial comments
|
|
2691
|
+
- Keep comments concise (1-3 sentences typically)${densityGuidance}
|
|
2692
|
+
|
|
2693
|
+
Text to analyze:
|
|
2694
|
+
---
|
|
2695
|
+
${content.substring(0, 8e3)}
|
|
2696
|
+
---
|
|
2697
|
+
|
|
2698
|
+
Return a JSON array of comments. Each comment should have:
|
|
2699
|
+
- "exact": the exact text passage being commented on (quoted verbatim from source)
|
|
2700
|
+
- "start": character offset where the passage starts
|
|
2701
|
+
- "end": character offset where the passage ends
|
|
2702
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2703
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2704
|
+
- "comment": your explanatory comment (1-3 sentences, provide context/background/clarification)
|
|
2705
|
+
|
|
2706
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2707
|
+
|
|
2708
|
+
Example format:
|
|
2709
|
+
[
|
|
2710
|
+
{"exact": "Ouranos", "start": 52, "end": 59, "prefix": "In the beginning, ", "suffix": " ruled the universe", "comment": "Ouranos (also spelled Uranus) is the primordial Greek deity personifying the sky. In Hesiod's Theogony, he is the son and husband of Gaia (Earth) and father of the Titans."}
|
|
2711
|
+
]`;
|
|
2712
|
+
}
|
|
2713
|
+
return prompt;
|
|
2714
|
+
}
|
|
2715
|
+
/**
|
|
2716
|
+
* Build a prompt for detecting highlight-worthy passages
|
|
2717
|
+
*
|
|
2718
|
+
* @param content - The text content to analyze (will be truncated to 8000 chars)
|
|
2719
|
+
* @param instructions - Optional user-provided instructions
|
|
2720
|
+
* @param density - Optional target number of highlights per 2000 words
|
|
2721
|
+
* @returns Formatted prompt string
|
|
2722
|
+
*/
|
|
2723
|
+
static buildHighlightPrompt(content, instructions, density) {
|
|
2724
|
+
let prompt;
|
|
2725
|
+
if (instructions) {
|
|
2726
|
+
const densityGuidance = density ? `
|
|
2727
|
+
|
|
2728
|
+
Aim for approximately ${density} highlights per 2000 words of text.` : "";
|
|
2729
|
+
prompt = `Identify passages in this text to highlight following these instructions:
|
|
2730
|
+
|
|
2731
|
+
${instructions}${densityGuidance}
|
|
2732
|
+
|
|
2733
|
+
Text to analyze:
|
|
2734
|
+
---
|
|
2735
|
+
${content.substring(0, 8e3)}
|
|
2736
|
+
---
|
|
2737
|
+
|
|
2738
|
+
Return a JSON array of highlights. Each highlight must have:
|
|
2739
|
+
- "exact": the exact text passage to highlight (quoted verbatim from source)
|
|
2740
|
+
- "start": character offset where the passage starts
|
|
2741
|
+
- "end": character offset where the passage ends
|
|
2742
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2743
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2744
|
+
|
|
2745
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2746
|
+
|
|
2747
|
+
Example:
|
|
2748
|
+
[
|
|
2749
|
+
{"exact": "revenue grew 45% year-over-year", "start": 142, "end": 174, "prefix": "In Q3 2024, ", "suffix": ", exceeding all forecasts."}
|
|
2750
|
+
]`;
|
|
2751
|
+
} else {
|
|
2752
|
+
const densityGuidance = density ? `
|
|
2753
|
+
- Aim for approximately ${density} highlights per 2000 words` : `
|
|
2754
|
+
- Aim for 3-8 highlights per 2000 words (be selective)`;
|
|
2755
|
+
prompt = `Identify passages in this text that merit highlighting for their importance or salience.
|
|
2756
|
+
Focus on content that readers should notice and remember.
|
|
2757
|
+
|
|
2758
|
+
Guidelines:
|
|
2759
|
+
- Highlight key claims, findings, or conclusions
|
|
2760
|
+
- Highlight important definitions, terminology, or concepts
|
|
2761
|
+
- Highlight notable quotes or particularly striking statements
|
|
2762
|
+
- Highlight critical decisions, action items, or turning points
|
|
2763
|
+
- Select passages that are SIGNIFICANT, not just interesting
|
|
2764
|
+
- Avoid trivial or obvious content${densityGuidance}
|
|
2765
|
+
|
|
2766
|
+
Text to analyze:
|
|
2767
|
+
---
|
|
2768
|
+
${content.substring(0, 8e3)}
|
|
2769
|
+
---
|
|
2770
|
+
|
|
2771
|
+
Return a JSON array of highlights. Each highlight should have:
|
|
2772
|
+
- "exact": the exact text passage to highlight (quoted verbatim from source)
|
|
2773
|
+
- "start": character offset where the passage starts
|
|
2774
|
+
- "end": character offset where the passage ends
|
|
2775
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2776
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2777
|
+
|
|
2778
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2779
|
+
|
|
2780
|
+
Example format:
|
|
2781
|
+
[
|
|
2782
|
+
{"exact": "we will discontinue support for legacy systems by March 2025", "start": 52, "end": 113, "prefix": "After careful consideration, ", "suffix": ". This decision affects"}
|
|
2783
|
+
]`;
|
|
2784
|
+
}
|
|
2785
|
+
return prompt;
|
|
1512
2786
|
}
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
2787
|
+
/**
|
|
2788
|
+
* Build a prompt for detecting assessment-worthy passages
|
|
2789
|
+
*
|
|
2790
|
+
* @param content - The text content to analyze (will be truncated to 8000 chars)
|
|
2791
|
+
* @param instructions - Optional user-provided instructions
|
|
2792
|
+
* @param tone - Optional tone guidance (e.g., "critical", "supportive")
|
|
2793
|
+
* @param density - Optional target number of assessments per 2000 words
|
|
2794
|
+
* @returns Formatted prompt string
|
|
2795
|
+
*/
|
|
2796
|
+
static buildAssessmentPrompt(content, instructions, tone, density) {
|
|
2797
|
+
let prompt;
|
|
2798
|
+
if (instructions) {
|
|
2799
|
+
const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
|
|
2800
|
+
const densityGuidance = density ? `
|
|
2801
|
+
|
|
2802
|
+
Aim for approximately ${density} assessments per 2000 words of text.` : "";
|
|
2803
|
+
prompt = `Assess passages in this text following these instructions:
|
|
2804
|
+
|
|
2805
|
+
${instructions}${toneGuidance}${densityGuidance}
|
|
2806
|
+
|
|
2807
|
+
Text to analyze:
|
|
2808
|
+
---
|
|
2809
|
+
${content.substring(0, 8e3)}
|
|
2810
|
+
---
|
|
2811
|
+
|
|
2812
|
+
Return a JSON array of assessments. Each assessment must have:
|
|
2813
|
+
- "exact": the exact text passage being assessed (quoted verbatim from source)
|
|
2814
|
+
- "start": character offset where the passage starts
|
|
2815
|
+
- "end": character offset where the passage ends
|
|
2816
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2817
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2818
|
+
- "assessment": your assessment following the instructions above
|
|
2819
|
+
|
|
2820
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2821
|
+
|
|
2822
|
+
Example:
|
|
2823
|
+
[
|
|
2824
|
+
{"exact": "the quarterly revenue target", "start": 142, "end": 169, "prefix": "We established ", "suffix": " for Q4 2024.", "assessment": "This target seems ambitious given market conditions. Consider revising based on recent trends."}
|
|
2825
|
+
]`;
|
|
2826
|
+
} else {
|
|
2827
|
+
const toneGuidance = tone ? `
|
|
2828
|
+
|
|
2829
|
+
Tone: Use a ${tone} style in your assessments.` : "";
|
|
2830
|
+
const densityGuidance = density ? `
|
|
2831
|
+
- Aim for approximately ${density} assessments per 2000 words` : `
|
|
2832
|
+
- Aim for 2-6 assessments per 2000 words (focus on key passages)`;
|
|
2833
|
+
prompt = `Identify passages in this text that merit critical assessment or evaluation.
|
|
2834
|
+
For each passage, provide analysis of its validity, strength, or implications.${toneGuidance}
|
|
2835
|
+
|
|
2836
|
+
Guidelines:
|
|
2837
|
+
- Select passages containing claims, arguments, conclusions, or assertions
|
|
2838
|
+
- Assess evidence quality, logical soundness, or practical implications
|
|
2839
|
+
- Provide assessments that ADD INSIGHT beyond restating the text
|
|
2840
|
+
- Focus on passages where evaluation would help readers form judgments
|
|
2841
|
+
- Keep assessments concise yet substantive (1-3 sentences typically)${densityGuidance}
|
|
2842
|
+
|
|
2843
|
+
Text to analyze:
|
|
2844
|
+
---
|
|
2845
|
+
${content.substring(0, 8e3)}
|
|
2846
|
+
---
|
|
2847
|
+
|
|
2848
|
+
Return a JSON array of assessments. Each assessment should have:
|
|
2849
|
+
- "exact": the exact text passage being assessed (quoted verbatim from source)
|
|
2850
|
+
- "start": character offset where the passage starts
|
|
2851
|
+
- "end": character offset where the passage ends
|
|
2852
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2853
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2854
|
+
- "assessment": your analytical assessment (1-3 sentences, evaluate validity/strength/implications)
|
|
2855
|
+
|
|
2856
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2857
|
+
|
|
2858
|
+
Example format:
|
|
2859
|
+
[
|
|
2860
|
+
{"exact": "AI will replace most jobs by 2030", "start": 52, "end": 89, "prefix": "Many experts predict that ", "suffix": ", fundamentally reshaping", "assessment": "This claim lacks nuance and supporting evidence. Employment patterns historically show job transformation rather than wholesale replacement. The timeline appears speculative without specific sector analysis."}
|
|
2861
|
+
]`;
|
|
1516
2862
|
}
|
|
1517
|
-
|
|
2863
|
+
return prompt;
|
|
1518
2864
|
}
|
|
1519
2865
|
/**
|
|
1520
|
-
*
|
|
1521
|
-
* Self-contained implementation for reference detection
|
|
2866
|
+
* Build a prompt for detecting structural tags
|
|
1522
2867
|
*
|
|
1523
|
-
*
|
|
2868
|
+
* @param content - The full text content to analyze (NOT truncated for structural analysis)
|
|
2869
|
+
* @param category - The specific category to detect
|
|
2870
|
+
* @param schemaName - Human-readable schema name
|
|
2871
|
+
* @param schemaDescription - Schema description
|
|
2872
|
+
* @param schemaDomain - Schema domain
|
|
2873
|
+
* @param categoryDescription - Category description
|
|
2874
|
+
* @param categoryExamples - Example questions/guidance for this category
|
|
2875
|
+
* @returns Formatted prompt string
|
|
1524
2876
|
*/
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
2877
|
+
static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples) {
|
|
2878
|
+
const prompt = `You are analyzing a text using the ${schemaName} framework.
|
|
2879
|
+
|
|
2880
|
+
Schema: ${schemaDescription}
|
|
2881
|
+
Domain: ${schemaDomain}
|
|
2882
|
+
|
|
2883
|
+
Your task: Identify passages that serve the structural role of "${category}".
|
|
2884
|
+
|
|
2885
|
+
Category: ${category}
|
|
2886
|
+
Description: ${categoryDescription}
|
|
2887
|
+
Key questions:
|
|
2888
|
+
${categoryExamples.map((ex) => `- ${ex}`).join("\n")}
|
|
2889
|
+
|
|
2890
|
+
Guidelines:
|
|
2891
|
+
- Focus on STRUCTURAL FUNCTION, not semantic content
|
|
2892
|
+
- A passage serves the "${category}" role if it performs this function in the document's structure
|
|
2893
|
+
- Look for passages that explicitly fulfill this role
|
|
2894
|
+
- Passages can be sentences, paragraphs, or sections
|
|
2895
|
+
- Aim for precision - only tag passages that clearly serve this structural role
|
|
2896
|
+
- Typical documents have 1-5 instances of each category (some may have 0)
|
|
2897
|
+
|
|
2898
|
+
Text to analyze:
|
|
2899
|
+
---
|
|
2900
|
+
${content}
|
|
2901
|
+
---
|
|
2902
|
+
|
|
2903
|
+
Return a JSON array of tags. Each tag should have:
|
|
2904
|
+
- "exact": the exact text passage (quoted verbatim from source)
|
|
2905
|
+
- "start": character offset where the passage starts
|
|
2906
|
+
- "end": character offset where the passage ends
|
|
2907
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2908
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2909
|
+
|
|
2910
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2911
|
+
|
|
2912
|
+
Example format:
|
|
2913
|
+
[
|
|
2914
|
+
{"exact": "What duty did the defendant owe?", "start": 142, "end": 175, "prefix": "The central question is: ", "suffix": " This question must be"},
|
|
2915
|
+
{"exact": "In tort law, a duty of care is established when...", "start": 412, "end": 520, "prefix": "Legal framework:\\n", "suffix": "\\n\\nApplying this standard"}
|
|
2916
|
+
]`;
|
|
2917
|
+
return prompt;
|
|
2918
|
+
}
|
|
2919
|
+
};
|
|
2920
|
+
|
|
2921
|
+
// src/detection/motivation-parsers.ts
|
|
2922
|
+
import { validateAndCorrectOffsets as validateAndCorrectOffsets2 } from "@semiont/api-client";
|
|
2923
|
+
var MotivationParsers = class {
  /**
   * Remove an optional markdown code fence from a raw AI response.
   *
   * Shared by every parser so that all of them accept the same shapes:
   * a bare JSON payload, a multi-line fenced block, a single-line fenced
   * block ("```json[...]```"), and a fenced block followed by trailing prose.
   *
   * @param response - Raw AI response string
   * @returns The trimmed payload with the surrounding fence (if any) removed
   */
  static stripCodeFence(response) {
    const text = response.trim();
    if (!text.startsWith("```")) {
      return text;
    }
    // Drop the opening fence and its info string (e.g. "json"), stopping at the
    // end of that line or at the first JSON opener, whichever comes first.
    const body = text.replace(/^```[^\n\[{]*\n?/, "");
    // Cut at the LAST fence so anything the model wrote after it is ignored.
    const closing = body.lastIndexOf("```");
    return (closing === -1 ? body : body.slice(0, closing)).trim();
  }
  /**
   * Run each match through the offset validator and collect the corrected ones.
   *
   * A match for which the validator throws is logged and skipped; the
   * remaining matches are still processed.
   *
   * @param matches - Parsed matches carrying `exact`, `start` and `end`
   * @param content - Original content to validate offsets against
   * @param describeSkip - Builds the warning text for a skipped match
   * @param extraFields - Extra properties merged into every validated match
   * @returns Array of matches with `start`, `end`, `prefix` and `suffix` taken from the validator
   */
  static applyOffsetValidation(matches, content, describeSkip, extraFields = {}) {
    const validatedMatches = [];
    for (const match of matches) {
      try {
        const validated = validateAndCorrectOffsets2(content, match.start, match.end, match.exact);
        validatedMatches.push({
          ...match,
          ...extraFields,
          start: validated.start,
          end: validated.end,
          prefix: validated.prefix,
          suffix: validated.suffix
        });
      } catch (error) {
        console.warn(describeSkip(match), error);
      }
    }
    return validatedMatches;
  }
  /**
   * Parse and validate AI response for comment detection
   *
   * Entries must have a string `exact`, numeric `start`/`end` and a non-blank
   * string `comment`; anything else is dropped. Any parse failure yields `[]`.
   *
   * @param response - Raw AI response string (may include markdown code fences)
   * @param content - Original content to validate offsets against
   * @returns Array of validated comment matches
   */
  static parseComments(response, content) {
    try {
      const parsed = JSON.parse(MotivationParsers.stripCodeFence(response));
      if (!Array.isArray(parsed)) {
        console.warn("[MotivationParsers] Comment response is not an array");
        return [];
      }
      const valid = parsed.filter(
        (c) => c && typeof c.exact === "string" && typeof c.start === "number" && typeof c.end === "number" && typeof c.comment === "string" && c.comment.trim().length > 0
      );
      console.log(`[MotivationParsers] Parsed ${valid.length} valid comments from ${parsed.length} total`);
      return MotivationParsers.applyOffsetValidation(
        valid,
        content,
        (comment) => `[MotivationParsers] Skipping invalid comment "${comment.exact}":`
      );
    } catch (error) {
      console.error("[MotivationParsers] Failed to parse AI comment response:", error);
      return [];
    }
  }
  /**
   * Parse and validate AI response for highlight detection
   *
   * Entries must have a string `exact` and numeric `start`/`end`; anything
   * else is dropped. Any parse failure yields `[]`.
   *
   * @param response - Raw AI response string (may include markdown code fences)
   * @param content - Original content to validate offsets against
   * @returns Array of validated highlight matches
   */
  static parseHighlights(response, content) {
    try {
      const parsed = JSON.parse(MotivationParsers.stripCodeFence(response));
      if (!Array.isArray(parsed)) {
        console.warn("[MotivationParsers] Highlight response was not an array");
        return [];
      }
      const highlights = parsed.filter(
        (h) => h && typeof h.exact === "string" && typeof h.start === "number" && typeof h.end === "number"
      );
      return MotivationParsers.applyOffsetValidation(
        highlights,
        content,
        (highlight) => `[MotivationParsers] Skipping invalid highlight "${highlight.exact}":`
      );
    } catch (error) {
      console.error("[MotivationParsers] Failed to parse AI highlight response:", error);
      console.error("Raw response:", response);
      return [];
    }
  }
  /**
   * Parse and validate AI response for assessment detection
   *
   * Entries must have a string `exact`, numeric `start`/`end` and a string
   * `assessment`; anything else is dropped. Any parse failure yields `[]`.
   *
   * @param response - Raw AI response string (may include markdown code fences)
   * @param content - Original content to validate offsets against
   * @returns Array of validated assessment matches
   */
  static parseAssessments(response, content) {
    try {
      const parsed = JSON.parse(MotivationParsers.stripCodeFence(response));
      if (!Array.isArray(parsed)) {
        console.warn("[MotivationParsers] Assessment response was not an array");
        return [];
      }
      const assessments = parsed.filter(
        (a) => a && typeof a.exact === "string" && typeof a.start === "number" && typeof a.end === "number" && typeof a.assessment === "string"
      );
      return MotivationParsers.applyOffsetValidation(
        assessments,
        content,
        (assessment) => `[MotivationParsers] Skipping invalid assessment "${assessment.exact}":`
      );
    } catch (error) {
      console.error("[MotivationParsers] Failed to parse AI assessment response:", error);
      console.error("Raw response:", response);
      return [];
    }
  }
  /**
   * Parse and validate AI response for tag detection
   * Note: Does NOT validate offsets - caller must do that with content
   *
   * Entries must have a non-blank string `exact` and numeric `start`/`end`;
   * anything else is dropped. Any parse failure yields `[]`.
   *
   * @param response - Raw AI response string (may include markdown code fences)
   * @returns Array of tag matches (offsets not yet validated)
   */
  static parseTags(response) {
    try {
      const parsed = JSON.parse(MotivationParsers.stripCodeFence(response));
      if (!Array.isArray(parsed)) {
        console.warn("[MotivationParsers] Tag response is not an array");
        return [];
      }
      const valid = parsed.filter(
        (t) => t && typeof t.exact === "string" && typeof t.start === "number" && typeof t.end === "number" && t.exact.trim().length > 0
      );
      console.log(`[MotivationParsers] Parsed ${valid.length} valid tags from ${parsed.length} total`);
      return valid;
    } catch (error) {
      console.error("[MotivationParsers] Failed to parse AI tag response:", error);
      return [];
    }
  }
  /**
   * Validate tag offsets against content and add category
   * Helper for tag detection after initial parsing
   *
   * @param tags - Parsed tags without validated offsets
   * @param content - Original content to validate against
   * @param category - Category to assign to validated tags
   * @returns Array of validated tag matches
   */
  static validateTagOffsets(tags, content, category) {
    return MotivationParsers.applyOffsetValidation(
      tags,
      content,
      () => `[MotivationParsers] Skipping invalid tag for category "${category}":`,
      { category }
    );
  }
};
|
|
3118
|
+
|
|
3119
|
+
// src/annotation-detection.ts
|
|
3120
|
+
import { getTagSchema as getTagSchema2, getSchemaCategory } from "@semiont/ontology";
|
|
3121
|
+
var AnnotationDetection = class {
|
|
3122
|
+
/**
|
|
3123
|
+
* Detect comments in a resource
|
|
3124
|
+
*
|
|
3125
|
+
* @param resourceId - The resource to analyze
|
|
3126
|
+
* @param config - Environment configuration
|
|
3127
|
+
* @param client - Inference client for AI operations
|
|
3128
|
+
* @param instructions - Optional user instructions for comment generation
|
|
3129
|
+
* @param tone - Optional tone guidance (e.g., "academic", "conversational")
|
|
3130
|
+
* @param density - Optional target number of comments per 2000 words
|
|
3131
|
+
* @returns Array of validated comment matches
|
|
3132
|
+
*/
|
|
3133
|
+
static async detectComments(resourceId2, config, client, instructions, tone, density) {
|
|
3134
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
3135
|
+
if (!resource) {
|
|
3136
|
+
throw new Error(`Resource ${resourceId2} not found`);
|
|
3137
|
+
}
|
|
3138
|
+
const content = await this.loadResourceContent(resourceId2, config);
|
|
3139
|
+
if (!content) {
|
|
3140
|
+
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
3141
|
+
}
|
|
3142
|
+
const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density);
|
|
3143
|
+
const response = await client.generateText(
|
|
3144
|
+
prompt,
|
|
3145
|
+
3e3,
|
|
3146
|
+
// maxTokens: Higher than highlights/assessments due to comment text
|
|
3147
|
+
0.4
|
|
3148
|
+
// temperature: Slightly higher to allow creative context
|
|
3149
|
+
);
|
|
3150
|
+
return MotivationParsers.parseComments(response, content);
|
|
1780
3151
|
}
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
|
|
3152
|
+
/**
|
|
3153
|
+
* Detect highlights in a resource
|
|
3154
|
+
*
|
|
3155
|
+
* @param resourceId - The resource to analyze
|
|
3156
|
+
* @param config - Environment configuration
|
|
3157
|
+
* @param client - Inference client for AI operations
|
|
3158
|
+
* @param instructions - Optional user instructions for highlight selection
|
|
3159
|
+
* @param density - Optional target number of highlights per 2000 words
|
|
3160
|
+
* @returns Array of validated highlight matches
|
|
3161
|
+
*/
|
|
3162
|
+
static async detectHighlights(resourceId2, config, client, instructions, density) {
|
|
3163
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
3164
|
+
if (!resource) {
|
|
3165
|
+
throw new Error(`Resource ${resourceId2} not found`);
|
|
1784
3166
|
}
|
|
1785
|
-
await this.
|
|
3167
|
+
const content = await this.loadResourceContent(resourceId2, config);
|
|
3168
|
+
if (!content) {
|
|
3169
|
+
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
3170
|
+
}
|
|
3171
|
+
const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density);
|
|
3172
|
+
const response = await client.generateText(
|
|
3173
|
+
prompt,
|
|
3174
|
+
2e3,
|
|
3175
|
+
// maxTokens: Lower than comments/assessments (no body text)
|
|
3176
|
+
0.3
|
|
3177
|
+
// temperature: Low for consistent importance judgments
|
|
3178
|
+
);
|
|
3179
|
+
return MotivationParsers.parseHighlights(response, content);
|
|
1786
3180
|
}
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
const
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
if (!view) {
|
|
1803
|
-
throw new Error(`Resource ${job.sourceResourceId} not found`);
|
|
3181
|
+
/**
|
|
3182
|
+
* Detect assessments in a resource
|
|
3183
|
+
*
|
|
3184
|
+
* @param resourceId - The resource to analyze
|
|
3185
|
+
* @param config - Environment configuration
|
|
3186
|
+
* @param client - Inference client for AI operations
|
|
3187
|
+
* @param instructions - Optional user instructions for assessment generation
|
|
3188
|
+
* @param tone - Optional tone guidance (e.g., "critical", "supportive")
|
|
3189
|
+
* @param density - Optional target number of assessments per 2000 words
|
|
3190
|
+
* @returns Array of validated assessment matches
|
|
3191
|
+
*/
|
|
3192
|
+
static async detectAssessments(resourceId2, config, client, instructions, tone, density) {
|
|
3193
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
3194
|
+
if (!resource) {
|
|
3195
|
+
throw new Error(`Resource ${resourceId2} not found`);
|
|
1804
3196
|
}
|
|
1805
|
-
const
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
3197
|
+
const content = await this.loadResourceContent(resourceId2, config);
|
|
3198
|
+
if (!content) {
|
|
3199
|
+
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
3200
|
+
}
|
|
3201
|
+
const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density);
|
|
3202
|
+
const response = await client.generateText(
|
|
3203
|
+
prompt,
|
|
3204
|
+
3e3,
|
|
3205
|
+
// maxTokens: Higher for assessment text
|
|
3206
|
+
0.3
|
|
3207
|
+
// temperature: Lower for analytical consistency
|
|
1809
3208
|
);
|
|
1810
|
-
|
|
1811
|
-
|
|
3209
|
+
return MotivationParsers.parseAssessments(response, content);
|
|
3210
|
+
}
|
|
3211
|
+
/**
|
|
3212
|
+
* Detect tags in a resource for a specific category
|
|
3213
|
+
*
|
|
3214
|
+
* @param resourceId - The resource to analyze
|
|
3215
|
+
* @param config - Environment configuration
|
|
3216
|
+
* @param client - Inference client for AI operations
|
|
3217
|
+
* @param schemaId - The tag schema identifier (e.g., "irac", "imrad")
|
|
3218
|
+
* @param category - The specific category to detect
|
|
3219
|
+
* @returns Array of validated tag matches
|
|
3220
|
+
*/
|
|
3221
|
+
static async detectTags(resourceId2, config, client, schemaId, category) {
|
|
3222
|
+
const schema = getTagSchema2(schemaId);
|
|
3223
|
+
if (!schema) {
|
|
3224
|
+
throw new Error(`Invalid tag schema: ${schemaId}`);
|
|
1812
3225
|
}
|
|
1813
|
-
const
|
|
1814
|
-
if (!
|
|
1815
|
-
throw new Error(`
|
|
3226
|
+
const categoryInfo = getSchemaCategory(schemaId, category);
|
|
3227
|
+
if (!categoryInfo) {
|
|
3228
|
+
throw new Error(`Invalid category "${category}" for schema ${schemaId}`);
|
|
1816
3229
|
}
|
|
1817
|
-
const
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
if (!job.context) {
|
|
1821
|
-
throw new Error("Generation context is required but was not provided in job");
|
|
3230
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
3231
|
+
if (!resource) {
|
|
3232
|
+
throw new Error(`Resource ${resourceId2} not found`);
|
|
1822
3233
|
}
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
3234
|
+
const content = await this.loadResourceContent(resourceId2, config);
|
|
3235
|
+
if (!content) {
|
|
3236
|
+
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
3237
|
+
}
|
|
3238
|
+
const prompt = MotivationPrompts.buildTagPrompt(
|
|
3239
|
+
content,
|
|
3240
|
+
category,
|
|
3241
|
+
schema.name,
|
|
3242
|
+
schema.description,
|
|
3243
|
+
schema.domain,
|
|
3244
|
+
categoryInfo.description,
|
|
3245
|
+
categoryInfo.examples
|
|
3246
|
+
);
|
|
3247
|
+
const response = await client.generateText(
|
|
1837
3248
|
prompt,
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
// NEW - from job
|
|
1843
|
-
job.maxTokens
|
|
1844
|
-
// NEW - from job
|
|
3249
|
+
4e3,
|
|
3250
|
+
// maxTokens: Higher for full document analysis
|
|
3251
|
+
0.2
|
|
3252
|
+
// temperature: Lower for structural consistency
|
|
1845
3253
|
);
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
stage: "generating",
|
|
1849
|
-
percentage: 70,
|
|
1850
|
-
message: "Content ready, creating resource..."
|
|
1851
|
-
};
|
|
1852
|
-
await this.updateJobProgress(job);
|
|
1853
|
-
const rId = resourceId(generateUuid());
|
|
1854
|
-
job.progress = {
|
|
1855
|
-
stage: "creating",
|
|
1856
|
-
percentage: 85,
|
|
1857
|
-
message: "Saving resource..."
|
|
1858
|
-
};
|
|
1859
|
-
console.log(`[GenerationWorker] \u{1F4BE} ${job.progress.message}`);
|
|
1860
|
-
await this.updateJobProgress(job);
|
|
1861
|
-
const storedRep = await repStore.store(Buffer.from(generatedContent.content), {
|
|
1862
|
-
mediaType: "text/markdown",
|
|
1863
|
-
rel: "original"
|
|
1864
|
-
});
|
|
1865
|
-
console.log(`[GenerationWorker] \u2705 Saved resource representation to filesystem: ${rId}`);
|
|
1866
|
-
await this.eventStore.appendEvent({
|
|
1867
|
-
type: "resource.created",
|
|
1868
|
-
resourceId: rId,
|
|
1869
|
-
userId: job.userId,
|
|
1870
|
-
version: 1,
|
|
1871
|
-
payload: {
|
|
1872
|
-
name: resourceName,
|
|
1873
|
-
format: "text/markdown",
|
|
1874
|
-
contentChecksum: storedRep.checksum,
|
|
1875
|
-
creationMethod: CREATION_METHODS.GENERATED,
|
|
1876
|
-
entityTypes: job.entityTypes || annotationEntityTypes,
|
|
1877
|
-
language: job.language,
|
|
1878
|
-
isDraft: true,
|
|
1879
|
-
generatedFrom: job.referenceId,
|
|
1880
|
-
generationPrompt: void 0
|
|
1881
|
-
// Could be added if we track the prompt
|
|
1882
|
-
}
|
|
1883
|
-
});
|
|
1884
|
-
console.log(`[GenerationWorker] Emitted resource.created event for ${rId}`);
|
|
1885
|
-
job.progress = {
|
|
1886
|
-
stage: "linking",
|
|
1887
|
-
percentage: 95,
|
|
1888
|
-
message: "Linking reference..."
|
|
1889
|
-
};
|
|
1890
|
-
console.log(`[GenerationWorker] \u{1F517} ${job.progress.message}`);
|
|
1891
|
-
await this.updateJobProgress(job);
|
|
1892
|
-
const newResourceUri = resourceUri(`${this.config.services.backend.publicURL}/resources/${rId}`);
|
|
1893
|
-
const operations = [{
|
|
1894
|
-
op: "add",
|
|
1895
|
-
item: {
|
|
1896
|
-
type: "SpecificResource",
|
|
1897
|
-
source: newResourceUri,
|
|
1898
|
-
purpose: "linking"
|
|
1899
|
-
}
|
|
1900
|
-
}];
|
|
1901
|
-
const annotationIdSegment = job.referenceId.split("/").pop();
|
|
1902
|
-
await this.eventStore.appendEvent({
|
|
1903
|
-
type: "annotation.body.updated",
|
|
1904
|
-
resourceId: job.sourceResourceId,
|
|
1905
|
-
userId: job.userId,
|
|
1906
|
-
version: 1,
|
|
1907
|
-
payload: {
|
|
1908
|
-
annotationId: annotationId(annotationIdSegment),
|
|
1909
|
-
operations
|
|
1910
|
-
}
|
|
1911
|
-
});
|
|
1912
|
-
console.log(`[GenerationWorker] \u2705 Emitted annotation.body.updated event linking ${job.referenceId} \u2192 ${rId}`);
|
|
1913
|
-
job.result = {
|
|
1914
|
-
resourceId: rId,
|
|
1915
|
-
resourceName
|
|
1916
|
-
};
|
|
1917
|
-
job.progress = {
|
|
1918
|
-
stage: "linking",
|
|
1919
|
-
percentage: 100,
|
|
1920
|
-
message: "Complete!"
|
|
1921
|
-
};
|
|
1922
|
-
await this.updateJobProgress(job);
|
|
1923
|
-
console.log(`[GenerationWorker] \u2705 Generation complete: created resource ${rId}`);
|
|
3254
|
+
const parsedTags = MotivationParsers.parseTags(response);
|
|
3255
|
+
return MotivationParsers.validateTagOffsets(parsedTags, content, category);
|
|
1924
3256
|
}
|
|
1925
3257
|
/**
|
|
1926
|
-
*
|
|
1927
|
-
*
|
|
3258
|
+
* Load resource content from representation store
|
|
3259
|
+
* Helper method used by all detection methods
|
|
3260
|
+
*
|
|
3261
|
+
* @param resourceId - The resource ID to load
|
|
3262
|
+
* @param config - Environment configuration
|
|
3263
|
+
* @returns Resource content as string, or null if not available
|
|
1928
3264
|
*/
|
|
1929
|
-
async
|
|
1930
|
-
await
|
|
1931
|
-
if (
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
const
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
userId: genJob.userId,
|
|
1938
|
-
version: 1
|
|
1939
|
-
};
|
|
1940
|
-
if (genJob.progress?.stage === "fetching" && genJob.progress?.percentage === 20) {
|
|
1941
|
-
await this.eventStore.appendEvent({
|
|
1942
|
-
type: "job.started",
|
|
1943
|
-
...baseEvent,
|
|
1944
|
-
payload: {
|
|
1945
|
-
jobId: genJob.id,
|
|
1946
|
-
jobType: genJob.type,
|
|
1947
|
-
totalSteps: 5
|
|
1948
|
-
// fetching, generating, creating, linking, complete
|
|
1949
|
-
}
|
|
1950
|
-
});
|
|
1951
|
-
} else if (genJob.progress?.stage === "linking" && genJob.progress?.percentage === 100) {
|
|
1952
|
-
await this.eventStore.appendEvent({
|
|
1953
|
-
type: "job.completed",
|
|
1954
|
-
...baseEvent,
|
|
1955
|
-
payload: {
|
|
1956
|
-
jobId: genJob.id,
|
|
1957
|
-
jobType: genJob.type,
|
|
1958
|
-
resultResourceId: genJob.result?.resourceId,
|
|
1959
|
-
annotationUri: annotationUri(`${this.config.services.backend.publicURL}/annotations/${genJob.referenceId}`)
|
|
1960
|
-
}
|
|
1961
|
-
});
|
|
1962
|
-
} else if (genJob.progress) {
|
|
1963
|
-
await this.eventStore.appendEvent({
|
|
1964
|
-
type: "job.progress",
|
|
1965
|
-
...baseEvent,
|
|
1966
|
-
payload: {
|
|
1967
|
-
jobId: genJob.id,
|
|
1968
|
-
jobType: genJob.type,
|
|
1969
|
-
currentStep: genJob.progress.stage,
|
|
1970
|
-
percentage: genJob.progress.percentage,
|
|
1971
|
-
message: genJob.progress.message
|
|
1972
|
-
}
|
|
1973
|
-
});
|
|
3265
|
+
static async loadResourceContent(resourceId2, config) {
|
|
3266
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
3267
|
+
if (!resource) return null;
|
|
3268
|
+
const primaryRep = getPrimaryRepresentation4(resource);
|
|
3269
|
+
if (!primaryRep) return null;
|
|
3270
|
+
const baseMediaType = primaryRep.mediaType?.split(";")[0]?.trim() || "";
|
|
3271
|
+
if (baseMediaType !== "text/plain" && baseMediaType !== "text/markdown") {
|
|
3272
|
+
return null;
|
|
1974
3273
|
}
|
|
3274
|
+
if (!primaryRep.checksum || !primaryRep.mediaType) return null;
|
|
3275
|
+
const basePath = config.services.filesystem.path;
|
|
3276
|
+
const projectRoot = config._metadata?.projectRoot;
|
|
3277
|
+
const repStore = new FilesystemRepresentationStore6({ basePath }, projectRoot);
|
|
3278
|
+
const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
3279
|
+
return decodeRepresentation4(contentBuffer, primaryRep.mediaType);
|
|
1975
3280
|
}
|
|
1976
3281
|
};
|
|
1977
3282
|
|
|
@@ -1985,11 +3290,19 @@ export {
|
|
|
1985
3290
|
CommentDetectionWorker,
|
|
1986
3291
|
GenerationWorker,
|
|
1987
3292
|
GraphContext,
|
|
3293
|
+
GraphDBConsumer,
|
|
1988
3294
|
HighlightDetectionWorker,
|
|
3295
|
+
MotivationParsers,
|
|
3296
|
+
MotivationPrompts,
|
|
1989
3297
|
PACKAGE_NAME,
|
|
1990
3298
|
ReferenceDetectionWorker,
|
|
1991
3299
|
ResourceContext,
|
|
1992
3300
|
TagDetectionWorker,
|
|
1993
|
-
VERSION
|
|
3301
|
+
VERSION,
|
|
3302
|
+
extractEntities,
|
|
3303
|
+
generateReferenceSuggestions,
|
|
3304
|
+
generateResourceFromTopic,
|
|
3305
|
+
generateResourceSummary,
|
|
3306
|
+
startMakeMeaning
|
|
1994
3307
|
};
|
|
1995
3308
|
//# sourceMappingURL=index.js.map
|