@semiont/make-meaning 0.2.30 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +177 -570
- package/dist/index.d.ts +489 -9
- package/dist/index.js +2702 -109
- package/dist/index.js.map +1 -1
- package/package.json +6 -3
package/dist/index.js
CHANGED
|
@@ -1,16 +1,2111 @@
|
|
|
1
|
+
// src/service.ts
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import { JobQueue } from "@semiont/jobs";
|
|
4
|
+
import { createEventStore as createEventStoreCore } from "@semiont/event-sourcing";
|
|
5
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore3 } from "@semiont/content";
|
|
6
|
+
import { resourceId as makeResourceId2 } from "@semiont/core";
|
|
7
|
+
import { getInferenceClient } from "@semiont/inference";
|
|
8
|
+
import { getGraphDatabase } from "@semiont/graph";
|
|
9
|
+
|
|
10
|
+
// src/jobs/reference-detection-worker.ts
|
|
11
|
+
import { JobWorker } from "@semiont/jobs";
|
|
12
|
+
import { generateAnnotationId } from "@semiont/event-sourcing";
|
|
13
|
+
import { resourceIdToURI } from "@semiont/core";
|
|
14
|
+
import {
|
|
15
|
+
getPrimaryRepresentation,
|
|
16
|
+
decodeRepresentation,
|
|
17
|
+
validateAndCorrectOffsets
|
|
18
|
+
} from "@semiont/api-client";
|
|
19
|
+
|
|
20
|
+
// src/detection/entity-extractor.ts
|
|
21
|
+
// src/detection/entity-extractor.ts

/**
 * Describe the requested entity types for the prompt, including up to three
 * examples per type when the caller supplied structured type objects.
 * Accepts either plain strings or { type, examples? } objects.
 */
function describeEntityTypes(entityTypes) {
  return entityTypes.map((et) => {
    if (typeof et === "string") {
      return et;
    }
    return et.examples && et.examples.length > 0 ? `${et.type} (examples: ${et.examples.slice(0, 3).join(", ")})` : et.type;
  }).join(", ");
}

/**
 * Build the full entity-extraction prompt sent to the inference client.
 * The prompt asks for a raw JSON array with exact/entityType/offsets and
 * short prefix/suffix context strings used later for disambiguation.
 */
function buildEntityExtractionPrompt(exact, entityTypes, includeDescriptiveReferences) {
  const entityTypesDescription = describeEntityTypes(entityTypes);
  const descriptiveReferenceGuidance = includeDescriptiveReferences ? `
Include both:
- Direct mentions (names, proper nouns)
- Descriptive references (substantive phrases that refer to entities)

For descriptive references, include:
- Definite descriptions: "the Nobel laureate", "the tech giant", "the former president"
- Role-based references: "the CEO", "the physicist", "the author", "the owner", "the contractor"
- Epithets with context: "the Cupertino-based company", "the iPhone maker"
- References to entities even when identity is unknown or unspecified

Do NOT include:
- Simple pronouns alone: he, she, it, they, him, her, them
- Generic determiners alone: this, that, these, those
- Possessives without substance: his, her, their, its

Examples:
- For "Marie Curie", include "the Nobel laureate" and "the physicist" but NOT "she"
- For an unknown person, include "the owner" or "the contractor" (role-based references count even when identity is unspecified)
` : `
Find direct mentions only (names, proper nouns). Do not include pronouns or descriptive references.
`;
  return `Identify entity references in the following text. Look for mentions of: ${entityTypesDescription}.
${descriptiveReferenceGuidance}
Text to analyze:
"""
${exact}
"""

Return ONLY a JSON array of entities found. Each entity should have:
- exact: the exact text span from the input
- entityType: one of the provided entity types
- startOffset: character position where the entity starts (0-indexed)
- endOffset: character position where the entity ends
- prefix: up to 32 characters of text immediately before the entity (helps identify correct occurrence)
- suffix: up to 32 characters of text immediately after the entity (helps identify correct occurrence)

Return empty array [] if no entities found.
Do not include markdown formatting or code fences, just the raw JSON array.

Example output:
[{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
}

/**
 * Parse the model's response text as a JSON array, tolerating an optional
 * markdown code fence wrapper. Throws on malformed JSON.
 */
function parseEntityArray(rawText) {
  let jsonStr = rawText.trim();
  if (jsonStr.startsWith("```")) {
    jsonStr = jsonStr.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
  }
  return JSON.parse(jsonStr);
}

/**
 * Validate/correct one entity's offsets against the source text.
 * Strategy: trust the AI offsets if they reproduce `entity.exact`; otherwise
 * search for an occurrence whose surrounding text matches the AI-provided
 * prefix/suffix; otherwise fall back to the first occurrence.
 * Returns the normalized entity record, or null if the text cannot be found.
 */
function resolveEntityOffsets(entity, exact, idx, total) {
  let startOffset = entity.startOffset;
  let endOffset = entity.endOffset;
  console.log(`
[Entity ${idx + 1}/${total}]`);
  console.log(`  Type: ${entity.entityType}`);
  console.log(`  Text: "${entity.exact}"`);
  console.log(`  Offsets from AI: [${startOffset}, ${endOffset}]`);
  const extractedText = exact.substring(startOffset, endOffset);
  if (extractedText !== entity.exact) {
    console.log(`  \u26A0\uFE0F Offset mismatch!`);
    console.log(`    Expected: "${entity.exact}"`);
    console.log(`    Found at AI offsets [${startOffset}:${endOffset}]: "${extractedText}"`);
    const contextStart = Math.max(0, startOffset - 50);
    const contextEnd = Math.min(exact.length, endOffset + 50);
    const contextBefore = exact.substring(contextStart, startOffset);
    const contextAfter = exact.substring(endOffset, contextEnd);
    console.log(`    Context: "...${contextBefore}[${extractedText}]${contextAfter}..."`);
    console.log(`    Searching for exact match in resource...`);
    let found = false;
    if (entity.prefix || entity.suffix) {
      // Prefer the occurrence whose neighborhood matches the AI's own
      // prefix/suffix hints, so repeated strings land on the right mention.
      console.log(`    Using LLM-provided context for disambiguation:`);
      if (entity.prefix) console.log(`      Prefix: "${entity.prefix}"`);
      if (entity.suffix) console.log(`      Suffix: "${entity.suffix}"`);
      let searchPos = 0;
      while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
        const candidatePrefix = exact.substring(Math.max(0, searchPos - 32), searchPos);
        const candidateSuffix = exact.substring(
          searchPos + entity.exact.length,
          Math.min(exact.length, searchPos + entity.exact.length + 32)
        );
        const prefixMatch = !entity.prefix || candidatePrefix.endsWith(entity.prefix);
        const suffixMatch = !entity.suffix || candidateSuffix.startsWith(entity.suffix);
        if (prefixMatch && suffixMatch) {
          console.log(`  \u2705 Found match using context at offset ${searchPos} (diff: ${searchPos - startOffset})`);
          console.log(`     Candidate prefix: "${candidatePrefix}"`);
          console.log(`     Candidate suffix: "${candidateSuffix}"`);
          startOffset = searchPos;
          endOffset = searchPos + entity.exact.length;
          found = true;
          break;
        }
        searchPos++;
      }
      if (!found) {
        console.log(`  \u26A0\uFE0F No occurrence found with matching context`);
      }
    }
    if (!found) {
      // Fallback: first occurrence anywhere in the text.
      const index = exact.indexOf(entity.exact);
      if (index !== -1) {
        console.log(`  \u26A0\uFE0F Using first occurrence at offset ${index} (diff: ${index - startOffset})`);
        startOffset = index;
        endOffset = index + entity.exact.length;
      } else {
        console.log(`  \u274C Cannot find "${entity.exact}" anywhere in resource`);
        console.log(`     Resource starts with: "${exact.substring(0, 200)}..."`);
        return null;
      }
    }
  } else {
    console.log(`  \u2705 Offsets correct`);
  }
  return {
    exact: entity.exact,
    entityType: entity.entityType,
    startOffset,
    endOffset,
    prefix: entity.prefix,
    suffix: entity.suffix
  };
}

/**
 * Final sanity filter: drop null records and any entity whose corrected
 * offsets are missing, out of bounds, or do not reproduce `entity.exact`.
 */
function isUsableEntity(entity, exact) {
  if (entity === null) {
    console.log("\u274C Filtered entity: null");
    return false;
  }
  if (entity.startOffset === void 0 || entity.endOffset === void 0) {
    console.log(`\u274C Filtered entity "${entity.exact}": missing offsets`);
    return false;
  }
  if (entity.startOffset < 0) {
    console.log(`\u274C Filtered entity "${entity.exact}": negative startOffset (${entity.startOffset})`);
    return false;
  }
  if (entity.endOffset > exact.length) {
    console.log(`\u274C Filtered entity "${entity.exact}": endOffset (${entity.endOffset}) > text length (${exact.length})`);
    return false;
  }
  const extractedText = exact.substring(entity.startOffset, entity.endOffset);
  if (extractedText !== entity.exact) {
    console.log(`\u274C Filtered entity "${entity.exact}": offset mismatch`);
    console.log(`  Expected: "${entity.exact}"`);
    console.log(`  Got at [${entity.startOffset}:${entity.endOffset}]: "${extractedText}"`);
    return false;
  }
  console.log(`\u2705 Accepted entity "${entity.exact}" at [${entity.startOffset}:${entity.endOffset}]`);
  return true;
}

/**
 * Extract entity mentions of the given types from `exact` via the inference
 * client, returning validated { exact, entityType, startOffset, endOffset,
 * prefix, suffix } records with offsets corrected against the source text.
 *
 * @param {string} exact - Full text to analyze.
 * @param {Array<string|{type:string,examples?:string[]}>} entityTypes - Types to detect.
 * @param {{generateTextWithMetadata: Function}} client - Inference client.
 * @param {boolean} [includeDescriptiveReferences=false] - Also detect descriptive references (e.g. "the CEO").
 * @returns {Promise<Array<object>>} Validated entities; [] when the response is unparseable.
 * @throws {Error} When the model response was truncated at max_tokens.
 */
async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences = false) {
  console.log("extractEntities called with:", {
    textLength: exact.length,
    entityTypes: Array.isArray(entityTypes) ? entityTypes.map((et) => typeof et === "string" ? et : et.type) : []
  });
  const prompt = buildEntityExtractionPrompt(exact, entityTypes, includeDescriptiveReferences);
  console.log("Sending entity extraction request");
  const response = await client.generateTextWithMetadata(
    prompt,
    4e3,
    // Increased to handle many entities without truncation
    0.3
    // Lower temperature for more consistent extraction
  );
  console.log("Got entity extraction response");
  console.log("Entity extraction raw response length:", response.text.length);
  let entities;
  try {
    entities = parseEntityArray(response.text);
  } catch (error) {
    // Malformed JSON from the model is treated as "no entities found".
    console.error("Failed to parse entity extraction response:", error);
    return [];
  }
  console.log("Parsed", entities.length, "entities from response");
  // BUG FIX: this truncation check previously lived inside the same try/catch
  // as JSON.parse, so the deliberately thrown Error was caught below and
  // silently converted into an empty result. The catch is now scoped to
  // parsing only, so truncation propagates to the caller as intended.
  if (response.stopReason === "max_tokens") {
    const errorMsg = `AI response truncated: Found ${entities.length} entities but response hit max_tokens limit. Increase max_tokens or reduce resource size.`;
    console.error(`\u274C ${errorMsg}`);
    throw new Error(errorMsg);
  }
  return entities.map((entity, idx) => resolveEntityOffsets(entity, exact, idx, entities.length)).filter((entity) => isUsableEntity(entity, exact));
}
|
|
199
|
+
|
|
200
|
+
// src/jobs/reference-detection-worker.ts
|
|
201
|
+
import { FilesystemRepresentationStore } from "@semiont/content";
|
|
202
|
+
/**
 * Job worker that detects entity references in a resource's text using the
 * inference client, and appends one W3C-style `annotation.added` event per
 * detected entity to the event store. Also mirrors job lifecycle
 * (started/progress/completed/failed) into the event store.
 */
var ReferenceDetectionWorker = class extends JobWorker {
  /**
   * @param jobQueue - Queue passed through to the JobWorker base class.
   * @param config - Service configuration (filesystem path, backend publicURL, etc.).
   * @param eventStore - Event store used for annotation and job lifecycle events.
   * @param inferenceClient - LLM client used for entity extraction.
   */
  constructor(jobQueue, config, eventStore, inferenceClient) {
    super(jobQueue);
    this.config = config;
    this.eventStore = eventStore;
    this.inferenceClient = inferenceClient;
  }
  // Human-readable worker identifier used by the job framework.
  getWorkerName() {
    return "ReferenceDetectionWorker";
  }
  // This worker only handles jobs whose metadata type is "detection".
  canProcessJob(job) {
    return job.metadata.type === "detection";
  }
  /**
   * Entry point called by the job framework. Guards the job type and state,
   * then delegates to processDetectionJob.
   */
  async executeJob(job) {
    if (job.metadata.type !== "detection") {
      throw new Error(`Invalid job type: ${job.metadata.type}`);
    }
    if (job.status !== "running") {
      throw new Error(`Job must be in running state to execute, got: ${job.status}`);
    }
    await this.processDetectionJob(job);
  }
  /**
   * Detect entity references in resource using AI
   * Self-contained implementation for reference detection
   *
   * Public for testing charset handling - see entity-detection-charset.test.ts
   *
   * Only text/plain and text/markdown resources are processed; anything else
   * returns an empty list. Content is loaded from the filesystem
   * representation store by checksum and decoded before extraction. Each raw
   * entity's offsets are re-validated via validateAndCorrectOffsets; entities
   * that fail validation are skipped with a warning rather than failing the job.
   */
  async detectReferences(resource, entityTypes, includeDescriptiveReferences = false) {
    console.log(`Detecting entities of types: ${entityTypes.join(", ")}${includeDescriptiveReferences ? " (including descriptive references)" : ""}`);
    const detectedAnnotations = [];
    const primaryRep = getPrimaryRepresentation(resource);
    if (!primaryRep) return detectedAnnotations;
    const mediaType = primaryRep.mediaType;
    // Strip any media-type parameters (e.g. "; charset=utf-8") before comparing.
    const baseMediaType = mediaType?.split(";")[0]?.trim() || "";
    if (baseMediaType === "text/plain" || baseMediaType === "text/markdown") {
      if (!primaryRep.checksum || !primaryRep.mediaType) return detectedAnnotations;
      const basePath = this.config.services.filesystem.path;
      const projectRoot = this.config._metadata?.projectRoot;
      const repStore = new FilesystemRepresentationStore({ basePath }, projectRoot);
      // Content is stored content-addressed: retrieve by checksum, then decode
      // bytes to a string according to the media type.
      const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
      const content = decodeRepresentation(contentBuffer, primaryRep.mediaType);
      const extractedEntities = await extractEntities(content, entityTypes, this.inferenceClient, includeDescriptiveReferences);
      for (const entity of extractedEntities) {
        try {
          const validated = validateAndCorrectOffsets(
            content,
            entity.startOffset,
            entity.endOffset,
            entity.exact
          );
          const annotation = {
            annotation: {
              selector: {
                start: validated.start,
                end: validated.end,
                exact: validated.exact,
                prefix: validated.prefix,
                suffix: validated.suffix
              },
              entityTypes: [entity.entityType]
            }
          };
          detectedAnnotations.push(annotation);
        } catch (error) {
          // Best-effort: a single bad entity must not abort the whole batch.
          console.warn(`[ReferenceDetectionWorker] Skipping invalid entity "${entity.exact}":`, error);
        }
      }
    }
    return detectedAnnotations;
  }
  /**
   * Run detection for each requested entity type in sequence, emitting an
   * `annotation.added` event per detected entity and a progress update after
   * each type completes.
   *
   * NOTE(review): relies on a `ResourceContext` object that is not imported
   * in the visible header — presumably defined elsewhere in this bundle;
   * verify it is in scope at runtime.
   */
  async processDetectionJob(job) {
    console.log(`[ReferenceDetectionWorker] Processing detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
    console.log(`[ReferenceDetectionWorker] \u{1F50D} Entity types: ${job.params.entityTypes.join(", ")}`);
    const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
    if (!resource) {
      throw new Error(`Resource ${job.params.resourceId} not found`);
    }
    let totalFound = 0;
    let totalEmitted = 0;
    let totalErrors = 0;
    // Initial progress snapshot: 0 of N entity types processed. This first
    // update is what triggers the `job.started` event in updateJobProgress.
    let updatedJob = {
      ...job,
      progress: {
        totalEntityTypes: job.params.entityTypes.length,
        processedEntityTypes: 0,
        entitiesFound: 0,
        entitiesEmitted: 0
      }
    };
    await this.updateJobProgress(updatedJob);
    for (let i = 0; i < job.params.entityTypes.length; i++) {
      const entityType = job.params.entityTypes[i];
      if (!entityType) continue;
      console.log(`[ReferenceDetectionWorker] \u{1F916} [${i + 1}/${job.params.entityTypes.length}] Detecting ${entityType}...`);
      // One detection pass per entity type (types are processed independently).
      const detectedAnnotations = await this.detectReferences(resource, [entityType], job.params.includeDescriptiveReferences);
      totalFound += detectedAnnotations.length;
      console.log(`[ReferenceDetectionWorker] \u2705 Found ${detectedAnnotations.length} ${entityType} entities`);
      for (let idx = 0; idx < detectedAnnotations.length; idx++) {
        const detected = detectedAnnotations[idx];
        if (!detected) {
          console.warn(`[ReferenceDetectionWorker] Skipping undefined entity at index ${idx}`);
          continue;
        }
        let referenceId;
        try {
          const backendUrl = this.config.services.backend?.publicURL;
          if (!backendUrl) {
            throw new Error("Backend publicURL not configured");
          }
          referenceId = generateAnnotationId(backendUrl);
        } catch (error) {
          // Missing backend URL is a configuration error: abort the whole job.
          console.error(`[ReferenceDetectionWorker] Failed to generate annotation ID:`, error);
          throw new Error("Configuration error: Backend publicURL not set");
        }
        try {
          // Emit one annotation.added event carrying a W3C Web Annotation with
          // both a TextPositionSelector (offsets) and a TextQuoteSelector
          // (exact text plus optional prefix/suffix context).
          await this.eventStore.appendEvent({
            type: "annotation.added",
            resourceId: job.params.resourceId,
            userId: job.metadata.userId,
            version: 1,
            payload: {
              annotation: {
                "@context": "http://www.w3.org/ns/anno.jsonld",
                "type": "Annotation",
                id: referenceId,
                motivation: "linking",
                target: {
                  source: resourceIdToURI(job.params.resourceId, this.config.services.backend.publicURL),
                  // Convert to full URI
                  selector: [
                    {
                      type: "TextPositionSelector",
                      start: detected.annotation.selector.start,
                      end: detected.annotation.selector.end
                    },
                    {
                      type: "TextQuoteSelector",
                      exact: detected.annotation.selector.exact,
                      ...detected.annotation.selector.prefix && { prefix: detected.annotation.selector.prefix },
                      ...detected.annotation.selector.suffix && { suffix: detected.annotation.selector.suffix }
                    }
                  ]
                },
                // Each detected entity type becomes a tagging body.
                body: (detected.annotation.entityTypes || []).map((et) => ({
                  type: "TextualBody",
                  value: et,
                  purpose: "tagging"
                })),
                modified: (/* @__PURE__ */ new Date()).toISOString()
              }
            }
          });
          totalEmitted++;
          // Throttle emission logging to every 10th event (and the last one).
          if ((idx + 1) % 10 === 0 || idx === detectedAnnotations.length - 1) {
            console.log(`[ReferenceDetectionWorker] \u{1F4E4} Emitted ${idx + 1}/${detectedAnnotations.length} events for ${entityType}`);
          }
        } catch (error) {
          // Emission failures are counted but do not stop the batch.
          totalErrors++;
          console.error(`[ReferenceDetectionWorker] \u274C Failed to emit event for ${referenceId}:`, error);
        }
      }
      // NOTE(review): this per-type "emitted" figure is derived from cumulative
      // counters and looks convoluted — it appears intended as
      // found-this-type minus errors-this-type, but skipped (undefined)
      // entities are neither emitted nor errors, which can skew it. Log-only;
      // verify the arithmetic if the number matters.
      console.log(`[ReferenceDetectionWorker] \u2705 Completed ${entityType}: ${detectedAnnotations.length} found, ${detectedAnnotations.length - (totalErrors - (totalFound - totalEmitted))} emitted`);
      updatedJob = {
        ...updatedJob,
        progress: {
          totalEntityTypes: job.params.entityTypes.length,
          processedEntityTypes: i + 1,
          currentEntityType: entityType,
          entitiesFound: totalFound,
          entitiesEmitted: totalEmitted
        }
      };
      await this.updateJobProgress(updatedJob);
    }
    console.log(`[ReferenceDetectionWorker] \u2705 Detection complete: ${totalFound} entities found, ${totalEmitted} events emitted, ${totalErrors} errors`);
  }
  /**
   * On terminal failure of a detection job, emit a `job.failed` event with a
   * user-safe error message (the raw error is handled by the base class).
   */
  async handleJobFailure(job, error) {
    await super.handleJobFailure(job, error);
    if (job.status === "failed" && job.metadata.type === "detection") {
      const detJob = job;
      await this.eventStore.appendEvent({
        type: "job.failed",
        resourceId: detJob.params.resourceId,
        userId: detJob.metadata.userId,
        version: 1,
        payload: {
          jobId: detJob.metadata.id,
          jobType: detJob.metadata.type,
          error: "Entity detection failed. Please try again later."
        }
      });
    }
  }
  /**
   * Update job progress and emit events to Event Store
   * Overrides base class to also emit job progress events
   *
   * Mapping: first update (0 types processed) -> `job.started`; last update
   * (all types processed, at least one type) -> `job.completed`; anything in
   * between -> `job.progress` with a percentage. Non-detection or non-running
   * jobs only get the base-class update.
   */
  async updateJobProgress(job) {
    await super.updateJobProgress(job);
    if (job.metadata.type !== "detection") {
      return;
    }
    if (job.status !== "running") {
      return;
    }
    const detJob = job;
    const baseEvent = {
      resourceId: detJob.params.resourceId,
      userId: detJob.metadata.userId,
      version: 1
    };
    const isFirstUpdate = detJob.progress.processedEntityTypes === 0;
    const isFinalUpdate = detJob.progress.processedEntityTypes === detJob.progress.totalEntityTypes && detJob.progress.totalEntityTypes > 0;
    if (isFirstUpdate) {
      await this.eventStore.appendEvent({
        type: "job.started",
        ...baseEvent,
        payload: {
          jobId: detJob.metadata.id,
          jobType: detJob.metadata.type,
          totalSteps: detJob.params.entityTypes.length
        }
      });
    } else if (isFinalUpdate) {
      await this.eventStore.appendEvent({
        type: "job.completed",
        ...baseEvent,
        payload: {
          jobId: detJob.metadata.id,
          jobType: detJob.metadata.type,
          foundCount: detJob.progress.entitiesFound
        }
      });
    } else {
      const percentage = Math.round(detJob.progress.processedEntityTypes / detJob.progress.totalEntityTypes * 100);
      await this.eventStore.appendEvent({
        type: "job.progress",
        ...baseEvent,
        payload: {
          jobId: detJob.metadata.id,
          jobType: detJob.metadata.type,
          percentage,
          currentStep: detJob.progress.currentEntityType,
          processedSteps: detJob.progress.processedEntityTypes,
          totalSteps: detJob.progress.totalEntityTypes,
          foundCount: detJob.progress.entitiesFound
        }
      });
    }
  }
};
|
|
454
|
+
|
|
455
|
+
// src/jobs/generation-worker.ts
|
|
456
|
+
import { JobWorker as JobWorker2 } from "@semiont/jobs";
|
|
457
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore2 } from "@semiont/content";
|
|
458
|
+
|
|
459
|
+
// src/generation/resource-generation.ts
|
|
460
|
+
import { getLocaleEnglishName } from "@semiont/api-client";
|
|
461
|
+
/**
 * Resolve a locale code to its English display name, falling back to the
 * raw locale string when no English name is known.
 */
function getLanguageName(locale) {
  const englishName = getLocaleEnglishName(locale);
  return englishName || locale;
}
|
|
464
|
+
/**
 * Generate a markdown resource about `topic` with the inference client.
 * Optionally constrains by entity types, a user prompt, a target language,
 * and source-document context; strips any code fence the model wraps its
 * output in. Returns { title, content } where title is always `topic`.
 */
async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens) {
  console.log("generateResourceFromTopic called with:", {
    topic: topic.substring(0, 100),
    entityTypes,
    hasUserPrompt: !!userPrompt,
    locale,
    hasContext: !!context,
    temperature,
    maxTokens
  });
  const effectiveTemperature = temperature ?? 0.7;
  const effectiveMaxTokens = maxTokens ?? 500;

  // Non-English locales get an explicit language directive appended.
  let languageInstruction = "";
  if (locale && locale !== "en") {
    languageInstruction = `

IMPORTANT: Write the entire resource in ${getLanguageName(locale)}.`;
  }

  // Embed the surrounding source text, highlighting the selected span.
  let contextSection = "";
  const sourceContext = context?.sourceContext;
  if (sourceContext) {
    contextSection = `

Source document context:
---
${sourceContext.before ? `...${sourceContext.before}` : ""}
**[${sourceContext.selected}]**
${sourceContext.after ? `${sourceContext.after}...` : ""}
---
`;
  }

  const entityFocus = entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : "";
  const userContext = userPrompt ? `Additional context: ${userPrompt}` : "";
  const prompt = `Generate a concise, informative resource about "${topic}".
${entityFocus}
${userContext}${contextSection}${languageInstruction}

Requirements:
- Start with a clear heading (# Title)
- Write 2-3 paragraphs of substantive content
- Be factual and informative
- Use markdown formatting
- Return ONLY the markdown content, no JSON, no code fences, no additional wrapper`;

  // Remove a leading/trailing markdown code fence if the model added one.
  const stripFence = (raw) => {
    let body = raw.trim();
    let wasFenced = false;
    if (body.startsWith("```markdown") || body.startsWith("```md")) {
      // Drop the opening fence line entirely.
      body = body.slice(body.indexOf("\n") + 1);
      wasFenced = true;
    } else if (body.startsWith("```")) {
      body = body.slice(3);
      wasFenced = true;
    }
    if (wasFenced) {
      const closing = body.lastIndexOf("```");
      if (closing !== -1) {
        body = body.slice(0, closing);
      }
    }
    return body.trim();
  };

  console.log("Sending prompt to inference (length:", prompt.length, "chars)", "temp:", effectiveTemperature, "maxTokens:", effectiveMaxTokens);
  const rawResponse = await client.generateText(prompt, effectiveMaxTokens, effectiveTemperature);
  console.log("Got raw response (length:", rawResponse.length, "chars)");
  const result = {
    title: topic,
    content: stripFence(rawResponse)
  };
  console.log("Parsed result:", {
    hasTitle: !!result.title,
    titleLength: result.title?.length,
    hasContent: !!result.content,
    contentLength: result.content?.length
  });
  return result;
}
|
|
535
|
+
/**
 * Ask the inference client for a 2-3 sentence summary of a resource.
 * Content is capped at 2000 characters (with a "..." marker) to keep the
 * prompt small. Returns the model's raw text.
 */
async function generateResourceSummary(resourceName, content, entityTypes, client) {
  let promptContent = content;
  if (promptContent.length > 2000) {
    promptContent = promptContent.substring(0, 2000) + "...";
  }
  const typeLine = entityTypes.length > 0 ? `Key entity types: ${entityTypes.join(", ")}` : "";
  const prompt = `Create a brief, intelligent summary of this resource titled "${resourceName}".
${typeLine}

Resource content:
${promptContent}

Write a 2-3 sentence summary that captures the key points and would help someone understand what this resource contains.`;
  // Short output budget, moderate temperature for a stable summary.
  return await client.generateText(prompt, 150, 0.5);
}
|
|
546
|
+
/**
 * Ask the inference client for up to 3 follow-up suggestions for a reference.
 * Bullet markers are stripped from each returned line; blank lines dropped.
 * Returns null when the client produces no response.
 */
async function generateReferenceSuggestions(referenceTitle, client, entityType, currentContent) {
  let subject = `"${referenceTitle}"`;
  if (entityType) {
    subject += ` (type: ${entityType})`;
  }
  if (currentContent) {
    subject += ` with current stub: "${currentContent}"`;
  }
  const prompt = `For a reference titled ${subject}, suggest 3 specific, actionable next steps or related topics to explore.

Format as a simple list, one suggestion per line.`;
  const response = await client.generateText(prompt, 200, 0.8);
  if (!response) {
    return null;
  }
  // Collect the first three non-empty suggestions, shorn of list markers.
  const suggestions = [];
  for (const rawLine of response.split("\n")) {
    const cleaned = rawLine.replace(/^[-*•]\s*/, "").trim();
    if (cleaned.length > 0) {
      suggestions.push(cleaned);
    }
    if (suggestions.length === 3) {
      break;
    }
  }
  return suggestions;
}
|
|
556
|
+
|
|
557
|
+
// src/jobs/generation-worker.ts
|
|
558
|
+
import {
|
|
559
|
+
getTargetSelector,
|
|
560
|
+
getExactText,
|
|
561
|
+
resourceUri,
|
|
562
|
+
annotationUri
|
|
563
|
+
} from "@semiont/api-client";
|
|
564
|
+
import { getEntityTypes } from "@semiont/ontology";
|
|
565
|
+
import {
|
|
566
|
+
CREATION_METHODS,
|
|
567
|
+
generateUuid,
|
|
568
|
+
resourceId,
|
|
569
|
+
annotationId
|
|
570
|
+
} from "@semiont/core";
|
|
571
|
+
var GenerationWorker = class extends JobWorker2 {
|
|
572
|
+
constructor(jobQueue, config, eventStore, inferenceClient) {
|
|
573
|
+
super(jobQueue);
|
|
574
|
+
this.config = config;
|
|
575
|
+
this.eventStore = eventStore;
|
|
576
|
+
this.inferenceClient = inferenceClient;
|
|
577
|
+
}
|
|
578
|
+
getWorkerName() {
|
|
579
|
+
return "GenerationWorker";
|
|
580
|
+
}
|
|
581
|
+
canProcessJob(job) {
|
|
582
|
+
return job.metadata.type === "generation";
|
|
583
|
+
}
|
|
584
|
+
async executeJob(job) {
|
|
585
|
+
if (job.metadata.type !== "generation") {
|
|
586
|
+
throw new Error(`Invalid job type: ${job.metadata.type}`);
|
|
587
|
+
}
|
|
588
|
+
if (job.status !== "running") {
|
|
589
|
+
throw new Error(`Job must be in running state to execute, got: ${job.status}`);
|
|
590
|
+
}
|
|
591
|
+
await this.processGenerationJob(job);
|
|
592
|
+
}
|
|
593
|
+
async processGenerationJob(job) {
|
|
594
|
+
console.log(`[GenerationWorker] Processing generation for reference ${job.params.referenceId} (job: ${job.metadata.id})`);
|
|
595
|
+
const basePath = this.config.services.filesystem.path;
|
|
596
|
+
const projectRoot = this.config._metadata?.projectRoot;
|
|
597
|
+
const repStore = new FilesystemRepresentationStore2({ basePath }, projectRoot);
|
|
598
|
+
let updatedJob = {
|
|
599
|
+
...job,
|
|
600
|
+
progress: {
|
|
601
|
+
stage: "fetching",
|
|
602
|
+
percentage: 20,
|
|
603
|
+
message: "Fetching source resource..."
|
|
604
|
+
}
|
|
605
|
+
};
|
|
606
|
+
console.log(`[GenerationWorker] \u{1F4E5} ${updatedJob.progress.message}`);
|
|
607
|
+
await this.updateJobProgress(updatedJob);
|
|
608
|
+
const { FilesystemViewStorage: FilesystemViewStorage3 } = await import("@semiont/event-sourcing");
|
|
609
|
+
const viewStorage = new FilesystemViewStorage3(basePath, projectRoot);
|
|
610
|
+
const view = await viewStorage.get(job.params.sourceResourceId);
|
|
611
|
+
if (!view) {
|
|
612
|
+
throw new Error(`Resource ${job.params.sourceResourceId} not found`);
|
|
613
|
+
}
|
|
614
|
+
const projection = view.annotations;
|
|
615
|
+
const expectedAnnotationUri = `${this.config.services.backend.publicURL}/annotations/${job.params.referenceId}`;
|
|
616
|
+
const annotation = projection.annotations.find(
|
|
617
|
+
(a) => a.id === expectedAnnotationUri && a.motivation === "linking"
|
|
618
|
+
);
|
|
619
|
+
if (!annotation) {
|
|
620
|
+
throw new Error(`Annotation ${job.params.referenceId} not found in resource ${job.params.sourceResourceId}`);
|
|
621
|
+
}
|
|
622
|
+
const sourceResource = await ResourceContext.getResourceMetadata(job.params.sourceResourceId, this.config);
|
|
623
|
+
if (!sourceResource) {
|
|
624
|
+
throw new Error(`Source resource ${job.params.sourceResourceId} not found`);
|
|
625
|
+
}
|
|
626
|
+
const targetSelector = getTargetSelector(annotation.target);
|
|
627
|
+
const resourceName = job.params.title || (targetSelector ? getExactText(targetSelector) : "") || "New Resource";
|
|
628
|
+
console.log(`[GenerationWorker] Generating resource: "${resourceName}"`);
|
|
629
|
+
if (!job.params.context) {
|
|
630
|
+
throw new Error("Generation context is required but was not provided in job");
|
|
631
|
+
}
|
|
632
|
+
console.log(`[GenerationWorker] Using pre-fetched context: ${job.params.context.sourceContext?.before?.length || 0} chars before, ${job.params.context.sourceContext?.selected?.length || 0} chars selected, ${job.params.context.sourceContext?.after?.length || 0} chars after`);
|
|
633
|
+
updatedJob = {
|
|
634
|
+
...updatedJob,
|
|
635
|
+
progress: {
|
|
636
|
+
stage: "generating",
|
|
637
|
+
percentage: 40,
|
|
638
|
+
message: "Creating content with AI..."
|
|
639
|
+
}
|
|
640
|
+
};
|
|
641
|
+
console.log(`[GenerationWorker] \u{1F916} ${updatedJob.progress.message}`);
|
|
642
|
+
await this.updateJobProgress(updatedJob);
|
|
643
|
+
const prompt = job.params.prompt || `Create a comprehensive resource about "${resourceName}"`;
|
|
644
|
+
const annotationEntityTypes = getEntityTypes({ body: annotation.body });
|
|
645
|
+
const generatedContent = await generateResourceFromTopic(
|
|
646
|
+
resourceName,
|
|
647
|
+
job.params.entityTypes || annotationEntityTypes,
|
|
648
|
+
this.inferenceClient,
|
|
649
|
+
prompt,
|
|
650
|
+
job.params.language,
|
|
651
|
+
job.params.context,
|
|
652
|
+
// NEW - context from job (passed from modal)
|
|
653
|
+
job.params.temperature,
|
|
654
|
+
// NEW - from job
|
|
655
|
+
job.params.maxTokens
|
|
656
|
+
// NEW - from job
|
|
657
|
+
);
|
|
658
|
+
console.log(`[GenerationWorker] \u2705 Generated ${generatedContent.content.length} bytes of content`);
|
|
659
|
+
updatedJob = {
|
|
660
|
+
...updatedJob,
|
|
661
|
+
progress: {
|
|
662
|
+
stage: "generating",
|
|
663
|
+
percentage: 70,
|
|
664
|
+
message: "Content ready, creating resource..."
|
|
665
|
+
}
|
|
666
|
+
};
|
|
667
|
+
await this.updateJobProgress(updatedJob);
|
|
668
|
+
const rId = resourceId(generateUuid());
|
|
669
|
+
updatedJob = {
|
|
670
|
+
...updatedJob,
|
|
671
|
+
progress: {
|
|
672
|
+
stage: "creating",
|
|
673
|
+
percentage: 85,
|
|
674
|
+
message: "Saving resource..."
|
|
675
|
+
}
|
|
676
|
+
};
|
|
677
|
+
console.log(`[GenerationWorker] \u{1F4BE} ${updatedJob.progress.message}`);
|
|
678
|
+
await this.updateJobProgress(updatedJob);
|
|
679
|
+
const storedRep = await repStore.store(Buffer.from(generatedContent.content), {
|
|
680
|
+
mediaType: "text/markdown",
|
|
681
|
+
rel: "original"
|
|
682
|
+
});
|
|
683
|
+
console.log(`[GenerationWorker] \u2705 Saved resource representation to filesystem: ${rId}`);
|
|
684
|
+
await this.eventStore.appendEvent({
|
|
685
|
+
type: "resource.created",
|
|
686
|
+
resourceId: rId,
|
|
687
|
+
userId: job.metadata.userId,
|
|
688
|
+
version: 1,
|
|
689
|
+
payload: {
|
|
690
|
+
name: resourceName,
|
|
691
|
+
format: "text/markdown",
|
|
692
|
+
contentChecksum: storedRep.checksum,
|
|
693
|
+
creationMethod: CREATION_METHODS.GENERATED,
|
|
694
|
+
entityTypes: job.params.entityTypes || annotationEntityTypes,
|
|
695
|
+
language: job.params.language,
|
|
696
|
+
isDraft: true,
|
|
697
|
+
generatedFrom: job.params.referenceId,
|
|
698
|
+
generationPrompt: void 0
|
|
699
|
+
// Could be added if we track the prompt
|
|
700
|
+
}
|
|
701
|
+
});
|
|
702
|
+
console.log(`[GenerationWorker] Emitted resource.created event for ${rId}`);
|
|
703
|
+
updatedJob = {
|
|
704
|
+
...updatedJob,
|
|
705
|
+
progress: {
|
|
706
|
+
stage: "linking",
|
|
707
|
+
percentage: 95,
|
|
708
|
+
message: "Linking reference...",
|
|
709
|
+
resultResourceId: rId
|
|
710
|
+
// Store for job.completed event
|
|
711
|
+
}
|
|
712
|
+
};
|
|
713
|
+
console.log(`[GenerationWorker] \u{1F517} ${updatedJob.progress.message}`);
|
|
714
|
+
await this.updateJobProgress(updatedJob);
|
|
715
|
+
const newResourceUri = resourceUri(`${this.config.services.backend.publicURL}/resources/${rId}`);
|
|
716
|
+
const operations = [{
|
|
717
|
+
op: "add",
|
|
718
|
+
item: {
|
|
719
|
+
type: "SpecificResource",
|
|
720
|
+
source: newResourceUri,
|
|
721
|
+
purpose: "linking"
|
|
722
|
+
}
|
|
723
|
+
}];
|
|
724
|
+
const annotationIdSegment = job.params.referenceId.split("/").pop();
|
|
725
|
+
await this.eventStore.appendEvent({
|
|
726
|
+
type: "annotation.body.updated",
|
|
727
|
+
resourceId: job.params.sourceResourceId,
|
|
728
|
+
userId: job.metadata.userId,
|
|
729
|
+
version: 1,
|
|
730
|
+
payload: {
|
|
731
|
+
annotationId: annotationId(annotationIdSegment),
|
|
732
|
+
operations
|
|
733
|
+
}
|
|
734
|
+
});
|
|
735
|
+
console.log(`[GenerationWorker] \u2705 Emitted annotation.body.updated event linking ${job.params.referenceId} \u2192 ${rId}`);
|
|
736
|
+
updatedJob = {
|
|
737
|
+
...updatedJob,
|
|
738
|
+
progress: {
|
|
739
|
+
stage: "linking",
|
|
740
|
+
percentage: 100,
|
|
741
|
+
message: "Complete!",
|
|
742
|
+
resultResourceId: rId
|
|
743
|
+
// Store for job.completed event
|
|
744
|
+
}
|
|
745
|
+
};
|
|
746
|
+
await this.updateJobProgress(updatedJob);
|
|
747
|
+
console.log(`[GenerationWorker] \u2705 Generation complete: created resource ${rId}`);
|
|
748
|
+
}
|
|
749
|
+
/**
|
|
750
|
+
* Update job progress and emit events to Event Store
|
|
751
|
+
* Overrides base class to also emit job progress events
|
|
752
|
+
*/
|
|
753
|
+
async updateJobProgress(job) {
|
|
754
|
+
await super.updateJobProgress(job);
|
|
755
|
+
if (job.metadata.type !== "generation") {
|
|
756
|
+
return;
|
|
757
|
+
}
|
|
758
|
+
if (job.status !== "running") {
|
|
759
|
+
return;
|
|
760
|
+
}
|
|
761
|
+
const genJob = job;
|
|
762
|
+
const baseEvent = {
|
|
763
|
+
resourceId: genJob.params.sourceResourceId,
|
|
764
|
+
userId: genJob.metadata.userId,
|
|
765
|
+
version: 1
|
|
766
|
+
};
|
|
767
|
+
if (genJob.progress.stage === "fetching" && genJob.progress.percentage === 20) {
|
|
768
|
+
await this.eventStore.appendEvent({
|
|
769
|
+
type: "job.started",
|
|
770
|
+
...baseEvent,
|
|
771
|
+
payload: {
|
|
772
|
+
jobId: genJob.metadata.id,
|
|
773
|
+
jobType: genJob.metadata.type,
|
|
774
|
+
totalSteps: 5
|
|
775
|
+
// fetching, generating, creating, linking, complete
|
|
776
|
+
}
|
|
777
|
+
});
|
|
778
|
+
} else if (genJob.progress.stage === "linking" && genJob.progress.percentage === 100) {
|
|
779
|
+
await this.eventStore.appendEvent({
|
|
780
|
+
type: "job.completed",
|
|
781
|
+
...baseEvent,
|
|
782
|
+
payload: {
|
|
783
|
+
jobId: genJob.metadata.id,
|
|
784
|
+
jobType: genJob.metadata.type,
|
|
785
|
+
resultResourceId: genJob.progress.resultResourceId,
|
|
786
|
+
annotationUri: annotationUri(`${this.config.services.backend.publicURL}/annotations/${genJob.params.referenceId}`)
|
|
787
|
+
}
|
|
788
|
+
});
|
|
789
|
+
} else {
|
|
790
|
+
await this.eventStore.appendEvent({
|
|
791
|
+
type: "job.progress",
|
|
792
|
+
...baseEvent,
|
|
793
|
+
payload: {
|
|
794
|
+
jobId: genJob.metadata.id,
|
|
795
|
+
jobType: genJob.metadata.type,
|
|
796
|
+
currentStep: genJob.progress.stage,
|
|
797
|
+
percentage: genJob.progress.percentage,
|
|
798
|
+
message: genJob.progress.message
|
|
799
|
+
}
|
|
800
|
+
});
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
};
|
|
804
|
+
|
|
805
|
+
// src/jobs/highlight-detection-worker.ts
|
|
806
|
+
import { JobWorker as JobWorker3 } from "@semiont/jobs";
|
|
807
|
+
import { generateAnnotationId as generateAnnotationId2 } from "@semiont/event-sourcing";
|
|
808
|
+
import { resourceIdToURI as resourceIdToURI2 } from "@semiont/core";
|
|
809
|
+
import { userId } from "@semiont/core";
|
|
810
|
+
// Highlight-detection worker: asks the inference client for passages worth
// highlighting and records each one as a W3C Web Annotation with
// motivation "highlighting" (empty body).
var HighlightDetectionWorker = class extends JobWorker3 {
  // True until the first progress update of the current job run has been
  // emitted; drives the job.started event in updateJobProgress.
  isFirstProgress = true;

  /**
   * @param jobQueue - queue the base worker polls
   * @param config - service configuration (backend public URL, stores, ...)
   * @param eventStore - event store used to emit job and annotation events
   * @param inferenceClient - LLM client used for detection
   */
  constructor(jobQueue, config, eventStore, inferenceClient) {
    super(jobQueue);
    this.config = config;
    this.eventStore = eventStore;
    this.inferenceClient = inferenceClient;
  }

  /** Human-readable worker name used in logs. */
  getWorkerName() {
    return "HighlightDetectionWorker";
  }

  /** This worker only handles highlight-detection jobs. */
  canProcessJob(job) {
    return job.metadata.type === "highlight-detection";
  }

  /**
   * Validate the job then run the detection pipeline.
   * @throws when the job type is wrong or the job is not running
   */
  async executeJob(job) {
    if (job.metadata.type !== "highlight-detection") {
      throw new Error(`Invalid job type: ${job.metadata.type}`);
    }
    if (job.status !== "running") {
      throw new Error(`Job must be in running state to execute, got: ${job.status}`);
    }
    // A fresh run starts a fresh started/progress/completed event cycle.
    this.isFirstProgress = true;
    await this.processHighlightDetectionJob(job);
  }

  /**
   * Override updateJobProgress to emit events to Event Store:
   * job.started on the first update, job.completed at 100%, job.progress
   * otherwise. Only applies to running highlight-detection jobs.
   */
  async updateJobProgress(job) {
    await super.updateJobProgress(job);
    if (job.metadata.type !== "highlight-detection" || job.status !== "running") {
      return;
    }
    const detectJob = job;
    const payload = {
      jobId: detectJob.metadata.id,
      jobType: detectJob.metadata.type
    };
    let eventType;
    if (this.isFirstProgress) {
      this.isFirstProgress = false;
      eventType = "job.started";
    } else if (detectJob.progress.percentage === 100) {
      // Note: result would come from job.result, but that's handled by base class
      eventType = "job.completed";
    } else {
      eventType = "job.progress";
      payload.progress = detectJob.progress;
    }
    await this.eventStore.appendEvent({
      type: eventType,
      resourceId: detectJob.params.resourceId,
      userId: detectJob.metadata.userId,
      version: 1,
      payload
    });
  }

  /**
   * After the base class records the failure, emit a job.failed event with a
   * user-safe error message (the raw error is not leaked to clients).
   */
  async handleJobFailure(job, error) {
    await super.handleJobFailure(job, error);
    if (job.status !== "failed" || job.metadata.type !== "highlight-detection") {
      return;
    }
    const failedJob = job;
    await this.eventStore.appendEvent({
      type: "job.failed",
      resourceId: failedJob.params.resourceId,
      userId: failedJob.metadata.userId,
      version: 1,
      payload: {
        jobId: failedJob.metadata.id,
        jobType: failedJob.metadata.type,
        error: "Highlight detection failed. Please try again later."
      }
    });
  }

  /**
   * Core pipeline: load metadata, run detection, then create one annotation
   * per detected highlight. Individual annotation failures are logged and
   * skipped so one bad span does not abort the batch.
   */
  async processHighlightDetectionJob(job) {
    console.log(`[HighlightDetectionWorker] Processing highlight detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
    const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
    if (!resource) {
      throw new Error(`Resource ${job.params.resourceId} not found`);
    }
    let trackedJob = job;
    // Small local helper: snapshot the job with new progress and report it.
    const report = async (stage, percentage, message) => {
      trackedJob = { ...trackedJob, progress: { stage, percentage, message } };
      await this.updateJobProgress(trackedJob);
    };
    await report("analyzing", 10, "Loading resource...");
    await report("analyzing", 30, "Analyzing text...");
    const highlights = await AnnotationDetection.detectHighlights(
      job.params.resourceId,
      this.config,
      this.inferenceClient,
      job.params.instructions,
      job.params.density
    );
    console.log(`[HighlightDetectionWorker] Found ${highlights.length} highlights to create`);
    await report("creating", 60, `Creating ${highlights.length} annotations...`);
    let created = 0;
    for (const highlight of highlights) {
      try {
        await this.createHighlightAnnotation(job.params.resourceId, job.metadata.userId, highlight);
        created++;
      } catch (error) {
        console.error(`[HighlightDetectionWorker] Failed to create highlight:`, error);
      }
    }
    await report("creating", 100, `Complete! Created ${created} highlights`);
    console.log(`[HighlightDetectionWorker] \u2705 Created ${created}/${highlights.length} highlights`);
  }

  /**
   * Build a W3C Web Annotation for one detected highlight and emit an
   * annotation.added event. Highlights carry position + quote selectors and
   * an empty body.
   * @throws when the backend public URL is not configured
   */
  async createHighlightAnnotation(targetResourceId, creatorUserId, highlight) {
    const backendUrl = this.config.services.backend?.publicURL;
    if (!backendUrl) throw new Error("Backend publicURL not configured");
    const newAnnotationId = generateAnnotationId2(backendUrl);
    const targetUri = resourceIdToURI2(targetResourceId, backendUrl);
    // Quote selector: prefix/suffix are optional and only set when non-empty.
    const quoteSelector = {
      type: "TextQuoteSelector",
      exact: highlight.exact
    };
    if (highlight.prefix) {
      quoteSelector.prefix = highlight.prefix;
    }
    if (highlight.suffix) {
      quoteSelector.suffix = highlight.suffix;
    }
    const annotation = {
      "@context": "http://www.w3.org/ns/anno.jsonld",
      "type": "Annotation",
      "id": newAnnotationId,
      "motivation": "highlighting",
      "creator": userId(creatorUserId),
      "created": new Date().toISOString(),
      "target": {
        type: "SpecificResource",
        source: targetUri,
        selector: [
          {
            type: "TextPositionSelector",
            start: highlight.start,
            end: highlight.end
          },
          quoteSelector
        ]
      },
      "body": []
      // Empty body for highlights
    };
    await this.eventStore.appendEvent({
      type: "annotation.added",
      resourceId: targetResourceId,
      userId: userId(creatorUserId),
      version: 1,
      payload: { annotation }
    });
  }
};
|
|
1001
|
+
|
|
1002
|
+
// src/jobs/assessment-detection-worker.ts
|
|
1003
|
+
import { JobWorker as JobWorker4 } from "@semiont/jobs";
|
|
1004
|
+
import { generateAnnotationId as generateAnnotationId3 } from "@semiont/event-sourcing";
|
|
1005
|
+
import { resourceIdToURI as resourceIdToURI3 } from "@semiont/core";
|
|
1006
|
+
import { userId as userId2 } from "@semiont/core";
|
|
1007
|
+
// Assessment-detection worker: asks the inference client for passages worth
// assessing and records each one as a W3C Web Annotation with
// motivation "assessing" and the assessment text as its body.
var AssessmentDetectionWorker = class extends JobWorker4 {
  // True until the first progress update of the current job run has been
  // emitted; drives the job.started event in updateJobProgress.
  isFirstProgress = true;

  /**
   * @param jobQueue - queue the base worker polls
   * @param config - service configuration (backend public URL, stores, ...)
   * @param eventStore - event store used to emit job and annotation events
   * @param inferenceClient - LLM client used for detection
   */
  constructor(jobQueue, config, eventStore, inferenceClient) {
    super(jobQueue);
    this.config = config;
    this.eventStore = eventStore;
    this.inferenceClient = inferenceClient;
  }

  /** Human-readable worker name used in logs. */
  getWorkerName() {
    return "AssessmentDetectionWorker";
  }

  /** This worker only handles assessment-detection jobs. */
  canProcessJob(job) {
    return job.metadata.type === "assessment-detection";
  }

  /**
   * Validate the job then run the detection pipeline.
   * @throws when the job type is wrong or the job is not running
   */
  async executeJob(job) {
    if (job.metadata.type !== "assessment-detection") {
      throw new Error(`Invalid job type: ${job.metadata.type}`);
    }
    if (job.status !== "running") {
      throw new Error(`Job must be in running state to execute, got: ${job.status}`);
    }
    // A fresh run starts a fresh started/progress/completed event cycle.
    this.isFirstProgress = true;
    await this.processAssessmentDetectionJob(job);
  }

  /**
   * Override updateJobProgress to emit events to Event Store:
   * job.started on the first update, job.completed at 100%, job.progress
   * otherwise. Only applies to running assessment-detection jobs.
   */
  async updateJobProgress(job) {
    await super.updateJobProgress(job);
    if (job.metadata.type !== "assessment-detection" || job.status !== "running") {
      return;
    }
    const detectJob = job;
    const payload = {
      jobId: detectJob.metadata.id,
      jobType: detectJob.metadata.type
    };
    let eventType;
    if (this.isFirstProgress) {
      this.isFirstProgress = false;
      eventType = "job.started";
    } else if (detectJob.progress.percentage === 100) {
      // Note: result would come from job.result, but that's handled by base class
      eventType = "job.completed";
    } else {
      eventType = "job.progress";
      payload.progress = detectJob.progress;
    }
    await this.eventStore.appendEvent({
      type: eventType,
      resourceId: detectJob.params.resourceId,
      userId: detectJob.metadata.userId,
      version: 1,
      payload
    });
  }

  /**
   * After the base class records the failure, emit a job.failed event with a
   * user-safe error message (the raw error is not leaked to clients).
   */
  async handleJobFailure(job, error) {
    await super.handleJobFailure(job, error);
    if (job.status !== "failed" || job.metadata.type !== "assessment-detection") {
      return;
    }
    const failedJob = job;
    await this.eventStore.appendEvent({
      type: "job.failed",
      resourceId: failedJob.params.resourceId,
      userId: failedJob.metadata.userId,
      version: 1,
      payload: {
        jobId: failedJob.metadata.id,
        jobType: failedJob.metadata.type,
        error: "Assessment detection failed. Please try again later."
      }
    });
  }

  /**
   * Core pipeline: load metadata, run detection, then create one annotation
   * per detected assessment. Individual annotation failures are logged and
   * skipped so one bad span does not abort the batch.
   */
  async processAssessmentDetectionJob(job) {
    console.log(`[AssessmentDetectionWorker] Processing assessment detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
    const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
    if (!resource) {
      throw new Error(`Resource ${job.params.resourceId} not found`);
    }
    let trackedJob = job;
    // Small local helper: snapshot the job with new progress and report it.
    const report = async (stage, percentage, message) => {
      trackedJob = { ...trackedJob, progress: { stage, percentage, message } };
      await this.updateJobProgress(trackedJob);
    };
    await report("analyzing", 10, "Loading resource...");
    await report("analyzing", 30, "Analyzing text...");
    const assessments = await AnnotationDetection.detectAssessments(
      job.params.resourceId,
      this.config,
      this.inferenceClient,
      job.params.instructions,
      job.params.tone,
      job.params.density
    );
    console.log(`[AssessmentDetectionWorker] Found ${assessments.length} assessments to create`);
    await report("creating", 60, `Creating ${assessments.length} annotations...`);
    let created = 0;
    for (const assessment of assessments) {
      try {
        await this.createAssessmentAnnotation(job.params.resourceId, job.metadata.userId, assessment);
        created++;
      } catch (error) {
        console.error(`[AssessmentDetectionWorker] Failed to create assessment:`, error);
      }
    }
    await report("creating", 100, `Complete! Created ${created} assessments`);
    console.log(`[AssessmentDetectionWorker] \u2705 Created ${created}/${assessments.length} assessments`);
  }

  /**
   * Build a W3C Web Annotation for one detected assessment and emit an
   * annotation.added event. Assessments carry position + quote selectors and
   * a single TextualBody holding the assessment text.
   * @throws when the backend public URL is not configured
   */
  async createAssessmentAnnotation(targetResourceId, creatorUserId, assessment) {
    const backendUrl = this.config.services.backend?.publicURL;
    if (!backendUrl) throw new Error("Backend publicURL not configured");
    const newAnnotationId = generateAnnotationId3(backendUrl);
    const targetUri = resourceIdToURI3(targetResourceId, backendUrl);
    // Quote selector: prefix/suffix are optional and only set when non-empty.
    const quoteSelector = {
      type: "TextQuoteSelector",
      exact: assessment.exact
    };
    if (assessment.prefix) {
      quoteSelector.prefix = assessment.prefix;
    }
    if (assessment.suffix) {
      quoteSelector.suffix = assessment.suffix;
    }
    const annotation = {
      "@context": "http://www.w3.org/ns/anno.jsonld",
      "type": "Annotation",
      "id": newAnnotationId,
      "motivation": "assessing",
      "creator": userId2(creatorUserId),
      "created": new Date().toISOString(),
      "target": {
        type: "SpecificResource",
        source: targetUri,
        selector: [
          {
            type: "TextPositionSelector",
            start: assessment.start,
            end: assessment.end
          },
          quoteSelector
        ]
      },
      "body": {
        type: "TextualBody",
        value: assessment.assessment,
        format: "text/plain"
      }
    };
    await this.eventStore.appendEvent({
      type: "annotation.added",
      resourceId: targetResourceId,
      userId: userId2(creatorUserId),
      version: 1,
      payload: { annotation }
    });
  }
};
|
|
1202
|
+
|
|
1203
|
+
// src/jobs/comment-detection-worker.ts
|
|
1204
|
+
import { JobWorker as JobWorker5 } from "@semiont/jobs";
|
|
1205
|
+
import { generateAnnotationId as generateAnnotationId4 } from "@semiont/event-sourcing";
|
|
1206
|
+
import { resourceIdToURI as resourceIdToURI4 } from "@semiont/core";
|
|
1207
|
+
import { userId as userId3 } from "@semiont/core";
|
|
1208
|
+
// Comment-detection worker: asks the inference client for passages worth
// commenting on and records each one as a W3C Web Annotation with
// motivation "commenting".
var CommentDetectionWorker = class extends JobWorker5 {
  constructor(jobQueue, config, eventStore, inferenceClient) {
    super(jobQueue);
    this.config = config;
    this.eventStore = eventStore;
    this.inferenceClient = inferenceClient;
  }
  // True until the first progress update of the current job run has been
  // emitted; drives the job.started event in updateJobProgress.
  isFirstProgress = true;
  /** Human-readable worker name used in logs. */
  getWorkerName() {
    return "CommentDetectionWorker";
  }
  /** This worker only handles comment-detection jobs. */
  canProcessJob(job) {
    return job.metadata.type === "comment-detection";
  }
  /**
   * Validate the job then run the detection pipeline.
   * @throws when the job type is wrong or the job is not running
   */
  async executeJob(job) {
    if (job.metadata.type !== "comment-detection") {
      throw new Error(`Invalid job type: ${job.metadata.type}`);
    }
    if (job.status !== "running") {
      throw new Error(`Job must be in running state to execute, got: ${job.status}`);
    }
    // A fresh run starts a fresh started/progress/completed event cycle.
    this.isFirstProgress = true;
    await this.processCommentDetectionJob(job);
  }
  /**
   * Override updateJobProgress to emit events to Event Store:
   * job.started on the first update, job.completed at 100%, job.progress
   * otherwise. Only applies to running comment-detection jobs.
   */
  async updateJobProgress(job) {
    await super.updateJobProgress(job);
    if (job.metadata.type !== "comment-detection") return;
    if (job.status !== "running") {
      return;
    }
    const cdJob = job;
    const baseEvent = {
      resourceId: cdJob.params.resourceId,
      userId: cdJob.metadata.userId,
      version: 1
    };
    const isComplete = cdJob.progress.percentage === 100;
    if (this.isFirstProgress) {
      this.isFirstProgress = false;
      await this.eventStore.appendEvent({
        type: "job.started",
        ...baseEvent,
        payload: {
          jobId: cdJob.metadata.id,
          jobType: cdJob.metadata.type
        }
      });
    } else if (isComplete) {
      await this.eventStore.appendEvent({
        type: "job.completed",
        ...baseEvent,
        payload: {
          jobId: cdJob.metadata.id,
          jobType: cdJob.metadata.type
          // Note: result would come from job.result, but that's handled by base class
        }
      });
    } else {
      await this.eventStore.appendEvent({
        type: "job.progress",
        ...baseEvent,
        payload: {
          jobId: cdJob.metadata.id,
          jobType: cdJob.metadata.type,
          progress: cdJob.progress
        }
      });
    }
  }
  /**
   * After the base class records the failure, emit a job.failed event with a
   * user-safe error message (the raw error is not leaked to clients).
   */
  async handleJobFailure(job, error) {
    await super.handleJobFailure(job, error);
    if (job.status === "failed" && job.metadata.type === "comment-detection") {
      const cdJob = job;
      await this.eventStore.appendEvent({
        type: "job.failed",
        resourceId: cdJob.params.resourceId,
        userId: cdJob.metadata.userId,
        version: 1,
        payload: {
          jobId: cdJob.metadata.id,
          jobType: cdJob.metadata.type,
          error: "Comment detection failed. Please try again later."
        }
      });
    }
  }
  /**
   * Core pipeline: load metadata, run detection, then create one annotation
   * per detected comment. Individual annotation failures are logged and
   * skipped so one bad span does not abort the batch.
   */
  async processCommentDetectionJob(job) {
    console.log(`[CommentDetectionWorker] Processing comment detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
    const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
    if (!resource) {
      throw new Error(`Resource ${job.params.resourceId} not found`);
    }
    let updatedJob = {
      ...job,
      progress: {
        stage: "analyzing",
        percentage: 10,
        message: "Loading resource..."
      }
    };
    await this.updateJobProgress(updatedJob);
    updatedJob = {
      ...updatedJob,
      progress: {
        stage: "analyzing",
        percentage: 30,
        message: "Analyzing text and generating comments..."
      }
    };
    await this.updateJobProgress(updatedJob);
    const comments = await AnnotationDetection.detectComments(
      job.params.resourceId,
      this.config,
      this.inferenceClient,
      job.params.instructions,
      job.params.tone,
      job.params.density
    );
    console.log(`[CommentDetectionWorker] Found ${comments.length} comments to create`);
    updatedJob = {
      ...updatedJob,
      progress: {
        stage: "creating",
        percentage: 60,
        message: `Creating ${comments.length} annotations...`
      }
    };
    await this.updateJobProgress(updatedJob);
    let created = 0;
    for (const comment of comments) {
      try {
        await this.createCommentAnnotation(job.params.resourceId, job.metadata.userId, comment);
        created++;
      } catch (error) {
        console.error(`[CommentDetectionWorker] Failed to create comment:`, error);
      }
    }
    updatedJob = {
      ...updatedJob,
      progress: {
        stage: "creating",
        percentage: 100,
        message: `Complete! Created ${created} comments`
      }
    };
    await this.updateJobProgress(updatedJob);
    console.log(`[CommentDetectionWorker] \u2705 Created ${created}/${comments.length} comments`);
  }
  /**
   * Build a W3C Web Annotation for one detected comment and emit an
   * annotation.added event. Comments carry position + quote selectors and a
   * single TextualBody holding the comment text.
   *
   * FIX: now records `creator` and `created` on the annotation, matching the
   * highlight and assessment workers — previously comment annotations were
   * emitted without provenance fields.
   * @throws when the backend public URL is not configured
   */
  async createCommentAnnotation(resourceId2, userId_, comment) {
    const backendUrl = this.config.services.backend?.publicURL;
    if (!backendUrl) {
      throw new Error("Backend publicURL not configured");
    }
    const resourceUri3 = resourceIdToURI4(resourceId2, backendUrl);
    const annotationId2 = generateAnnotationId4(backendUrl);
    const annotation = {
      "@context": "http://www.w3.org/ns/anno.jsonld",
      type: "Annotation",
      id: annotationId2,
      motivation: "commenting",
      // Provenance fields added for consistency with the sibling workers.
      creator: userId3(userId_),
      created: new Date().toISOString(),
      target: {
        type: "SpecificResource",
        source: resourceUri3,
        selector: [
          {
            type: "TextPositionSelector",
            start: comment.start,
            end: comment.end
          },
          {
            type: "TextQuoteSelector",
            exact: comment.exact,
            prefix: comment.prefix || "",
            suffix: comment.suffix || ""
          }
        ]
      },
      body: [
        {
          type: "TextualBody",
          value: comment.comment,
          purpose: "commenting",
          format: "text/plain",
          language: "en"
        }
      ]
    };
    await this.eventStore.appendEvent({
      type: "annotation.added",
      resourceId: resourceId2,
      userId: userId3(userId_),
      version: 1,
      payload: {
        annotation
      }
    });
    console.log(`[CommentDetectionWorker] Created comment annotation ${annotationId2} for "${comment.exact.substring(0, 50)}..."`);
  }
};
|
|
1410
|
+
|
|
1411
|
+
// src/jobs/tag-detection-worker.ts
|
|
1412
|
+
import { JobWorker as JobWorker6 } from "@semiont/jobs";
|
|
1413
|
+
import { generateAnnotationId as generateAnnotationId5 } from "@semiont/event-sourcing";
|
|
1414
|
+
import { resourceIdToURI as resourceIdToURI5 } from "@semiont/core";
|
|
1415
|
+
import { getTagSchema } from "@semiont/ontology";
|
|
1416
|
+
import { userId as userId4 } from "@semiont/core";
|
|
1417
|
+
// Worker that runs "tag-detection" jobs: for each requested category of a tag
// schema it asks the inference client for matching text spans, then records
// each match as a W3C Web Annotation (motivation "tagging") via the event store.
var TagDetectionWorker = class extends JobWorker6 {
  /**
   * @param jobQueue        - queue polled by the JobWorker base class
   * @param config          - service configuration (backend publicURL, filesystem path, ...)
   * @param eventStore      - event store used to append job.* and annotation.added events
   * @param inferenceClient - LLM client handed to AnnotationDetection.detectTags
   */
  constructor(jobQueue, config, eventStore, inferenceClient) {
    super(jobQueue);
    this.config = config;
    this.eventStore = eventStore;
    this.inferenceClient = inferenceClient;
  }
  // True until the first progress update of the current job has been emitted;
  // used to translate that first update into a "job.started" event.
  // NOTE(review): instance-level flag — assumes one job executes at a time per
  // worker instance; confirm the base class never interleaves executeJob calls.
  isFirstProgress = true;
  getWorkerName() {
    return "TagDetectionWorker";
  }
  // Only claim jobs of our type from the shared queue.
  canProcessJob(job) {
    return job.metadata.type === "tag-detection";
  }
  /**
   * Entry point invoked by the base class for a claimed job.
   * Validates job type and state, resets the first-progress flag,
   * then delegates to processTagDetectionJob.
   * @throws Error if the job is not a running tag-detection job
   */
  async executeJob(job) {
    if (job.metadata.type !== "tag-detection") {
      throw new Error(`Invalid job type: ${job.metadata.type}`);
    }
    if (job.status !== "running") {
      throw new Error(`Job must be in running state to execute, got: ${job.status}`);
    }
    this.isFirstProgress = true;
    await this.processTagDetectionJob(job);
  }
  /**
   * Override updateJobProgress to emit events to Event Store.
   * Mapping: first update -> "job.started", percentage === 100 -> "job.completed",
   * anything else -> "job.progress". Non-running or foreign jobs are ignored
   * after the base-class update.
   */
  async updateJobProgress(job) {
    await super.updateJobProgress(job);
    if (job.metadata.type !== "tag-detection") return;
    if (job.status !== "running") {
      return;
    }
    const tdJob = job;
    // Fields shared by every job.* event emitted below.
    const baseEvent = {
      resourceId: tdJob.params.resourceId,
      userId: tdJob.metadata.userId,
      version: 1
    };
    const isComplete = tdJob.progress.percentage === 100;
    if (this.isFirstProgress) {
      this.isFirstProgress = false;
      await this.eventStore.appendEvent({
        type: "job.started",
        ...baseEvent,
        payload: {
          jobId: tdJob.metadata.id,
          jobType: tdJob.metadata.type
        }
      });
    } else if (isComplete) {
      await this.eventStore.appendEvent({
        type: "job.completed",
        ...baseEvent,
        payload: {
          jobId: tdJob.metadata.id,
          jobType: tdJob.metadata.type
          // Note: result would come from job.result, but that's handled by base class
        }
      });
    } else {
      await this.eventStore.appendEvent({
        type: "job.progress",
        ...baseEvent,
        payload: {
          jobId: tdJob.metadata.id,
          jobType: tdJob.metadata.type,
          progress: tdJob.progress
        }
      });
    }
  }
  /**
   * After base-class failure handling, publish a "job.failed" event with a
   * generic user-facing message (the raw error is not leaked into the event).
   */
  async handleJobFailure(job, error) {
    await super.handleJobFailure(job, error);
    if (job.status === "failed" && job.metadata.type === "tag-detection") {
      const tdJob = job;
      await this.eventStore.appendEvent({
        type: "job.failed",
        resourceId: tdJob.params.resourceId,
        userId: tdJob.metadata.userId,
        version: 1,
        payload: {
          jobId: tdJob.metadata.id,
          jobType: tdJob.metadata.type,
          error: "Tag detection failed. Please try again later."
        }
      });
    }
  }
  /**
   * Main pipeline for one job:
   *  1. validate the schema id and every requested category against the schema,
   *  2. verify the target resource exists,
   *  3. detect tags per category (progress 10% -> 60%),
   *  4. persist each detected tag as an annotation (progress -> 100%).
   * Individual annotation failures are logged and skipped, not fatal.
   */
  async processTagDetectionJob(job) {
    console.log(`[TagDetectionWorker] Processing tag detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
    const schema = getTagSchema(job.params.schemaId);
    if (!schema) {
      throw new Error(`Invalid tag schema: ${job.params.schemaId}`);
    }
    // Fail fast if any requested category is not declared in the schema.
    for (const category of job.params.categories) {
      if (!schema.tags.some((t) => t.name === category)) {
        throw new Error(`Invalid category "${category}" for schema ${job.params.schemaId}`);
      }
    }
    const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
    if (!resource) {
      throw new Error(`Resource ${job.params.resourceId} not found`);
    }
    // First progress update — becomes the "job.started" event.
    let updatedJob = {
      ...job,
      progress: {
        stage: "analyzing",
        percentage: 10,
        processedCategories: 0,
        totalCategories: job.params.categories.length,
        message: "Loading resource..."
      }
    };
    await this.updateJobProgress(updatedJob);
    const allTags = [];
    const byCategory = {};
    // Detect tags category by category; detection spans the 10%..60% range.
    for (let i = 0; i < job.params.categories.length; i++) {
      const category = job.params.categories[i];
      updatedJob = {
        ...updatedJob,
        progress: {
          stage: "analyzing",
          percentage: 10 + Math.floor(i / job.params.categories.length * 50),
          currentCategory: category,
          processedCategories: i + 1,
          totalCategories: job.params.categories.length,
          message: `Analyzing ${category}...`
        }
      };
      await this.updateJobProgress(updatedJob);
      const tags = await AnnotationDetection.detectTags(
        job.params.resourceId,
        this.config,
        this.inferenceClient,
        job.params.schemaId,
        category
      );
      console.log(`[TagDetectionWorker] Found ${tags.length} tags for category "${category}"`);
      allTags.push(...tags);
      byCategory[category] = tags.length;
    }
    updatedJob = {
      ...updatedJob,
      progress: {
        stage: "creating",
        percentage: 60,
        processedCategories: job.params.categories.length,
        totalCategories: job.params.categories.length,
        message: `Creating ${allTags.length} tag annotations...`
      }
    };
    await this.updateJobProgress(updatedJob);
    // Persist each detected tag; a single failure only skips that tag.
    let created = 0;
    for (const tag of allTags) {
      try {
        await this.createTagAnnotation(job.params.resourceId, job.metadata.userId, job.params.schemaId, tag);
        created++;
      } catch (error) {
        console.error(`[TagDetectionWorker] Failed to create tag:`, error);
      }
    }
    // Final update (percentage 100) — becomes the "job.completed" event.
    updatedJob = {
      ...updatedJob,
      progress: {
        stage: "creating",
        percentage: 100,
        processedCategories: job.params.categories.length,
        totalCategories: job.params.categories.length,
        message: `Complete! Created ${created} tags`
      }
    };
    await this.updateJobProgress(updatedJob);
    console.log(`[TagDetectionWorker] \u2705 Created ${created}/${allTags.length} tags across ${job.params.categories.length} categories`);
  }
  /**
   * Build a W3C Web Annotation for one detected tag and append it as an
   * "annotation.added" event. The target carries both a TextPositionSelector
   * (start/end offsets) and a TextQuoteSelector (exact/prefix/suffix) so
   * consumers can re-anchor the span; the body records the tag category
   * (purpose "tagging") and the schema id (purpose "classifying").
   * @throws Error if the backend publicURL is not configured
   */
  async createTagAnnotation(resourceId2, userId_, schemaId, tag) {
    const backendUrl = this.config.services.backend?.publicURL;
    if (!backendUrl) {
      throw new Error("Backend publicURL not configured");
    }
    const resourceUri3 = resourceIdToURI5(resourceId2, backendUrl);
    const annotationId2 = generateAnnotationId5(backendUrl);
    const annotation = {
      "@context": "http://www.w3.org/ns/anno.jsonld",
      type: "Annotation",
      id: annotationId2,
      motivation: "tagging",
      target: {
        type: "SpecificResource",
        source: resourceUri3,
        selector: [
          {
            type: "TextPositionSelector",
            start: tag.start,
            end: tag.end
          },
          {
            type: "TextQuoteSelector",
            exact: tag.exact,
            prefix: tag.prefix || "",
            suffix: tag.suffix || ""
          }
        ]
      },
      body: [
        {
          type: "TextualBody",
          value: tag.category,
          purpose: "tagging",
          format: "text/plain",
          language: "en"
        },
        {
          type: "TextualBody",
          value: schemaId,
          purpose: "classifying",
          format: "text/plain"
        }
      ]
    };
    await this.eventStore.appendEvent({
      type: "annotation.added",
      resourceId: resourceId2,
      userId: userId4(userId_),
      version: 1,
      payload: {
        annotation
      }
    });
    console.log(`[TagDetectionWorker] Created tag annotation ${annotationId2} for "${tag.category}": "${tag.exact.substring(0, 50)}..."`);
  }
};
|
|
1649
|
+
|
|
1650
|
+
// src/graph/consumer.ts
|
|
1651
|
+
import { EventQuery } from "@semiont/event-sourcing";
|
|
1652
|
+
import { didToAgent } from "@semiont/core";
|
|
1653
|
+
import { resourceId as makeResourceId, findBodyItem } from "@semiont/core";
|
|
1654
|
+
import { toResourceUri, toAnnotationUri } from "@semiont/event-sourcing";
|
|
1655
|
+
import { resourceUri as resourceUri2 } from "@semiont/api-client";
|
|
1656
|
+
// Projects event-store events into the graph database: subscribes to
// per-resource and global event streams and translates each event type
// (resource.*, annotation.*, entitytag.*, entitytype.*) into GraphDB writes.
var GraphDBConsumer = class {
  /**
   * @param config     - service configuration (supplies backend publicURL for URI building)
   * @param eventStore - event store whose bus/log this consumer reads from
   * @param graphDb    - graph database client receiving the projections
   */
  constructor(config, eventStore, graphDb) {
    this.config = config;
    this.eventStore = eventStore;
    this.graphDb = graphDb;
  }
  // resourceId -> event-bus subscription handle.
  subscriptions = /* @__PURE__ */ new Map();
  _globalSubscription = null;
  // Subscription to system-level events (kept for cleanup)
  // resourceId -> in-flight processing promise (enforces per-resource ordering).
  processing = /* @__PURE__ */ new Map();
  // resourceId -> sequence number of the last successfully applied event.
  lastProcessed = /* @__PURE__ */ new Map();
  async initialize() {
    console.log("[GraphDBConsumer] Initialized");
    await this.subscribeToGlobalEvents();
  }
  /**
   * Subscribe to global system-level events (no resourceId)
   * This allows the consumer to react to events like entitytype.added
   */
  async subscribeToGlobalEvents() {
    this._globalSubscription = this.eventStore.bus.subscriptions.subscribeGlobal(async (storedEvent) => {
      console.log(`[GraphDBConsumer] Received global event: ${storedEvent.event.type}`);
      await this.processEvent(storedEvent);
    });
    console.log("[GraphDBConsumer] Subscribed to global system events");
  }
  // Returns the graph database handle (the "initialized" check is currently
  // just an accessor; graphDb is supplied in the constructor).
  ensureInitialized() {
    return this.graphDb;
  }
  /**
   * Subscribe to events for a resource
   * Apply each event to GraphDB
   */
  async subscribeToResource(resourceId2) {
    this.ensureInitialized();
    const publicURL = this.config.services.backend.publicURL;
    const rUri = resourceUri2(`${publicURL}/resources/${resourceId2}`);
    const subscription = this.eventStore.bus.subscriptions.subscribe(rUri, async (storedEvent) => {
      await this.processEvent(storedEvent);
    });
    this.subscriptions.set(resourceId2, subscription);
    console.log(`[GraphDBConsumer] Subscribed to ${resourceId2}`);
  }
  /**
   * Stop the consumer and unsubscribe from all events
   */
  async stop() {
    console.log("[GraphDBConsumer] Stopping...");
    for (const subscription of this.subscriptions.values()) {
      // Defensive: only call unsubscribe when the handle actually provides it.
      if (subscription && typeof subscription.unsubscribe === "function") {
        subscription.unsubscribe();
      }
    }
    this.subscriptions.clear();
    if (this._globalSubscription && typeof this._globalSubscription.unsubscribe === "function") {
      this._globalSubscription.unsubscribe();
    }
    this._globalSubscription = null;
    console.log("[GraphDBConsumer] Stopped");
  }
  /**
   * Process event with ordering guarantee (sequential per resource)
   */
  async processEvent(storedEvent) {
    const { resourceId: resourceId2 } = storedEvent.event;
    // Global events (no resourceId) are applied immediately, unordered.
    if (!resourceId2) {
      await this.applyEventToGraph(storedEvent);
      return;
    }
    // Chain onto any in-flight event for the same resource so events for a
    // given resource are applied one at a time, in arrival order.
    const previousProcessing = this.processing.get(resourceId2);
    if (previousProcessing) {
      await previousProcessing;
    }
    const processingPromise = this.applyEventToGraph(storedEvent);
    this.processing.set(resourceId2, processingPromise);
    try {
      await processingPromise;
      this.lastProcessed.set(resourceId2, storedEvent.metadata.sequenceNumber);
    } catch (error) {
      console.error(`[GraphDBConsumer] Failed to process event:`, error);
      throw error;
    } finally {
      this.processing.delete(resourceId2);
    }
  }
  /**
   * Apply event to GraphDB
   */
  async applyEventToGraph(storedEvent) {
    const graphDb = this.ensureInitialized();
    const event = storedEvent.event;
    console.log(`[GraphDBConsumer] Applying ${event.type} to GraphDB (seq=${storedEvent.metadata.sequenceNumber})`);
    switch (event.type) {
      case "resource.created": {
        if (!event.resourceId) throw new Error("resource.created requires resourceId");
        const resourceUri3 = toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId);
        // Build a schema.org-shaped resource node from the event payload.
        const resource = {
          "@context": "https://schema.org/",
          "@id": resourceUri3,
          name: event.payload.name,
          entityTypes: event.payload.entityTypes || [],
          representations: [{
            mediaType: event.payload.format,
            checksum: event.payload.contentChecksum,
            rel: "original"
          }],
          archived: false,
          // Projection time, not the original event time — TODO confirm intended.
          dateCreated: (/* @__PURE__ */ new Date()).toISOString(),
          wasAttributedTo: didToAgent(event.userId),
          creationMethod: "api"
        };
        console.log(`[GraphDBConsumer] Creating resource in graph: ${resourceUri3}`);
        await graphDb.createResource(resource);
        console.log(`[GraphDBConsumer] \u2705 Resource created in graph: ${resourceUri3}`);
        break;
      }
      case "resource.cloned": {
        // Same projection as resource.created, but creationMethod is "clone".
        if (!event.resourceId) throw new Error("resource.cloned requires resourceId");
        const resourceUri3 = toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId);
        const resource = {
          "@context": "https://schema.org/",
          "@id": resourceUri3,
          name: event.payload.name,
          entityTypes: event.payload.entityTypes || [],
          representations: [{
            mediaType: event.payload.format,
            checksum: event.payload.contentChecksum,
            rel: "original"
          }],
          archived: false,
          dateCreated: (/* @__PURE__ */ new Date()).toISOString(),
          wasAttributedTo: didToAgent(event.userId),
          creationMethod: "clone"
        };
        console.log(`[GraphDBConsumer] Creating cloned resource in graph: ${resourceUri3}`);
        await graphDb.createResource(resource);
        console.log(`[GraphDBConsumer] \u2705 Cloned resource created in graph: ${resourceUri3}`);
        break;
      }
      case "resource.archived":
        if (!event.resourceId) throw new Error("resource.archived requires resourceId");
        await graphDb.updateResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId), {
          archived: true
        });
        break;
      case "resource.unarchived":
        if (!event.resourceId) throw new Error("resource.unarchived requires resourceId");
        await graphDb.updateResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId), {
          archived: false
        });
        break;
      case "annotation.added":
        console.log(`[GraphDBConsumer] \u{1F50D} ENTERED annotation.added case block`);
        console.log(`[GraphDBConsumer] Annotation ID: ${event.payload.annotation.id}`);
        // Store the annotation as-is, stamping the event's user as creator.
        await graphDb.createAnnotation({
          ...event.payload.annotation,
          creator: didToAgent(event.userId)
        });
        console.log(`[GraphDBConsumer] \u2705 Annotation created in graph: ${event.payload.annotation.id}`);
        break;
      case "annotation.removed":
        await graphDb.deleteAnnotation(toAnnotationUri({ baseUrl: this.config.services.backend.publicURL }, event.payload.annotationId));
        break;
      case "annotation.body.updated":
        console.log(`[GraphDBConsumer] \u{1F50D} ENTERED annotation.body.updated case block`);
        console.log(`[GraphDBConsumer] Event payload:`, JSON.stringify(event.payload));
        // Errors here are logged but deliberately not rethrown — a failed body
        // update does not abort the event stream for the resource.
        try {
          console.log(`[GraphDBConsumer] Creating annotation URI for: ${event.payload.annotationId}`);
          const annotationUri2 = toAnnotationUri({ baseUrl: this.config.services.backend.publicURL }, event.payload.annotationId);
          console.log(`[GraphDBConsumer] \u2705 Annotation URI created: ${annotationUri2}`);
          console.log(`[GraphDBConsumer] Processing annotation.body.updated for ${annotationUri2}`);
          console.log(`[GraphDBConsumer] Operations:`, JSON.stringify(event.payload.operations));
          const currentAnnotation = await graphDb.getAnnotation(annotationUri2);
          console.log(`[GraphDBConsumer] Current annotation in graph:`, currentAnnotation ? "FOUND" : "NOT FOUND");
          if (currentAnnotation) {
            console.log(`[GraphDBConsumer] Current body:`, JSON.stringify(currentAnnotation.body));
            // Normalize body to a fresh array (it may be a single object or absent).
            let bodyArray = Array.isArray(currentAnnotation.body) ? [...currentAnnotation.body] : currentAnnotation.body ? [currentAnnotation.body] : [];
            // Apply add/remove/replace patch operations in order; add is
            // idempotent (skipped when the item is already present).
            for (const op of event.payload.operations) {
              console.log(`[GraphDBConsumer] Applying operation:`, JSON.stringify(op));
              if (op.op === "add") {
                const exists = findBodyItem(bodyArray, op.item) !== -1;
                if (!exists) {
                  bodyArray.push(op.item);
                  console.log(`[GraphDBConsumer] Added item to body`);
                } else {
                  console.log(`[GraphDBConsumer] Item already exists, skipping`);
                }
              } else if (op.op === "remove") {
                const index = findBodyItem(bodyArray, op.item);
                if (index !== -1) {
                  bodyArray.splice(index, 1);
                  console.log(`[GraphDBConsumer] Removed item from body`);
                }
              } else if (op.op === "replace") {
                const index = findBodyItem(bodyArray, op.oldItem);
                if (index !== -1) {
                  bodyArray[index] = op.newItem;
                  console.log(`[GraphDBConsumer] Replaced item in body`);
                }
              }
            }
            console.log(`[GraphDBConsumer] New body array:`, JSON.stringify(bodyArray));
            console.log(`[GraphDBConsumer] Calling updateAnnotation...`);
            await graphDb.updateAnnotation(annotationUri2, {
              body: bodyArray
            });
            console.log(`[GraphDBConsumer] \u2705 updateAnnotation completed successfully`);
          } else {
            console.log(`[GraphDBConsumer] \u26A0\uFE0F Annotation not found in graph, skipping update`);
          }
        } catch (error) {
          console.error(`[GraphDBConsumer] \u274C ERROR in annotation.body.updated handler`);
          console.error(`[GraphDBConsumer] Annotation ID: ${event.payload.annotationId}`);
          console.error(`[GraphDBConsumer] Error:`, error);
          console.error(`[GraphDBConsumer] Error stack:`, error instanceof Error ? error.stack : "N/A");
        }
        break;
      case "entitytag.added":
        if (!event.resourceId) throw new Error("entitytag.added requires resourceId");
        // Read-modify-write of the resource's entityTypes list.
        const doc = await graphDb.getResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId));
        if (doc) {
          await graphDb.updateResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId), {
            entityTypes: [...doc.entityTypes || [], event.payload.entityType]
          });
        }
        break;
      case "entitytag.removed":
        if (!event.resourceId) throw new Error("entitytag.removed requires resourceId");
        const doc2 = await graphDb.getResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId));
        if (doc2) {
          await graphDb.updateResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, event.resourceId), {
            entityTypes: (doc2.entityTypes || []).filter((t) => t !== event.payload.entityType)
          });
        }
        break;
      case "entitytype.added":
        await graphDb.addEntityType(event.payload.entityType);
        break;
      default:
        // Unknown events are logged and skipped, not treated as errors.
        console.warn(`[GraphDBConsumer] Unknown event type: ${event.type}`);
    }
  }
  /**
   * Rebuild entire resource from events
   * Useful for recovery or initial sync
   */
  async rebuildResource(resourceId2) {
    const graphDb = this.ensureInitialized();
    console.log(`[GraphDBConsumer] Rebuilding resource ${resourceId2} from events`);
    // Best-effort delete of any stale node before replaying events.
    try {
      await graphDb.deleteResource(toResourceUri({ baseUrl: this.config.services.backend.publicURL }, makeResourceId(resourceId2)));
    } catch (error) {
      console.log(`[GraphDBConsumer] No existing resource to delete: ${resourceId2}`);
    }
    const query = new EventQuery(this.eventStore.log.storage);
    const events = await query.getResourceEvents(resourceId2);
    for (const storedEvent of events) {
      await this.applyEventToGraph(storedEvent);
    }
    console.log(`[GraphDBConsumer] Rebuilt ${resourceId2} from ${events.length} events`);
  }
  /**
   * Rebuild entire GraphDB from all events
   * Uses two-pass approach to ensure all resources exist before creating REFERENCES edges
   */
  async rebuildAll() {
    const graphDb = this.ensureInitialized();
    console.log("[GraphDBConsumer] Rebuilding entire GraphDB from events...");
    console.log("[GraphDBConsumer] Using two-pass approach: nodes first, then edges\n");
    await graphDb.clearDatabase();
    const query = new EventQuery(this.eventStore.log.storage);
    const allResourceIds = await this.eventStore.log.getAllResourceIds();
    console.log(`[GraphDBConsumer] Found ${allResourceIds.length} resources to rebuild`);
    console.log("\n[GraphDBConsumer] === PASS 1: Creating all nodes (resources + annotations) ===");
    // Pass 1: replay everything EXCEPT annotation.body.updated (those may
    // create edges to resources that do not exist yet).
    for (const resourceId2 of allResourceIds) {
      const events = await query.getResourceEvents(makeResourceId(resourceId2));
      for (const storedEvent of events) {
        if (storedEvent.event.type === "annotation.body.updated") {
          continue;
        }
        await this.applyEventToGraph(storedEvent);
      }
    }
    console.log("[GraphDBConsumer] \u2705 Pass 1 complete - all nodes created\n");
    console.log("[GraphDBConsumer] === PASS 2: Creating all REFERENCES edges ===");
    // Pass 2: replay only annotation.body.updated, now that all nodes exist.
    for (const resourceId2 of allResourceIds) {
      const events = await query.getResourceEvents(makeResourceId(resourceId2));
      for (const storedEvent of events) {
        if (storedEvent.event.type === "annotation.body.updated") {
          await this.applyEventToGraph(storedEvent);
        }
      }
    }
    console.log("[GraphDBConsumer] \u2705 Pass 2 complete - all edges created\n");
    console.log("[GraphDBConsumer] Rebuild complete");
  }
  /**
   * Get consumer health metrics
   */
  getHealthMetrics() {
    return {
      subscriptions: this.subscriptions.size,
      lastProcessed: Object.fromEntries(this.lastProcessed),
      processing: Array.from(this.processing.keys())
    };
  }
  /**
   * Unsubscribe from resource
   */
  async unsubscribeFromResource(resourceId2) {
    const subscription = this.subscriptions.get(resourceId2);
    if (subscription) {
      subscription.unsubscribe();
      this.subscriptions.delete(resourceId2);
      console.log(`[GraphDBConsumer] Unsubscribed from ${resourceId2}`);
    }
  }
  /**
   * Unsubscribe from all resources
   */
  async unsubscribeAll() {
    for (const [_resourceId, subscription] of this.subscriptions) {
      subscription.unsubscribe();
    }
    this.subscriptions.clear();
    console.log("[GraphDBConsumer] Unsubscribed from all resources");
  }
  /**
   * Shutdown consumer
   */
  async shutdown() {
    await this.unsubscribeAll();
    if (this._globalSubscription) {
      this._globalSubscription.unsubscribe();
      this._globalSubscription = null;
      console.log("[GraphDBConsumer] Unsubscribed from global events");
    }
    console.log("[GraphDBConsumer] Shut down");
  }
};
|
|
1996
|
+
|
|
1997
|
+
// src/service.ts
|
|
1998
|
+
/**
 * Boot the Make-Meaning service: resolve the data directory, wire up the job
 * queue, event store, representation store, inference client and graph
 * database, start the GraphDB consumer plus all detection/generation workers,
 * and return a handle exposing those components and an async stop() that
 * shuts everything down.
 *
 * @param config - service configuration; requires services.filesystem.path and
 *                 services.backend.publicURL, optionally _metadata.projectRoot
 * @returns handle { jobQueue, eventStore, repStore, inferenceClient, graphDb,
 *                   workers, graphConsumer, stop }
 * @throws Error when the required config paths/URLs are missing
 */
async function startMakeMeaning(config) {
  console.log("\u{1F9E0} Starting Make-Meaning service...");
  const configuredPath = config.services?.filesystem?.path;
  if (!configuredPath) {
    throw new Error("services.filesystem.path is required for make-meaning service");
  }
  const baseUrl = config.services?.backend?.publicURL;
  if (!baseUrl) {
    throw new Error("services.backend.publicURL is required for make-meaning service");
  }
  const projectRoot = config._metadata?.projectRoot;
  // Resolve the data directory: absolute paths win, then project-root-relative,
  // then cwd-relative as a fallback.
  let basePath;
  if (path.isAbsolute(configuredPath)) {
    basePath = configuredPath;
  } else if (projectRoot) {
    basePath = path.resolve(projectRoot, configuredPath);
  } else {
    basePath = path.resolve(configuredPath);
  }
  console.log("\u{1F4BC} Initializing job queue...");
  const jobQueue = new JobQueue({ dataDir: basePath });
  await jobQueue.initialize();
  console.log("\u2705 Job queue initialized");
  console.log("\u{1F4CA} Creating event store connection...");
  const eventStore = createEventStoreCore(basePath, baseUrl);
  console.log("\u{1F4E6} Creating representation store...");
  const repStore = new FilesystemRepresentationStore3({ basePath }, projectRoot);
  console.log("\u2705 Representation store created");
  console.log("\u{1F916} Creating inference client...");
  const inferenceClient = await getInferenceClient(config);
  console.log("\u2705 Inference client created");
  console.log("\u{1F4CA} Connecting to graph database...");
  const graphDb = await getGraphDatabase(config);
  console.log("\u2705 Graph database connected");
  console.log("\u{1F504} Starting graph consumer...");
  const graphConsumer = new GraphDBConsumer(config, eventStore, graphDb);
  await graphConsumer.initialize();
  // Subscribe the consumer to every resource already present in the event log.
  const allResourceIds = await eventStore.log.getAllResourceIds();
  console.log(`[GraphDBConsumer] Subscribing to ${allResourceIds.length} resources`);
  for (const resourceId2 of allResourceIds) {
    await graphConsumer.subscribeToResource(makeResourceId2(resourceId2));
  }
  console.log("\u2705 Graph consumer started");
  console.log("\u{1F477} Creating workers...");
  // One worker per annotation job type; all share the queue/config/store/client.
  const workers = {
    detection: new ReferenceDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    generation: new GenerationWorker(jobQueue, config, eventStore, inferenceClient),
    highlight: new HighlightDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    assessment: new AssessmentDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    comment: new CommentDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    tag: new TagDetectionWorker(jobQueue, config, eventStore, inferenceClient)
  };
  console.log("\u{1F680} Starting workers...");
  // Worker loops run in the background; start() promises are intentionally not
  // awaited, only guarded with .catch() so a crashed loop is logged.
  workers.detection.start().catch((error) => {
    console.error("\u26A0\uFE0F Detection worker stopped:", error);
  });
  workers.generation.start().catch((error) => {
    console.error("\u26A0\uFE0F Generation worker stopped:", error);
  });
  workers.highlight.start().catch((error) => {
    console.error("\u26A0\uFE0F Highlight worker stopped:", error);
  });
  workers.assessment.start().catch((error) => {
    console.error("\u26A0\uFE0F Assessment worker stopped:", error);
  });
  workers.comment.start().catch((error) => {
    console.error("\u26A0\uFE0F Comment worker stopped:", error);
  });
  workers.tag.start().catch((error) => {
    console.error("\u26A0\uFE0F Tag worker stopped:", error);
  });
  console.log("\u2705 All workers started");
  console.log("\u2705 Make-Meaning service started");
  return {
    jobQueue,
    eventStore,
    repStore,
    inferenceClient,
    graphDb,
    workers,
    graphConsumer,
    // Graceful shutdown: stop all workers in parallel, then the consumer,
    // then disconnect the graph database.
    stop: async () => {
      console.log("\u23F9\uFE0F Stopping Make-Meaning service...");
      await Promise.all([
        workers.detection.stop(),
        workers.generation.stop(),
        workers.highlight.stop(),
        workers.assessment.stop(),
        workers.comment.stop(),
        workers.tag.stop()
      ]);
      await graphConsumer.stop();
      await graphDb.disconnect();
      console.log("\u2705 Make-Meaning service stopped");
    }
  };
}
|
|
2095
|
+
|
|
1
2096
|
// src/resource-context.ts
|
|
2
2097
|
import { FilesystemViewStorage } from "@semiont/event-sourcing";
|
|
3
|
-
import { FilesystemRepresentationStore } from "@semiont/content";
|
|
4
|
-
import { getPrimaryRepresentation, decodeRepresentation } from "@semiont/api-client";
|
|
2098
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore4 } from "@semiont/content";
|
|
2099
|
+
import { getPrimaryRepresentation as getPrimaryRepresentation2, decodeRepresentation as decodeRepresentation2 } from "@semiont/api-client";
|
|
5
2100
|
var ResourceContext = class {
|
|
6
2101
|
/**
|
|
7
2102
|
* Get resource metadata from view storage
|
|
8
2103
|
*/
|
|
9
|
-
static async getResourceMetadata(
|
|
2104
|
+
static async getResourceMetadata(resourceId2, config) {
|
|
10
2105
|
const basePath = config.services.filesystem.path;
|
|
11
2106
|
const projectRoot = config._metadata?.projectRoot;
|
|
12
2107
|
const viewStorage = new FilesystemViewStorage(basePath, projectRoot);
|
|
13
|
-
const view = await viewStorage.get(
|
|
2108
|
+
const view = await viewStorage.get(resourceId2);
|
|
14
2109
|
if (!view) {
|
|
15
2110
|
return null;
|
|
16
2111
|
}
|
|
@@ -52,14 +2147,14 @@ var ResourceContext = class {
|
|
|
52
2147
|
static async addContentPreviews(resources, config) {
|
|
53
2148
|
const basePath = config.services.filesystem.path;
|
|
54
2149
|
const projectRoot = config._metadata?.projectRoot;
|
|
55
|
-
const repStore = new
|
|
2150
|
+
const repStore = new FilesystemRepresentationStore4({ basePath }, projectRoot);
|
|
56
2151
|
return await Promise.all(
|
|
57
2152
|
resources.map(async (doc) => {
|
|
58
2153
|
try {
|
|
59
|
-
const primaryRep =
|
|
2154
|
+
const primaryRep = getPrimaryRepresentation2(doc);
|
|
60
2155
|
if (primaryRep?.checksum && primaryRep?.mediaType) {
|
|
61
2156
|
const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
62
|
-
const contentPreview =
|
|
2157
|
+
const contentPreview = decodeRepresentation2(contentBuffer, primaryRep.mediaType).slice(0, 200);
|
|
63
2158
|
return { ...doc, content: contentPreview };
|
|
64
2159
|
}
|
|
65
2160
|
return { ...doc, content: "" };
|
|
@@ -72,20 +2167,20 @@ var ResourceContext = class {
|
|
|
72
2167
|
};
|
|
73
2168
|
|
|
74
2169
|
// src/annotation-context.ts
|
|
75
|
-
import {
|
|
2170
|
+
import { getInferenceClient as getInferenceClient2 } from "@semiont/inference";
|
|
76
2171
|
import {
|
|
77
2172
|
getBodySource,
|
|
78
2173
|
getTargetSource,
|
|
79
|
-
getTargetSelector,
|
|
2174
|
+
getTargetSelector as getTargetSelector2,
|
|
80
2175
|
getResourceEntityTypes,
|
|
81
2176
|
getTextPositionSelector,
|
|
82
|
-
getPrimaryRepresentation as
|
|
83
|
-
decodeRepresentation as
|
|
2177
|
+
getPrimaryRepresentation as getPrimaryRepresentation3,
|
|
2178
|
+
decodeRepresentation as decodeRepresentation3
|
|
84
2179
|
} from "@semiont/api-client";
|
|
85
|
-
import { FilesystemRepresentationStore as
|
|
2180
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore5 } from "@semiont/content";
|
|
86
2181
|
import { FilesystemViewStorage as FilesystemViewStorage2 } from "@semiont/event-sourcing";
|
|
87
2182
|
import { resourceId as createResourceId, uriToResourceId } from "@semiont/core";
|
|
88
|
-
import { getEntityTypes } from "@semiont/ontology";
|
|
2183
|
+
import { getEntityTypes as getEntityTypes2 } from "@semiont/ontology";
|
|
89
2184
|
var AnnotationContext = class {
|
|
90
2185
|
/**
|
|
91
2186
|
* Build LLM context for an annotation
|
|
@@ -97,7 +2192,7 @@ var AnnotationContext = class {
|
|
|
97
2192
|
* @returns Rich context for LLM processing
|
|
98
2193
|
* @throws Error if annotation or resource not found
|
|
99
2194
|
*/
|
|
100
|
-
static async buildLLMContext(
|
|
2195
|
+
static async buildLLMContext(annotationUri2, resourceId2, config, options = {}) {
|
|
101
2196
|
const {
|
|
102
2197
|
includeSourceContext = true,
|
|
103
2198
|
includeTargetContext = true,
|
|
@@ -106,16 +2201,16 @@ var AnnotationContext = class {
|
|
|
106
2201
|
if (contextWindow < 100 || contextWindow > 5e3) {
|
|
107
2202
|
throw new Error("contextWindow must be between 100 and 5000");
|
|
108
2203
|
}
|
|
109
|
-
console.log(`[AnnotationContext] buildLLMContext called with annotationUri=${
|
|
2204
|
+
console.log(`[AnnotationContext] buildLLMContext called with annotationUri=${annotationUri2}, resourceId=${resourceId2}`);
|
|
110
2205
|
const basePath = config.services.filesystem.path;
|
|
111
2206
|
console.log(`[AnnotationContext] basePath=${basePath}`);
|
|
112
2207
|
const projectRoot = config._metadata?.projectRoot;
|
|
113
2208
|
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
114
|
-
const repStore = new
|
|
115
|
-
console.log(`[AnnotationContext] Getting view for resourceId=${
|
|
2209
|
+
const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
|
|
2210
|
+
console.log(`[AnnotationContext] Getting view for resourceId=${resourceId2}`);
|
|
116
2211
|
let sourceView;
|
|
117
2212
|
try {
|
|
118
|
-
sourceView = await viewStorage.get(
|
|
2213
|
+
sourceView = await viewStorage.get(resourceId2);
|
|
119
2214
|
console.log(`[AnnotationContext] Got view:`, !!sourceView);
|
|
120
2215
|
if (!sourceView) {
|
|
121
2216
|
throw new Error("Source resource not found");
|
|
@@ -124,19 +2219,19 @@ var AnnotationContext = class {
|
|
|
124
2219
|
console.error(`[AnnotationContext] Error getting view:`, error);
|
|
125
2220
|
throw error;
|
|
126
2221
|
}
|
|
127
|
-
console.log(`[AnnotationContext] Looking for annotation ${
|
|
2222
|
+
console.log(`[AnnotationContext] Looking for annotation ${annotationUri2} in resource ${resourceId2}`);
|
|
128
2223
|
console.log(`[AnnotationContext] View has ${sourceView.annotations.annotations.length} annotations`);
|
|
129
2224
|
console.log(`[AnnotationContext] First 5 annotation IDs:`, sourceView.annotations.annotations.slice(0, 5).map((a) => a.id));
|
|
130
|
-
const annotation = sourceView.annotations.annotations.find((a) => a.id ===
|
|
2225
|
+
const annotation = sourceView.annotations.annotations.find((a) => a.id === annotationUri2);
|
|
131
2226
|
console.log(`[AnnotationContext] Found annotation:`, !!annotation);
|
|
132
2227
|
if (!annotation) {
|
|
133
2228
|
throw new Error("Annotation not found in view");
|
|
134
2229
|
}
|
|
135
2230
|
const targetSource = getTargetSource(annotation.target);
|
|
136
2231
|
const targetResourceId = targetSource.split("/").pop();
|
|
137
|
-
console.log(`[AnnotationContext] Target source: ${targetSource}, Expected resource ID: ${
|
|
138
|
-
if (targetResourceId !==
|
|
139
|
-
throw new Error(`Annotation target resource ID (${targetResourceId}) does not match expected resource ID (${
|
|
2232
|
+
console.log(`[AnnotationContext] Target source: ${targetSource}, Expected resource ID: ${resourceId2}, Extracted ID: ${targetResourceId}`);
|
|
2233
|
+
if (targetResourceId !== resourceId2) {
|
|
2234
|
+
throw new Error(`Annotation target resource ID (${targetResourceId}) does not match expected resource ID (${resourceId2})`);
|
|
140
2235
|
}
|
|
141
2236
|
const sourceDoc = sourceView.resource;
|
|
142
2237
|
const bodySource = getBodySource(annotation.body);
|
|
@@ -153,13 +2248,13 @@ var AnnotationContext = class {
|
|
|
153
2248
|
}
|
|
154
2249
|
let sourceContext;
|
|
155
2250
|
if (includeSourceContext) {
|
|
156
|
-
const primaryRep =
|
|
2251
|
+
const primaryRep = getPrimaryRepresentation3(sourceDoc);
|
|
157
2252
|
if (!primaryRep?.checksum || !primaryRep?.mediaType) {
|
|
158
2253
|
throw new Error("Source content not found");
|
|
159
2254
|
}
|
|
160
2255
|
const sourceContent = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
161
|
-
const contentStr =
|
|
162
|
-
const targetSelectorRaw =
|
|
2256
|
+
const contentStr = decodeRepresentation3(sourceContent, primaryRep.mediaType);
|
|
2257
|
+
const targetSelectorRaw = getTargetSelector2(annotation.target);
|
|
163
2258
|
const targetSelector = Array.isArray(targetSelectorRaw) ? targetSelectorRaw[0] : targetSelectorRaw;
|
|
164
2259
|
console.log(`[AnnotationContext] Target selector type:`, targetSelector?.type);
|
|
165
2260
|
if (!targetSelector) {
|
|
@@ -194,13 +2289,14 @@ var AnnotationContext = class {
|
|
|
194
2289
|
}
|
|
195
2290
|
let targetContext;
|
|
196
2291
|
if (includeTargetContext && targetDoc) {
|
|
197
|
-
const targetRep =
|
|
2292
|
+
const targetRep = getPrimaryRepresentation3(targetDoc);
|
|
198
2293
|
if (targetRep?.checksum && targetRep?.mediaType) {
|
|
199
2294
|
const targetContent = await repStore.retrieve(targetRep.checksum, targetRep.mediaType);
|
|
200
|
-
const contentStr =
|
|
2295
|
+
const contentStr = decodeRepresentation3(targetContent, targetRep.mediaType);
|
|
2296
|
+
const client = await getInferenceClient2(config);
|
|
201
2297
|
targetContext = {
|
|
202
2298
|
content: contentStr.slice(0, contextWindow * 2),
|
|
203
|
-
summary: await generateResourceSummary(targetDoc.name, contentStr, getResourceEntityTypes(targetDoc),
|
|
2299
|
+
summary: await generateResourceSummary(targetDoc.name, contentStr, getResourceEntityTypes(targetDoc), client)
|
|
204
2300
|
};
|
|
205
2301
|
}
|
|
206
2302
|
}
|
|
@@ -214,7 +2310,7 @@ var AnnotationContext = class {
|
|
|
214
2310
|
metadata: {
|
|
215
2311
|
resourceType: "document",
|
|
216
2312
|
language: sourceDoc.language,
|
|
217
|
-
entityTypes:
|
|
2313
|
+
entityTypes: getEntityTypes2(annotation)
|
|
218
2314
|
}
|
|
219
2315
|
} : void 0;
|
|
220
2316
|
const response = {
|
|
@@ -233,16 +2329,16 @@ var AnnotationContext = class {
|
|
|
233
2329
|
* Get resource annotations from view storage (fast path)
|
|
234
2330
|
* Throws if view missing
|
|
235
2331
|
*/
|
|
236
|
-
static async getResourceAnnotations(
|
|
2332
|
+
static async getResourceAnnotations(resourceId2, config) {
|
|
237
2333
|
if (!config.services?.filesystem?.path) {
|
|
238
2334
|
throw new Error("Filesystem path not found in configuration");
|
|
239
2335
|
}
|
|
240
2336
|
const basePath = config.services.filesystem.path;
|
|
241
2337
|
const projectRoot = config._metadata?.projectRoot;
|
|
242
2338
|
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
243
|
-
const view = await viewStorage.get(
|
|
2339
|
+
const view = await viewStorage.get(resourceId2);
|
|
244
2340
|
if (!view) {
|
|
245
|
-
throw new Error(`Resource ${
|
|
2341
|
+
throw new Error(`Resource ${resourceId2} not found in view storage`);
|
|
246
2342
|
}
|
|
247
2343
|
return view.annotations;
|
|
248
2344
|
}
|
|
@@ -250,8 +2346,8 @@ var AnnotationContext = class {
|
|
|
250
2346
|
* Get all annotations
|
|
251
2347
|
* @returns Array of all annotation objects
|
|
252
2348
|
*/
|
|
253
|
-
static async getAllAnnotations(
|
|
254
|
-
const annotations = await this.getResourceAnnotations(
|
|
2349
|
+
static async getAllAnnotations(resourceId2, config) {
|
|
2350
|
+
const annotations = await this.getResourceAnnotations(resourceId2, config);
|
|
255
2351
|
return await this.enrichResolvedReferences(annotations.annotations, config);
|
|
256
2352
|
}
|
|
257
2353
|
/**
|
|
@@ -268,7 +2364,7 @@ var AnnotationContext = class {
|
|
|
268
2364
|
if (ann.motivation === "linking" && ann.body) {
|
|
269
2365
|
const body = Array.isArray(ann.body) ? ann.body : [ann.body];
|
|
270
2366
|
for (const item of body) {
|
|
271
|
-
if (item.purpose === "linking" && item.source) {
|
|
2367
|
+
if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
|
|
272
2368
|
resolvedUris.add(item.source);
|
|
273
2369
|
}
|
|
274
2370
|
}
|
|
@@ -309,7 +2405,7 @@ var AnnotationContext = class {
|
|
|
309
2405
|
if (ann.motivation === "linking" && ann.body) {
|
|
310
2406
|
const body = Array.isArray(ann.body) ? ann.body : [ann.body];
|
|
311
2407
|
for (const item of body) {
|
|
312
|
-
if (item.purpose === "linking" && item.source) {
|
|
2408
|
+
if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
|
|
313
2409
|
const metadata = uriToMetadata.get(item.source);
|
|
314
2410
|
if (metadata) {
|
|
315
2411
|
return {
|
|
@@ -328,8 +2424,8 @@ var AnnotationContext = class {
|
|
|
328
2424
|
* Get resource stats (version info)
|
|
329
2425
|
* @returns Version and timestamp info for the annotations
|
|
330
2426
|
*/
|
|
331
|
-
static async getResourceStats(
|
|
332
|
-
const annotations = await this.getResourceAnnotations(
|
|
2427
|
+
static async getResourceStats(resourceId2, config) {
|
|
2428
|
+
const annotations = await this.getResourceAnnotations(resourceId2, config);
|
|
333
2429
|
return {
|
|
334
2430
|
resourceId: annotations.resourceId,
|
|
335
2431
|
version: annotations.version,
|
|
@@ -339,24 +2435,24 @@ var AnnotationContext = class {
|
|
|
339
2435
|
/**
|
|
340
2436
|
* Check if resource exists in view storage
|
|
341
2437
|
*/
|
|
342
|
-
static async resourceExists(
|
|
2438
|
+
static async resourceExists(resourceId2, config) {
|
|
343
2439
|
if (!config.services?.filesystem?.path) {
|
|
344
2440
|
throw new Error("Filesystem path not found in configuration");
|
|
345
2441
|
}
|
|
346
2442
|
const basePath = config.services.filesystem.path;
|
|
347
2443
|
const projectRoot = config._metadata?.projectRoot;
|
|
348
2444
|
const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
|
|
349
|
-
return await viewStorage.exists(
|
|
2445
|
+
return await viewStorage.exists(resourceId2);
|
|
350
2446
|
}
|
|
351
2447
|
/**
|
|
352
2448
|
* Get a single annotation by ID
|
|
353
2449
|
* O(1) lookup using resource ID to access view storage
|
|
354
2450
|
*/
|
|
355
|
-
static async getAnnotation(
|
|
356
|
-
const annotations = await this.getResourceAnnotations(
|
|
2451
|
+
static async getAnnotation(annotationId2, resourceId2, config) {
|
|
2452
|
+
const annotations = await this.getResourceAnnotations(resourceId2, config);
|
|
357
2453
|
return annotations.annotations.find((a) => {
|
|
358
2454
|
const shortId = a.id.split("/").pop();
|
|
359
|
-
return shortId ===
|
|
2455
|
+
return shortId === annotationId2;
|
|
360
2456
|
}) || null;
|
|
361
2457
|
}
|
|
362
2458
|
/**
|
|
@@ -373,11 +2469,11 @@ var AnnotationContext = class {
|
|
|
373
2469
|
/**
|
|
374
2470
|
* Get annotation context (selected text with surrounding context)
|
|
375
2471
|
*/
|
|
376
|
-
static async getAnnotationContext(
|
|
2472
|
+
static async getAnnotationContext(annotationId2, resourceId2, contextBefore, contextAfter, config) {
|
|
377
2473
|
const basePath = config.services.filesystem.path;
|
|
378
2474
|
const projectRoot = config._metadata?.projectRoot;
|
|
379
|
-
const repStore = new
|
|
380
|
-
const annotation = await this.getAnnotation(
|
|
2475
|
+
const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
|
|
2476
|
+
const annotation = await this.getAnnotation(annotationId2, resourceId2, config);
|
|
381
2477
|
if (!annotation) {
|
|
382
2478
|
throw new Error("Annotation not found");
|
|
383
2479
|
}
|
|
@@ -409,11 +2505,11 @@ var AnnotationContext = class {
|
|
|
409
2505
|
/**
|
|
410
2506
|
* Generate AI summary of annotation in context
|
|
411
2507
|
*/
|
|
412
|
-
static async generateAnnotationSummary(
|
|
2508
|
+
static async generateAnnotationSummary(annotationId2, resourceId2, config) {
|
|
413
2509
|
const basePath = config.services.filesystem.path;
|
|
414
2510
|
const projectRoot = config._metadata?.projectRoot;
|
|
415
|
-
const repStore = new
|
|
416
|
-
const annotation = await this.getAnnotation(
|
|
2511
|
+
const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
|
|
2512
|
+
const annotation = await this.getAnnotation(annotationId2, resourceId2, config);
|
|
417
2513
|
if (!annotation) {
|
|
418
2514
|
throw new Error("Annotation not found");
|
|
419
2515
|
}
|
|
@@ -427,7 +2523,7 @@ var AnnotationContext = class {
|
|
|
427
2523
|
const contentStr = await this.getResourceContent(resource, repStore);
|
|
428
2524
|
const contextSize = 500;
|
|
429
2525
|
const context = this.extractAnnotationContext(annotation, contentStr, contextSize, contextSize);
|
|
430
|
-
const annotationEntityTypes =
|
|
2526
|
+
const annotationEntityTypes = getEntityTypes2(annotation);
|
|
431
2527
|
const summary = await this.generateSummary(resource, context, annotationEntityTypes, config);
|
|
432
2528
|
return {
|
|
433
2529
|
summary,
|
|
@@ -449,18 +2545,18 @@ var AnnotationContext = class {
|
|
|
449
2545
|
* Get resource content as string
|
|
450
2546
|
*/
|
|
451
2547
|
static async getResourceContent(resource, repStore) {
|
|
452
|
-
const primaryRep =
|
|
2548
|
+
const primaryRep = getPrimaryRepresentation3(resource);
|
|
453
2549
|
if (!primaryRep?.checksum || !primaryRep?.mediaType) {
|
|
454
2550
|
throw new Error("Resource content not found");
|
|
455
2551
|
}
|
|
456
2552
|
const content = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
457
|
-
return
|
|
2553
|
+
return decodeRepresentation3(content, primaryRep.mediaType);
|
|
458
2554
|
}
|
|
459
2555
|
/**
|
|
460
2556
|
* Extract annotation context from resource content
|
|
461
2557
|
*/
|
|
462
2558
|
static extractAnnotationContext(annotation, contentStr, contextBefore, contextAfter) {
|
|
463
|
-
const targetSelector =
|
|
2559
|
+
const targetSelector = getTargetSelector2(annotation.target);
|
|
464
2560
|
const posSelector = targetSelector ? getTextPositionSelector(targetSelector) : null;
|
|
465
2561
|
if (!posSelector) {
|
|
466
2562
|
throw new Error("TextPositionSelector required for context");
|
|
@@ -477,6 +2573,7 @@ var AnnotationContext = class {
|
|
|
477
2573
|
}
|
|
478
2574
|
/**
|
|
479
2575
|
* Generate LLM summary of annotation in context
|
|
2576
|
+
* Creates inference client per-request (HTTP handler context)
|
|
480
2577
|
*/
|
|
481
2578
|
static async generateSummary(resource, context, entityTypes, config) {
|
|
482
2579
|
const summaryPrompt = `Summarize this text in context:
|
|
@@ -487,82 +2584,564 @@ Context after: "${context.after.substring(0, 200)}"
|
|
|
487
2584
|
|
|
488
2585
|
Resource: ${resource.name}
|
|
489
2586
|
Entity types: ${entityTypes.join(", ")}`;
|
|
490
|
-
|
|
2587
|
+
const client = await getInferenceClient2(config);
|
|
2588
|
+
return await client.generateText(summaryPrompt, 500, 0.5);
|
|
491
2589
|
}
|
|
492
2590
|
};
|
|
493
2591
|
|
|
494
2592
|
// src/graph-context.ts
|
|
495
|
-
import { getGraphDatabase } from "@semiont/graph";
|
|
496
|
-
import { resourceIdToURI } from "@semiont/core";
|
|
2593
|
+
import { getGraphDatabase as getGraphDatabase2 } from "@semiont/graph";
|
|
2594
|
+
import { resourceIdToURI as resourceIdToURI6 } from "@semiont/core";
|
|
497
2595
|
var GraphContext = class {
|
|
498
2596
|
/**
|
|
499
2597
|
* Get all resources referencing this resource (backlinks)
|
|
500
2598
|
* Requires graph traversal - must use graph database
|
|
501
2599
|
*/
|
|
502
|
-
static async getBacklinks(
|
|
503
|
-
const graphDb = await
|
|
504
|
-
const
|
|
505
|
-
return await graphDb.getResourceReferencedBy(
|
|
2600
|
+
static async getBacklinks(resourceId2, config) {
|
|
2601
|
+
const graphDb = await getGraphDatabase2(config);
|
|
2602
|
+
const resourceUri3 = resourceIdToURI6(resourceId2, config.services.backend.publicURL);
|
|
2603
|
+
return await graphDb.getResourceReferencedBy(resourceUri3);
|
|
506
2604
|
}
|
|
507
2605
|
/**
|
|
508
2606
|
* Find shortest path between two resources
|
|
509
2607
|
* Requires graph traversal - must use graph database
|
|
510
2608
|
*/
|
|
511
2609
|
static async findPath(fromResourceId, toResourceId, config, maxDepth) {
|
|
512
|
-
const graphDb = await
|
|
2610
|
+
const graphDb = await getGraphDatabase2(config);
|
|
513
2611
|
return await graphDb.findPath(fromResourceId, toResourceId, maxDepth);
|
|
514
2612
|
}
|
|
515
2613
|
/**
|
|
516
2614
|
* Get resource connections (graph edges)
|
|
517
2615
|
* Requires graph traversal - must use graph database
|
|
518
2616
|
*/
|
|
519
|
-
static async getResourceConnections(
|
|
520
|
-
const graphDb = await
|
|
521
|
-
return await graphDb.getResourceConnections(
|
|
2617
|
+
static async getResourceConnections(resourceId2, config) {
|
|
2618
|
+
const graphDb = await getGraphDatabase2(config);
|
|
2619
|
+
return await graphDb.getResourceConnections(resourceId2);
|
|
522
2620
|
}
|
|
523
2621
|
/**
|
|
524
2622
|
* Search resources by name (cross-resource query)
|
|
525
2623
|
* Requires full-text search - must use graph database
|
|
526
2624
|
*/
|
|
527
2625
|
static async searchResources(query, config, limit) {
|
|
528
|
-
const graphDb = await
|
|
2626
|
+
const graphDb = await getGraphDatabase2(config);
|
|
529
2627
|
return await graphDb.searchResources(query, limit);
|
|
530
2628
|
}
|
|
531
2629
|
};
|
|
532
2630
|
|
|
533
2631
|
// src/annotation-detection.ts
|
|
534
|
-
import { FilesystemRepresentationStore as
|
|
535
|
-
import { getPrimaryRepresentation as
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
2632
|
+
import { FilesystemRepresentationStore as FilesystemRepresentationStore6 } from "@semiont/content";
|
|
2633
|
+
import { getPrimaryRepresentation as getPrimaryRepresentation4, decodeRepresentation as decodeRepresentation4 } from "@semiont/api-client";
|
|
2634
|
+
|
|
2635
|
+
// src/detection/motivation-prompts.ts
|
|
2636
|
+
var MotivationPrompts = class {
|
|
2637
|
+
/**
|
|
2638
|
+
* Build a prompt for detecting comment-worthy passages
|
|
2639
|
+
*
|
|
2640
|
+
* @param content - The text content to analyze (will be truncated to 8000 chars)
|
|
2641
|
+
* @param instructions - Optional user-provided instructions
|
|
2642
|
+
* @param tone - Optional tone guidance (e.g., "academic", "conversational")
|
|
2643
|
+
* @param density - Optional target number of comments per 2000 words
|
|
2644
|
+
* @returns Formatted prompt string
|
|
2645
|
+
*/
|
|
2646
|
+
static buildCommentPrompt(content, instructions, tone, density) {
|
|
2647
|
+
let prompt;
|
|
2648
|
+
if (instructions) {
|
|
2649
|
+
const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
|
|
2650
|
+
const densityGuidance = density ? `
|
|
2651
|
+
|
|
2652
|
+
Aim for approximately ${density} comments per 2000 words of text.` : "";
|
|
2653
|
+
prompt = `Add comments to passages in this text following these instructions:
|
|
2654
|
+
|
|
2655
|
+
${instructions}${toneGuidance}${densityGuidance}
|
|
2656
|
+
|
|
2657
|
+
Text to analyze:
|
|
2658
|
+
---
|
|
2659
|
+
${content.substring(0, 8e3)}
|
|
2660
|
+
---
|
|
2661
|
+
|
|
2662
|
+
Return a JSON array of comments. Each comment must have:
|
|
2663
|
+
- "exact": the exact text passage being commented on (quoted verbatim from source)
|
|
2664
|
+
- "start": character offset where the passage starts
|
|
2665
|
+
- "end": character offset where the passage ends
|
|
2666
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2667
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2668
|
+
- "comment": your comment following the instructions above
|
|
2669
|
+
|
|
2670
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2671
|
+
|
|
2672
|
+
Example:
|
|
2673
|
+
[
|
|
2674
|
+
{"exact": "the quarterly review meeting", "start": 142, "end": 169, "prefix": "We need to schedule ", "suffix": " for next month.", "comment": "Who will lead this? Should we invite the external auditors?"}
|
|
2675
|
+
]`;
|
|
2676
|
+
} else {
|
|
2677
|
+
const toneGuidance = tone ? `
|
|
2678
|
+
|
|
2679
|
+
Tone: Use a ${tone} style in your comments.` : "";
|
|
2680
|
+
const densityGuidance = density ? `
|
|
2681
|
+
- Aim for approximately ${density} comments per 2000 words` : `
|
|
2682
|
+
- Aim for 3-8 comments per 2000 words (not too sparse or dense)`;
|
|
2683
|
+
prompt = `Identify passages in this text that would benefit from explanatory comments.
|
|
2684
|
+
For each passage, provide contextual information, clarification, or background.${toneGuidance}
|
|
2685
|
+
|
|
2686
|
+
Guidelines:
|
|
2687
|
+
- Select passages that reference technical terms, historical figures, complex concepts, or unclear references
|
|
2688
|
+
- Provide comments that ADD VALUE beyond restating the text
|
|
2689
|
+
- Focus on explanation, background, or connections to other ideas
|
|
2690
|
+
- Avoid obvious or trivial comments
|
|
2691
|
+
- Keep comments concise (1-3 sentences typically)${densityGuidance}
|
|
2692
|
+
|
|
2693
|
+
Text to analyze:
|
|
2694
|
+
---
|
|
2695
|
+
${content.substring(0, 8e3)}
|
|
2696
|
+
---
|
|
2697
|
+
|
|
2698
|
+
Return a JSON array of comments. Each comment should have:
|
|
2699
|
+
- "exact": the exact text passage being commented on (quoted verbatim from source)
|
|
2700
|
+
- "start": character offset where the passage starts
|
|
2701
|
+
- "end": character offset where the passage ends
|
|
2702
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2703
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2704
|
+
- "comment": your explanatory comment (1-3 sentences, provide context/background/clarification)
|
|
2705
|
+
|
|
2706
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2707
|
+
|
|
2708
|
+
Example format:
|
|
2709
|
+
[
|
|
2710
|
+
{"exact": "Ouranos", "start": 52, "end": 59, "prefix": "In the beginning, ", "suffix": " ruled the universe", "comment": "Ouranos (also spelled Uranus) is the primordial Greek deity personifying the sky. In Hesiod's Theogony, he is the son and husband of Gaia (Earth) and father of the Titans."}
|
|
2711
|
+
]`;
|
|
2712
|
+
}
|
|
2713
|
+
return prompt;
|
|
2714
|
+
}
|
|
2715
|
+
/**
|
|
2716
|
+
* Build a prompt for detecting highlight-worthy passages
|
|
2717
|
+
*
|
|
2718
|
+
* @param content - The text content to analyze (will be truncated to 8000 chars)
|
|
2719
|
+
* @param instructions - Optional user-provided instructions
|
|
2720
|
+
* @param density - Optional target number of highlights per 2000 words
|
|
2721
|
+
* @returns Formatted prompt string
|
|
2722
|
+
*/
|
|
2723
|
+
static buildHighlightPrompt(content, instructions, density) {
|
|
2724
|
+
let prompt;
|
|
2725
|
+
if (instructions) {
|
|
2726
|
+
const densityGuidance = density ? `
|
|
2727
|
+
|
|
2728
|
+
Aim for approximately ${density} highlights per 2000 words of text.` : "";
|
|
2729
|
+
prompt = `Identify passages in this text to highlight following these instructions:
|
|
2730
|
+
|
|
2731
|
+
${instructions}${densityGuidance}
|
|
2732
|
+
|
|
2733
|
+
Text to analyze:
|
|
2734
|
+
---
|
|
2735
|
+
${content.substring(0, 8e3)}
|
|
2736
|
+
---
|
|
2737
|
+
|
|
2738
|
+
Return a JSON array of highlights. Each highlight must have:
|
|
2739
|
+
- "exact": the exact text passage to highlight (quoted verbatim from source)
|
|
2740
|
+
- "start": character offset where the passage starts
|
|
2741
|
+
- "end": character offset where the passage ends
|
|
2742
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2743
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2744
|
+
|
|
2745
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2746
|
+
|
|
2747
|
+
Example:
|
|
2748
|
+
[
|
|
2749
|
+
{"exact": "revenue grew 45% year-over-year", "start": 142, "end": 174, "prefix": "In Q3 2024, ", "suffix": ", exceeding all forecasts."}
|
|
2750
|
+
]`;
|
|
2751
|
+
} else {
|
|
2752
|
+
const densityGuidance = density ? `
|
|
2753
|
+
- Aim for approximately ${density} highlights per 2000 words` : `
|
|
2754
|
+
- Aim for 3-8 highlights per 2000 words (be selective)`;
|
|
2755
|
+
prompt = `Identify passages in this text that merit highlighting for their importance or salience.
|
|
2756
|
+
Focus on content that readers should notice and remember.
|
|
2757
|
+
|
|
2758
|
+
Guidelines:
|
|
2759
|
+
- Highlight key claims, findings, or conclusions
|
|
2760
|
+
- Highlight important definitions, terminology, or concepts
|
|
2761
|
+
- Highlight notable quotes or particularly striking statements
|
|
2762
|
+
- Highlight critical decisions, action items, or turning points
|
|
2763
|
+
- Select passages that are SIGNIFICANT, not just interesting
|
|
2764
|
+
- Avoid trivial or obvious content${densityGuidance}
|
|
2765
|
+
|
|
2766
|
+
Text to analyze:
|
|
2767
|
+
---
|
|
2768
|
+
${content.substring(0, 8e3)}
|
|
2769
|
+
---
|
|
2770
|
+
|
|
2771
|
+
Return a JSON array of highlights. Each highlight should have:
|
|
2772
|
+
- "exact": the exact text passage to highlight (quoted verbatim from source)
|
|
2773
|
+
- "start": character offset where the passage starts
|
|
2774
|
+
- "end": character offset where the passage ends
|
|
2775
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2776
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2777
|
+
|
|
2778
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2779
|
+
|
|
2780
|
+
Example format:
|
|
2781
|
+
[
|
|
2782
|
+
{"exact": "we will discontinue support for legacy systems by March 2025", "start": 52, "end": 113, "prefix": "After careful consideration, ", "suffix": ". This decision affects"}
|
|
2783
|
+
]`;
|
|
2784
|
+
}
|
|
2785
|
+
return prompt;
|
|
2786
|
+
}
|
|
2787
|
+
/**
|
|
2788
|
+
* Build a prompt for detecting assessment-worthy passages
|
|
2789
|
+
*
|
|
2790
|
+
* @param content - The text content to analyze (will be truncated to 8000 chars)
|
|
2791
|
+
* @param instructions - Optional user-provided instructions
|
|
2792
|
+
* @param tone - Optional tone guidance (e.g., "critical", "supportive")
|
|
2793
|
+
* @param density - Optional target number of assessments per 2000 words
|
|
2794
|
+
* @returns Formatted prompt string
|
|
2795
|
+
*/
|
|
2796
|
+
static buildAssessmentPrompt(content, instructions, tone, density) {
|
|
2797
|
+
let prompt;
|
|
2798
|
+
if (instructions) {
|
|
2799
|
+
const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
|
|
2800
|
+
const densityGuidance = density ? `
|
|
2801
|
+
|
|
2802
|
+
Aim for approximately ${density} assessments per 2000 words of text.` : "";
|
|
2803
|
+
prompt = `Assess passages in this text following these instructions:
|
|
2804
|
+
|
|
2805
|
+
${instructions}${toneGuidance}${densityGuidance}
|
|
2806
|
+
|
|
2807
|
+
Text to analyze:
|
|
2808
|
+
---
|
|
2809
|
+
${content.substring(0, 8e3)}
|
|
2810
|
+
---
|
|
2811
|
+
|
|
2812
|
+
Return a JSON array of assessments. Each assessment must have:
|
|
2813
|
+
- "exact": the exact text passage being assessed (quoted verbatim from source)
|
|
2814
|
+
- "start": character offset where the passage starts
|
|
2815
|
+
- "end": character offset where the passage ends
|
|
2816
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2817
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2818
|
+
- "assessment": your assessment following the instructions above
|
|
2819
|
+
|
|
2820
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2821
|
+
|
|
2822
|
+
Example:
|
|
2823
|
+
[
|
|
2824
|
+
{"exact": "the quarterly revenue target", "start": 142, "end": 169, "prefix": "We established ", "suffix": " for Q4 2024.", "assessment": "This target seems ambitious given market conditions. Consider revising based on recent trends."}
|
|
2825
|
+
]`;
|
|
2826
|
+
} else {
|
|
2827
|
+
const toneGuidance = tone ? `
|
|
2828
|
+
|
|
2829
|
+
Tone: Use a ${tone} style in your assessments.` : "";
|
|
2830
|
+
const densityGuidance = density ? `
|
|
2831
|
+
- Aim for approximately ${density} assessments per 2000 words` : `
|
|
2832
|
+
- Aim for 2-6 assessments per 2000 words (focus on key passages)`;
|
|
2833
|
+
prompt = `Identify passages in this text that merit critical assessment or evaluation.
|
|
2834
|
+
For each passage, provide analysis of its validity, strength, or implications.${toneGuidance}
|
|
2835
|
+
|
|
2836
|
+
Guidelines:
|
|
2837
|
+
- Select passages containing claims, arguments, conclusions, or assertions
|
|
2838
|
+
- Assess evidence quality, logical soundness, or practical implications
|
|
2839
|
+
- Provide assessments that ADD INSIGHT beyond restating the text
|
|
2840
|
+
- Focus on passages where evaluation would help readers form judgments
|
|
2841
|
+
- Keep assessments concise yet substantive (1-3 sentences typically)${densityGuidance}
|
|
2842
|
+
|
|
2843
|
+
Text to analyze:
|
|
2844
|
+
---
|
|
2845
|
+
${content.substring(0, 8e3)}
|
|
2846
|
+
---
|
|
2847
|
+
|
|
2848
|
+
Return a JSON array of assessments. Each assessment should have:
|
|
2849
|
+
- "exact": the exact text passage being assessed (quoted verbatim from source)
|
|
2850
|
+
- "start": character offset where the passage starts
|
|
2851
|
+
- "end": character offset where the passage ends
|
|
2852
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2853
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2854
|
+
- "assessment": your analytical assessment (1-3 sentences, evaluate validity/strength/implications)
|
|
2855
|
+
|
|
2856
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2857
|
+
|
|
2858
|
+
Example format:
|
|
2859
|
+
[
|
|
2860
|
+
{"exact": "AI will replace most jobs by 2030", "start": 52, "end": 89, "prefix": "Many experts predict that ", "suffix": ", fundamentally reshaping", "assessment": "This claim lacks nuance and supporting evidence. Employment patterns historically show job transformation rather than wholesale replacement. The timeline appears speculative without specific sector analysis."}
|
|
2861
|
+
]`;
|
|
2862
|
+
}
|
|
2863
|
+
return prompt;
|
|
2864
|
+
}
|
|
2865
|
+
/**
|
|
2866
|
+
* Build a prompt for detecting structural tags
|
|
2867
|
+
*
|
|
2868
|
+
* @param content - The full text content to analyze (NOT truncated for structural analysis)
|
|
2869
|
+
* @param category - The specific category to detect
|
|
2870
|
+
* @param schemaName - Human-readable schema name
|
|
2871
|
+
* @param schemaDescription - Schema description
|
|
2872
|
+
* @param schemaDomain - Schema domain
|
|
2873
|
+
* @param categoryDescription - Category description
|
|
2874
|
+
* @param categoryExamples - Example questions/guidance for this category
|
|
2875
|
+
* @returns Formatted prompt string
|
|
2876
|
+
*/
|
|
2877
|
+
static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples) {
|
|
2878
|
+
const prompt = `You are analyzing a text using the ${schemaName} framework.
|
|
2879
|
+
|
|
2880
|
+
Schema: ${schemaDescription}
|
|
2881
|
+
Domain: ${schemaDomain}
|
|
2882
|
+
|
|
2883
|
+
Your task: Identify passages that serve the structural role of "${category}".
|
|
2884
|
+
|
|
2885
|
+
Category: ${category}
|
|
2886
|
+
Description: ${categoryDescription}
|
|
2887
|
+
Key questions:
|
|
2888
|
+
${categoryExamples.map((ex) => `- ${ex}`).join("\n")}
|
|
2889
|
+
|
|
2890
|
+
Guidelines:
|
|
2891
|
+
- Focus on STRUCTURAL FUNCTION, not semantic content
|
|
2892
|
+
- A passage serves the "${category}" role if it performs this function in the document's structure
|
|
2893
|
+
- Look for passages that explicitly fulfill this role
|
|
2894
|
+
- Passages can be sentences, paragraphs, or sections
|
|
2895
|
+
- Aim for precision - only tag passages that clearly serve this structural role
|
|
2896
|
+
- Typical documents have 1-5 instances of each category (some may have 0)
|
|
2897
|
+
|
|
2898
|
+
Text to analyze:
|
|
2899
|
+
---
|
|
2900
|
+
${content}
|
|
2901
|
+
---
|
|
2902
|
+
|
|
2903
|
+
Return a JSON array of tags. Each tag should have:
|
|
2904
|
+
- "exact": the exact text passage (quoted verbatim from source)
|
|
2905
|
+
- "start": character offset where the passage starts
|
|
2906
|
+
- "end": character offset where the passage ends
|
|
2907
|
+
- "prefix": up to 32 characters of text immediately before the passage
|
|
2908
|
+
- "suffix": up to 32 characters of text immediately after the passage
|
|
2909
|
+
|
|
2910
|
+
Return ONLY a valid JSON array, no additional text or explanation.
|
|
2911
|
+
|
|
2912
|
+
Example format:
|
|
2913
|
+
[
|
|
2914
|
+
{"exact": "What duty did the defendant owe?", "start": 142, "end": 175, "prefix": "The central question is: ", "suffix": " This question must be"},
|
|
2915
|
+
{"exact": "In tort law, a duty of care is established when...", "start": 412, "end": 520, "prefix": "Legal framework:\\n", "suffix": "\\n\\nApplying this standard"}
|
|
2916
|
+
]`;
|
|
2917
|
+
return prompt;
|
|
2918
|
+
}
|
|
2919
|
+
};
|
|
2920
|
+
|
|
2921
|
+
// src/detection/motivation-parsers.ts
|
|
2922
|
+
import { validateAndCorrectOffsets as validateAndCorrectOffsets2 } from "@semiont/api-client";
|
|
2923
|
+
/**
 * Strip a wrapping markdown code fence (``` or ```json) from an AI response.
 *
 * Unified helper: previously parseHighlights/parseAssessments used an
 * indexOf("\n")-based slice that silently failed to strip fences from
 * single-line responses (no newline after the opening fence); the regex
 * form below handles both single-line and multi-line fenced output.
 *
 * @param response - Raw AI response string
 * @returns The response with any surrounding code fence removed, trimmed
 */
function stripMarkdownFences(response) {
  let cleaned = response.trim();
  if (cleaned.startsWith("```")) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
  }
  return cleaned;
}

/**
 * Validate and correct character offsets for each item, dropping items
 * whose offsets cannot be reconciled with the content.
 *
 * Factored out of the four near-identical loops that previously appeared
 * in parseComments/parseHighlights/parseAssessments/validateTagOffsets.
 *
 * @param items - Items with `exact`, `start`, `end` fields
 * @param content - Original content to validate offsets against
 * @param warnFor - Builds the console.warn message for a skipped item
 * @param extra - Extra fields merged into each validated item (e.g. category)
 * @returns Items with corrected start/end plus prefix/suffix context
 */
function withCorrectedOffsets(items, content, warnFor, extra = {}) {
  const validated = [];
  for (const item of items) {
    try {
      const corrected = validateAndCorrectOffsets2(content, item.start, item.end, item.exact);
      validated.push({
        ...item,
        ...extra,
        start: corrected.start,
        end: corrected.end,
        prefix: corrected.prefix,
        suffix: corrected.suffix
      });
    } catch (error) {
      // Offsets could not be reconciled with the content — skip this item.
      console.warn(warnFor(item), error);
    }
  }
  return validated;
}

var MotivationParsers = class {
  /**
   * Parse and validate AI response for comment detection
   *
   * @param response - Raw AI response string (may include markdown code fences)
   * @param content - Original content to validate offsets against
   * @returns Array of validated comment matches
   */
  static parseComments(response, content) {
    try {
      const parsed = JSON.parse(stripMarkdownFences(response));
      if (!Array.isArray(parsed)) {
        console.warn("[MotivationParsers] Comment response is not an array");
        return [];
      }
      // A comment needs exact/start/end plus a non-blank comment body.
      const valid = parsed.filter(
        (c) => c && typeof c.exact === "string" && typeof c.start === "number" && typeof c.end === "number" && typeof c.comment === "string" && c.comment.trim().length > 0
      );
      console.log(`[MotivationParsers] Parsed ${valid.length} valid comments from ${parsed.length} total`);
      return withCorrectedOffsets(
        valid,
        content,
        (c) => `[MotivationParsers] Skipping invalid comment "${c.exact}":`
      );
    } catch (error) {
      console.error("[MotivationParsers] Failed to parse AI comment response:", error);
      return [];
    }
  }
  /**
   * Parse and validate AI response for highlight detection
   *
   * @param response - Raw AI response string (may include markdown code fences)
   * @param content - Original content to validate offsets against
   * @returns Array of validated highlight matches
   */
  static parseHighlights(response, content) {
    try {
      const parsed = JSON.parse(stripMarkdownFences(response));
      if (!Array.isArray(parsed)) {
        console.warn("[MotivationParsers] Highlight response was not an array");
        return [];
      }
      // Highlights only need exact/start/end — there is no body text.
      const highlights = parsed.filter(
        (h) => h && typeof h.exact === "string" && typeof h.start === "number" && typeof h.end === "number"
      );
      return withCorrectedOffsets(
        highlights,
        content,
        (h) => `[MotivationParsers] Skipping invalid highlight "${h.exact}":`
      );
    } catch (error) {
      console.error("[MotivationParsers] Failed to parse AI highlight response:", error);
      console.error("Raw response:", response);
      return [];
    }
  }
  /**
   * Parse and validate AI response for assessment detection
   *
   * @param response - Raw AI response string (may include markdown code fences)
   * @param content - Original content to validate offsets against
   * @returns Array of validated assessment matches
   */
  static parseAssessments(response, content) {
    try {
      const parsed = JSON.parse(stripMarkdownFences(response));
      if (!Array.isArray(parsed)) {
        console.warn("[MotivationParsers] Assessment response was not an array");
        return [];
      }
      // An assessment needs exact/start/end plus an assessment string.
      const assessments = parsed.filter(
        (a) => a && typeof a.exact === "string" && typeof a.start === "number" && typeof a.end === "number" && typeof a.assessment === "string"
      );
      return withCorrectedOffsets(
        assessments,
        content,
        (a) => `[MotivationParsers] Skipping invalid assessment "${a.exact}":`
      );
    } catch (error) {
      console.error("[MotivationParsers] Failed to parse AI assessment response:", error);
      console.error("Raw response:", response);
      return [];
    }
  }
  /**
   * Parse and validate AI response for tag detection
   * Note: Does NOT validate offsets - caller must do that with content
   *
   * @param response - Raw AI response string (may include markdown code fences)
   * @returns Array of tag matches (offsets not yet validated)
   */
  static parseTags(response) {
    try {
      const parsed = JSON.parse(stripMarkdownFences(response));
      if (!Array.isArray(parsed)) {
        console.warn("[MotivationParsers] Tag response is not an array");
        return [];
      }
      // Tags need exact/start/end and a non-blank exact passage.
      const valid = parsed.filter(
        (t) => t && typeof t.exact === "string" && typeof t.start === "number" && typeof t.end === "number" && t.exact.trim().length > 0
      );
      console.log(`[MotivationParsers] Parsed ${valid.length} valid tags from ${parsed.length} total`);
      return valid;
    } catch (error) {
      console.error("[MotivationParsers] Failed to parse AI tag response:", error);
      return [];
    }
  }
  /**
   * Validate tag offsets against content and add category
   * Helper for tag detection after initial parsing
   *
   * @param tags - Parsed tags without validated offsets
   * @param content - Original content to validate against
   * @param category - Category to assign to validated tags
   * @returns Array of validated tag matches
   */
  static validateTagOffsets(tags, content, category) {
    return withCorrectedOffsets(
      tags,
      content,
      () => `[MotivationParsers] Skipping invalid tag for category "${category}":`,
      { category }
    );
  }
};
|
|
3118
|
+
|
|
3119
|
+
// src/annotation-detection.ts
|
|
3120
|
+
import { getTagSchema as getTagSchema2, getSchemaCategory } from "@semiont/ontology";
|
|
542
3121
|
var AnnotationDetection = class {
|
|
543
3122
|
/**
|
|
544
3123
|
* Detect comments in a resource
|
|
545
3124
|
*
|
|
546
3125
|
* @param resourceId - The resource to analyze
|
|
547
3126
|
* @param config - Environment configuration
|
|
3127
|
+
* @param client - Inference client for AI operations
|
|
548
3128
|
* @param instructions - Optional user instructions for comment generation
|
|
549
3129
|
* @param tone - Optional tone guidance (e.g., "academic", "conversational")
|
|
550
3130
|
* @param density - Optional target number of comments per 2000 words
|
|
551
3131
|
* @returns Array of validated comment matches
|
|
552
3132
|
*/
|
|
553
|
-
static async detectComments(
|
|
554
|
-
const resource = await ResourceContext.getResourceMetadata(
|
|
3133
|
+
static async detectComments(resourceId2, config, client, instructions, tone, density) {
|
|
3134
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
555
3135
|
if (!resource) {
|
|
556
|
-
throw new Error(`Resource ${
|
|
3136
|
+
throw new Error(`Resource ${resourceId2} not found`);
|
|
557
3137
|
}
|
|
558
|
-
const content = await this.loadResourceContent(
|
|
3138
|
+
const content = await this.loadResourceContent(resourceId2, config);
|
|
559
3139
|
if (!content) {
|
|
560
|
-
throw new Error(`Could not load content for resource ${
|
|
3140
|
+
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
561
3141
|
}
|
|
562
3142
|
const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density);
|
|
563
|
-
const response = await
|
|
3143
|
+
const response = await client.generateText(
|
|
564
3144
|
prompt,
|
|
565
|
-
config,
|
|
566
3145
|
3e3,
|
|
567
3146
|
// maxTokens: Higher than highlights/assessments due to comment text
|
|
568
3147
|
0.4
|
|
@@ -575,23 +3154,23 @@ var AnnotationDetection = class {
|
|
|
575
3154
|
*
|
|
576
3155
|
* @param resourceId - The resource to analyze
|
|
577
3156
|
* @param config - Environment configuration
|
|
3157
|
+
* @param client - Inference client for AI operations
|
|
578
3158
|
* @param instructions - Optional user instructions for highlight selection
|
|
579
3159
|
* @param density - Optional target number of highlights per 2000 words
|
|
580
3160
|
* @returns Array of validated highlight matches
|
|
581
3161
|
*/
|
|
582
|
-
static async detectHighlights(
|
|
583
|
-
const resource = await ResourceContext.getResourceMetadata(
|
|
3162
|
+
static async detectHighlights(resourceId2, config, client, instructions, density) {
|
|
3163
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
584
3164
|
if (!resource) {
|
|
585
|
-
throw new Error(`Resource ${
|
|
3165
|
+
throw new Error(`Resource ${resourceId2} not found`);
|
|
586
3166
|
}
|
|
587
|
-
const content = await this.loadResourceContent(
|
|
3167
|
+
const content = await this.loadResourceContent(resourceId2, config);
|
|
588
3168
|
if (!content) {
|
|
589
|
-
throw new Error(`Could not load content for resource ${
|
|
3169
|
+
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
590
3170
|
}
|
|
591
3171
|
const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density);
|
|
592
|
-
const response = await
|
|
3172
|
+
const response = await client.generateText(
|
|
593
3173
|
prompt,
|
|
594
|
-
config,
|
|
595
3174
|
2e3,
|
|
596
3175
|
// maxTokens: Lower than comments/assessments (no body text)
|
|
597
3176
|
0.3
|
|
@@ -604,24 +3183,24 @@ var AnnotationDetection = class {
|
|
|
604
3183
|
*
|
|
605
3184
|
* @param resourceId - The resource to analyze
|
|
606
3185
|
* @param config - Environment configuration
|
|
3186
|
+
* @param client - Inference client for AI operations
|
|
607
3187
|
* @param instructions - Optional user instructions for assessment generation
|
|
608
3188
|
* @param tone - Optional tone guidance (e.g., "critical", "supportive")
|
|
609
3189
|
* @param density - Optional target number of assessments per 2000 words
|
|
610
3190
|
* @returns Array of validated assessment matches
|
|
611
3191
|
*/
|
|
612
|
-
static async detectAssessments(
|
|
613
|
-
const resource = await ResourceContext.getResourceMetadata(
|
|
3192
|
+
static async detectAssessments(resourceId2, config, client, instructions, tone, density) {
|
|
3193
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
614
3194
|
if (!resource) {
|
|
615
|
-
throw new Error(`Resource ${
|
|
3195
|
+
throw new Error(`Resource ${resourceId2} not found`);
|
|
616
3196
|
}
|
|
617
|
-
const content = await this.loadResourceContent(
|
|
3197
|
+
const content = await this.loadResourceContent(resourceId2, config);
|
|
618
3198
|
if (!content) {
|
|
619
|
-
throw new Error(`Could not load content for resource ${
|
|
3199
|
+
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
620
3200
|
}
|
|
621
3201
|
const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density);
|
|
622
|
-
const response = await
|
|
3202
|
+
const response = await client.generateText(
|
|
623
3203
|
prompt,
|
|
624
|
-
config,
|
|
625
3204
|
3e3,
|
|
626
3205
|
// maxTokens: Higher for assessment text
|
|
627
3206
|
0.3
|
|
@@ -634,12 +3213,13 @@ var AnnotationDetection = class {
|
|
|
634
3213
|
*
|
|
635
3214
|
* @param resourceId - The resource to analyze
|
|
636
3215
|
* @param config - Environment configuration
|
|
3216
|
+
* @param client - Inference client for AI operations
|
|
637
3217
|
* @param schemaId - The tag schema identifier (e.g., "irac", "imrad")
|
|
638
3218
|
* @param category - The specific category to detect
|
|
639
3219
|
* @returns Array of validated tag matches
|
|
640
3220
|
*/
|
|
641
|
-
static async detectTags(
|
|
642
|
-
const schema =
|
|
3221
|
+
static async detectTags(resourceId2, config, client, schemaId, category) {
|
|
3222
|
+
const schema = getTagSchema2(schemaId);
|
|
643
3223
|
if (!schema) {
|
|
644
3224
|
throw new Error(`Invalid tag schema: ${schemaId}`);
|
|
645
3225
|
}
|
|
@@ -647,13 +3227,13 @@ var AnnotationDetection = class {
|
|
|
647
3227
|
if (!categoryInfo) {
|
|
648
3228
|
throw new Error(`Invalid category "${category}" for schema ${schemaId}`);
|
|
649
3229
|
}
|
|
650
|
-
const resource = await ResourceContext.getResourceMetadata(
|
|
3230
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
651
3231
|
if (!resource) {
|
|
652
|
-
throw new Error(`Resource ${
|
|
3232
|
+
throw new Error(`Resource ${resourceId2} not found`);
|
|
653
3233
|
}
|
|
654
|
-
const content = await this.loadResourceContent(
|
|
3234
|
+
const content = await this.loadResourceContent(resourceId2, config);
|
|
655
3235
|
if (!content) {
|
|
656
|
-
throw new Error(`Could not load content for resource ${
|
|
3236
|
+
throw new Error(`Could not load content for resource ${resourceId2}`);
|
|
657
3237
|
}
|
|
658
3238
|
const prompt = MotivationPrompts.buildTagPrompt(
|
|
659
3239
|
content,
|
|
@@ -664,9 +3244,8 @@ var AnnotationDetection = class {
|
|
|
664
3244
|
categoryInfo.description,
|
|
665
3245
|
categoryInfo.examples
|
|
666
3246
|
);
|
|
667
|
-
const response = await
|
|
3247
|
+
const response = await client.generateText(
|
|
668
3248
|
prompt,
|
|
669
|
-
config,
|
|
670
3249
|
4e3,
|
|
671
3250
|
// maxTokens: Higher for full document analysis
|
|
672
3251
|
0.2
|
|
@@ -683,10 +3262,10 @@ var AnnotationDetection = class {
|
|
|
683
3262
|
* @param config - Environment configuration
|
|
684
3263
|
* @returns Resource content as string, or null if not available
|
|
685
3264
|
*/
|
|
686
|
-
static async loadResourceContent(
|
|
687
|
-
const resource = await ResourceContext.getResourceMetadata(
|
|
3265
|
+
static async loadResourceContent(resourceId2, config) {
|
|
3266
|
+
const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
|
|
688
3267
|
if (!resource) return null;
|
|
689
|
-
const primaryRep =
|
|
3268
|
+
const primaryRep = getPrimaryRepresentation4(resource);
|
|
690
3269
|
if (!primaryRep) return null;
|
|
691
3270
|
const baseMediaType = primaryRep.mediaType?.split(";")[0]?.trim() || "";
|
|
692
3271
|
if (baseMediaType !== "text/plain" && baseMediaType !== "text/markdown") {
|
|
@@ -695,9 +3274,9 @@ var AnnotationDetection = class {
|
|
|
695
3274
|
if (!primaryRep.checksum || !primaryRep.mediaType) return null;
|
|
696
3275
|
const basePath = config.services.filesystem.path;
|
|
697
3276
|
const projectRoot = config._metadata?.projectRoot;
|
|
698
|
-
const repStore = new
|
|
3277
|
+
const repStore = new FilesystemRepresentationStore6({ basePath }, projectRoot);
|
|
699
3278
|
const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
|
|
700
|
-
return
|
|
3279
|
+
return decodeRepresentation4(contentBuffer, primaryRep.mediaType);
|
|
701
3280
|
}
|
|
702
3281
|
};
|
|
703
3282
|
|
|
@@ -707,9 +3286,23 @@ var VERSION = "0.1.0";
|
|
|
707
3286
|
export {
|
|
708
3287
|
AnnotationContext,
|
|
709
3288
|
AnnotationDetection,
|
|
3289
|
+
AssessmentDetectionWorker,
|
|
3290
|
+
CommentDetectionWorker,
|
|
3291
|
+
GenerationWorker,
|
|
710
3292
|
GraphContext,
|
|
3293
|
+
GraphDBConsumer,
|
|
3294
|
+
HighlightDetectionWorker,
|
|
3295
|
+
MotivationParsers,
|
|
3296
|
+
MotivationPrompts,
|
|
711
3297
|
PACKAGE_NAME,
|
|
3298
|
+
ReferenceDetectionWorker,
|
|
712
3299
|
ResourceContext,
|
|
713
|
-
|
|
3300
|
+
TagDetectionWorker,
|
|
3301
|
+
VERSION,
|
|
3302
|
+
extractEntities,
|
|
3303
|
+
generateReferenceSuggestions,
|
|
3304
|
+
generateResourceFromTopic,
|
|
3305
|
+
generateResourceSummary,
|
|
3306
|
+
startMakeMeaning
|
|
714
3307
|
};
|
|
715
3308
|
//# sourceMappingURL=index.js.map
|