@semiont/make-meaning 0.2.30-build.61 → 0.2.30-build.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,727 +1,1230 @@
1
- // src/resource-context.ts
2
- import { FilesystemViewStorage } from "@semiont/event-sourcing";
1
+ // src/service.ts
2
+ import * as path from "path";
3
+ import { JobQueue } from "@semiont/jobs";
4
+ import { createEventStore as createEventStoreCore } from "@semiont/event-sourcing";
5
+ import { FilesystemRepresentationStore as FilesystemRepresentationStore3 } from "@semiont/content";
6
+ import { resourceId as makeResourceId2 } from "@semiont/core";
7
+ import { getInferenceClient } from "@semiont/inference";
8
+ import { getGraphDatabase } from "@semiont/graph";
9
+
10
+ // src/jobs/reference-detection-worker.ts
11
+ import { JobWorker } from "@semiont/jobs";
12
+ import { generateAnnotationId } from "@semiont/event-sourcing";
13
+ import { resourceIdToURI } from "@semiont/core";
14
+ import {
15
+ getPrimaryRepresentation,
16
+ decodeRepresentation,
17
+ validateAndCorrectOffsets
18
+ } from "@semiont/api-client";
19
+
20
+ // src/detection/entity-extractor.ts
21
+ async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences = false) {
22
+ console.log("extractEntities called with:", {
23
+ textLength: exact.length,
24
+ entityTypes: Array.isArray(entityTypes) ? entityTypes.map((et) => typeof et === "string" ? et : et.type) : []
25
+ });
26
+ const entityTypesDescription = entityTypes.map((et) => {
27
+ if (typeof et === "string") {
28
+ return et;
29
+ }
30
+ return et.examples && et.examples.length > 0 ? `${et.type} (examples: ${et.examples.slice(0, 3).join(", ")})` : et.type;
31
+ }).join(", ");
32
+ const descriptiveReferenceGuidance = includeDescriptiveReferences ? `
33
+ Include both:
34
+ - Direct mentions (names, proper nouns)
35
+ - Descriptive references (substantive phrases that refer to entities)
36
+
37
+ For descriptive references, include:
38
+ - Definite descriptions: "the Nobel laureate", "the tech giant", "the former president"
39
+ - Role-based references: "the CEO", "the physicist", "the author", "the owner", "the contractor"
40
+ - Epithets with context: "the Cupertino-based company", "the iPhone maker"
41
+ - References to entities even when identity is unknown or unspecified
42
+
43
+ Do NOT include:
44
+ - Simple pronouns alone: he, she, it, they, him, her, them
45
+ - Generic determiners alone: this, that, these, those
46
+ - Possessives without substance: his, her, their, its
47
+
48
+ Examples:
49
+ - For "Marie Curie", include "the Nobel laureate" and "the physicist" but NOT "she"
50
+ - For an unknown person, include "the owner" or "the contractor" (role-based references count even when identity is unspecified)
51
+ ` : `
52
+ Find direct mentions only (names, proper nouns). Do not include pronouns or descriptive references.
53
+ `;
54
+ const prompt = `Identify entity references in the following text. Look for mentions of: ${entityTypesDescription}.
55
+ ${descriptiveReferenceGuidance}
56
+ Text to analyze:
57
+ """
58
+ ${exact}
59
+ """
60
+
61
+ Return ONLY a JSON array of entities found. Each entity should have:
62
+ - exact: the exact text span from the input
63
+ - entityType: one of the provided entity types
64
+ - startOffset: character position where the entity starts (0-indexed)
65
+ - endOffset: character position where the entity ends
66
+ - prefix: up to 32 characters of text immediately before the entity (helps identify correct occurrence)
67
+ - suffix: up to 32 characters of text immediately after the entity (helps identify correct occurrence)
68
+
69
+ Return empty array [] if no entities found.
70
+ Do not include markdown formatting or code fences, just the raw JSON array.
71
+
72
+ Example output:
73
+ [{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
74
+ console.log("Sending entity extraction request");
75
+ const response = await client.generateTextWithMetadata(
76
+ prompt,
77
+ 4e3,
78
+ // Increased to handle many entities without truncation
79
+ 0.3
80
+ // Lower temperature for more consistent extraction
81
+ );
82
+ console.log("Got entity extraction response");
83
+ console.log("Entity extraction raw response length:", response.text.length);
84
+ try {
85
+ let jsonStr = response.text.trim();
86
+ if (jsonStr.startsWith("```")) {
87
+ jsonStr = jsonStr.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
88
+ }
89
+ const entities = JSON.parse(jsonStr);
90
+ console.log("Parsed", entities.length, "entities from response");
91
+ if (response.stopReason === "max_tokens") {
92
+ const errorMsg = `AI response truncated: Found ${entities.length} entities but response hit max_tokens limit. Increase max_tokens or reduce resource size.`;
93
+ console.error(`\u274C ${errorMsg}`);
94
+ throw new Error(errorMsg);
95
+ }
96
+ return entities.map((entity, idx) => {
97
+ let startOffset = entity.startOffset;
98
+ let endOffset = entity.endOffset;
99
+ console.log(`
100
+ [Entity ${idx + 1}/${entities.length}]`);
101
+ console.log(` Type: ${entity.entityType}`);
102
+ console.log(` Text: "${entity.exact}"`);
103
+ console.log(` Offsets from AI: [${startOffset}, ${endOffset}]`);
104
+ const extractedText = exact.substring(startOffset, endOffset);
105
+ if (extractedText !== entity.exact) {
106
+ console.log(` \u26A0\uFE0F Offset mismatch!`);
107
+ console.log(` Expected: "${entity.exact}"`);
108
+ console.log(` Found at AI offsets [${startOffset}:${endOffset}]: "${extractedText}"`);
109
+ const contextStart = Math.max(0, startOffset - 50);
110
+ const contextEnd = Math.min(exact.length, endOffset + 50);
111
+ const contextBefore = exact.substring(contextStart, startOffset);
112
+ const contextAfter = exact.substring(endOffset, contextEnd);
113
+ console.log(` Context: "...${contextBefore}[${extractedText}]${contextAfter}..."`);
114
+ console.log(` Searching for exact match in resource...`);
115
+ let found = false;
116
+ if (entity.prefix || entity.suffix) {
117
+ console.log(` Using LLM-provided context for disambiguation:`);
118
+ if (entity.prefix) console.log(` Prefix: "${entity.prefix}"`);
119
+ if (entity.suffix) console.log(` Suffix: "${entity.suffix}"`);
120
+ let searchPos = 0;
121
+ while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
122
+ const candidatePrefix = exact.substring(Math.max(0, searchPos - 32), searchPos);
123
+ const candidateSuffix = exact.substring(
124
+ searchPos + entity.exact.length,
125
+ Math.min(exact.length, searchPos + entity.exact.length + 32)
126
+ );
127
+ const prefixMatch = !entity.prefix || candidatePrefix.endsWith(entity.prefix);
128
+ const suffixMatch = !entity.suffix || candidateSuffix.startsWith(entity.suffix);
129
+ if (prefixMatch && suffixMatch) {
130
+ console.log(` \u2705 Found match using context at offset ${searchPos} (diff: ${searchPos - startOffset})`);
131
+ console.log(` Candidate prefix: "${candidatePrefix}"`);
132
+ console.log(` Candidate suffix: "${candidateSuffix}"`);
133
+ startOffset = searchPos;
134
+ endOffset = searchPos + entity.exact.length;
135
+ found = true;
136
+ break;
137
+ }
138
+ searchPos++;
139
+ }
140
+ if (!found) {
141
+ console.log(` \u26A0\uFE0F No occurrence found with matching context`);
142
+ }
143
+ }
144
+ if (!found) {
145
+ const index = exact.indexOf(entity.exact);
146
+ if (index !== -1) {
147
+ console.log(` \u26A0\uFE0F Using first occurrence at offset ${index} (diff: ${index - startOffset})`);
148
+ startOffset = index;
149
+ endOffset = index + entity.exact.length;
150
+ } else {
151
+ console.log(` \u274C Cannot find "${entity.exact}" anywhere in resource`);
152
+ console.log(` Resource starts with: "${exact.substring(0, 200)}..."`);
153
+ return null;
154
+ }
155
+ }
156
+ } else {
157
+ console.log(` \u2705 Offsets correct`);
158
+ }
159
+ return {
160
+ exact: entity.exact,
161
+ entityType: entity.entityType,
162
+ startOffset,
163
+ endOffset,
164
+ prefix: entity.prefix,
165
+ suffix: entity.suffix
166
+ };
167
+ }).filter((entity) => {
168
+ if (entity === null) {
169
+ console.log("\u274C Filtered entity: null");
170
+ return false;
171
+ }
172
+ if (entity.startOffset === void 0 || entity.endOffset === void 0) {
173
+ console.log(`\u274C Filtered entity "${entity.exact}": missing offsets`);
174
+ return false;
175
+ }
176
+ if (entity.startOffset < 0) {
177
+ console.log(`\u274C Filtered entity "${entity.exact}": negative startOffset (${entity.startOffset})`);
178
+ return false;
179
+ }
180
+ if (entity.endOffset > exact.length) {
181
+ console.log(`\u274C Filtered entity "${entity.exact}": endOffset (${entity.endOffset}) > text length (${exact.length})`);
182
+ return false;
183
+ }
184
+ const extractedText = exact.substring(entity.startOffset, entity.endOffset);
185
+ if (extractedText !== entity.exact) {
186
+ console.log(`\u274C Filtered entity "${entity.exact}": offset mismatch`);
187
+ console.log(` Expected: "${entity.exact}"`);
188
+ console.log(` Got at [${entity.startOffset}:${entity.endOffset}]: "${extractedText}"`);
189
+ return false;
190
+ }
191
+ console.log(`\u2705 Accepted entity "${entity.exact}" at [${entity.startOffset}:${entity.endOffset}]`);
192
+ return true;
193
+ });
194
+ } catch (error) {
195
+ console.error("Failed to parse entity extraction response:", error);
196
+ return [];
197
+ }
198
+ }
199
+
200
+ // src/jobs/reference-detection-worker.ts
3
201
  import { FilesystemRepresentationStore } from "@semiont/content";
4
- import { getPrimaryRepresentation, decodeRepresentation } from "@semiont/api-client";
5
- var ResourceContext = class {
6
- /**
7
- * Get resource metadata from view storage
8
- */
9
- static async getResourceMetadata(resourceId2, config) {
10
- const basePath = config.services.filesystem.path;
11
- const projectRoot = config._metadata?.projectRoot;
12
- const viewStorage = new FilesystemViewStorage(basePath, projectRoot);
13
- const view = await viewStorage.get(resourceId2);
14
- if (!view) {
15
- return null;
202
+ var ReferenceDetectionWorker = class extends JobWorker {
203
+ constructor(jobQueue, config, eventStore, inferenceClient) {
204
+ super(jobQueue);
205
+ this.config = config;
206
+ this.eventStore = eventStore;
207
+ this.inferenceClient = inferenceClient;
208
+ }
209
+ getWorkerName() {
210
+ return "ReferenceDetectionWorker";
211
+ }
212
+ canProcessJob(job) {
213
+ return job.metadata.type === "detection";
214
+ }
215
+ async executeJob(job) {
216
+ if (job.metadata.type !== "detection") {
217
+ throw new Error(`Invalid job type: ${job.metadata.type}`);
16
218
  }
17
- return view.resource;
219
+ if (job.status !== "running") {
220
+ throw new Error(`Job must be in running state to execute, got: ${job.status}`);
221
+ }
222
+ await this.processDetectionJob(job);
18
223
  }
19
224
  /**
20
- * List all resources by scanning view storage
225
+ * Detect entity references in resource using AI
226
+ * Self-contained implementation for reference detection
227
+ *
228
+ * Public for testing charset handling - see entity-detection-charset.test.ts
21
229
  */
22
- static async listResources(filters, config) {
23
- const basePath = config.services.filesystem.path;
24
- const projectRoot = config._metadata?.projectRoot;
25
- const viewStorage = new FilesystemViewStorage(basePath, projectRoot);
26
- const allViews = await viewStorage.getAll();
27
- const resources = [];
28
- for (const view of allViews) {
29
- const doc = view.resource;
30
- if (filters?.archived !== void 0 && doc.archived !== filters.archived) {
31
- continue;
32
- }
33
- if (filters?.search) {
34
- const searchLower = filters.search.toLowerCase();
35
- if (!doc.name.toLowerCase().includes(searchLower)) {
36
- continue;
230
+ async detectReferences(resource, entityTypes, includeDescriptiveReferences = false) {
231
+ console.log(`Detecting entities of types: ${entityTypes.join(", ")}${includeDescriptiveReferences ? " (including descriptive references)" : ""}`);
232
+ const detectedAnnotations = [];
233
+ const primaryRep = getPrimaryRepresentation(resource);
234
+ if (!primaryRep) return detectedAnnotations;
235
+ const mediaType = primaryRep.mediaType;
236
+ const baseMediaType = mediaType?.split(";")[0]?.trim() || "";
237
+ if (baseMediaType === "text/plain" || baseMediaType === "text/markdown") {
238
+ if (!primaryRep.checksum || !primaryRep.mediaType) return detectedAnnotations;
239
+ const basePath = this.config.services.filesystem.path;
240
+ const projectRoot = this.config._metadata?.projectRoot;
241
+ const repStore = new FilesystemRepresentationStore({ basePath }, projectRoot);
242
+ const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
243
+ const content = decodeRepresentation(contentBuffer, primaryRep.mediaType);
244
+ const extractedEntities = await extractEntities(content, entityTypes, this.inferenceClient, includeDescriptiveReferences);
245
+ for (const entity of extractedEntities) {
246
+ try {
247
+ const validated = validateAndCorrectOffsets(
248
+ content,
249
+ entity.startOffset,
250
+ entity.endOffset,
251
+ entity.exact
252
+ );
253
+ const annotation = {
254
+ annotation: {
255
+ selector: {
256
+ start: validated.start,
257
+ end: validated.end,
258
+ exact: validated.exact,
259
+ prefix: validated.prefix,
260
+ suffix: validated.suffix
261
+ },
262
+ entityTypes: [entity.entityType]
263
+ }
264
+ };
265
+ detectedAnnotations.push(annotation);
266
+ } catch (error) {
267
+ console.warn(`[ReferenceDetectionWorker] Skipping invalid entity "${entity.exact}":`, error);
37
268
  }
38
269
  }
39
- resources.push(doc);
40
270
  }
41
- resources.sort((a, b) => {
42
- const aTime = a.dateCreated ? new Date(a.dateCreated).getTime() : 0;
43
- const bTime = b.dateCreated ? new Date(b.dateCreated).getTime() : 0;
44
- return bTime - aTime;
45
- });
46
- return resources;
271
+ return detectedAnnotations;
47
272
  }
48
- /**
49
- * Add content previews to resources (for search results)
50
- * Retrieves and decodes the first 200 characters of each resource's primary representation
51
- */
52
- static async addContentPreviews(resources, config) {
53
- const basePath = config.services.filesystem.path;
54
- const projectRoot = config._metadata?.projectRoot;
55
- const repStore = new FilesystemRepresentationStore({ basePath }, projectRoot);
56
- return await Promise.all(
57
- resources.map(async (doc) => {
273
+ async processDetectionJob(job) {
274
+ console.log(`[ReferenceDetectionWorker] Processing detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
275
+ console.log(`[ReferenceDetectionWorker] \u{1F50D} Entity types: ${job.params.entityTypes.join(", ")}`);
276
+ const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
277
+ if (!resource) {
278
+ throw new Error(`Resource ${job.params.resourceId} not found`);
279
+ }
280
+ let totalFound = 0;
281
+ let totalEmitted = 0;
282
+ let totalErrors = 0;
283
+ let updatedJob = {
284
+ ...job,
285
+ progress: {
286
+ totalEntityTypes: job.params.entityTypes.length,
287
+ processedEntityTypes: 0,
288
+ entitiesFound: 0,
289
+ entitiesEmitted: 0
290
+ }
291
+ };
292
+ await this.updateJobProgress(updatedJob);
293
+ for (let i = 0; i < job.params.entityTypes.length; i++) {
294
+ const entityType = job.params.entityTypes[i];
295
+ if (!entityType) continue;
296
+ console.log(`[ReferenceDetectionWorker] \u{1F916} [${i + 1}/${job.params.entityTypes.length}] Detecting ${entityType}...`);
297
+ const detectedAnnotations = await this.detectReferences(resource, [entityType], job.params.includeDescriptiveReferences);
298
+ totalFound += detectedAnnotations.length;
299
+ console.log(`[ReferenceDetectionWorker] \u2705 Found ${detectedAnnotations.length} ${entityType} entities`);
300
+ for (let idx = 0; idx < detectedAnnotations.length; idx++) {
301
+ const detected = detectedAnnotations[idx];
302
+ if (!detected) {
303
+ console.warn(`[ReferenceDetectionWorker] Skipping undefined entity at index ${idx}`);
304
+ continue;
305
+ }
306
+ let referenceId;
58
307
  try {
59
- const primaryRep = getPrimaryRepresentation(doc);
60
- if (primaryRep?.checksum && primaryRep?.mediaType) {
61
- const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
62
- const contentPreview = decodeRepresentation(contentBuffer, primaryRep.mediaType).slice(0, 200);
63
- return { ...doc, content: contentPreview };
308
+ const backendUrl = this.config.services.backend?.publicURL;
309
+ if (!backendUrl) {
310
+ throw new Error("Backend publicURL not configured");
64
311
  }
65
- return { ...doc, content: "" };
66
- } catch {
67
- return { ...doc, content: "" };
312
+ referenceId = generateAnnotationId(backendUrl);
313
+ } catch (error) {
314
+ console.error(`[ReferenceDetectionWorker] Failed to generate annotation ID:`, error);
315
+ throw new Error("Configuration error: Backend publicURL not set");
68
316
  }
69
- })
70
- );
317
+ try {
318
+ await this.eventStore.appendEvent({
319
+ type: "annotation.added",
320
+ resourceId: job.params.resourceId,
321
+ userId: job.metadata.userId,
322
+ version: 1,
323
+ payload: {
324
+ annotation: {
325
+ "@context": "http://www.w3.org/ns/anno.jsonld",
326
+ "type": "Annotation",
327
+ id: referenceId,
328
+ motivation: "linking",
329
+ target: {
330
+ source: resourceIdToURI(job.params.resourceId, this.config.services.backend.publicURL),
331
+ // Convert to full URI
332
+ selector: [
333
+ {
334
+ type: "TextPositionSelector",
335
+ start: detected.annotation.selector.start,
336
+ end: detected.annotation.selector.end
337
+ },
338
+ {
339
+ type: "TextQuoteSelector",
340
+ exact: detected.annotation.selector.exact,
341
+ ...detected.annotation.selector.prefix && { prefix: detected.annotation.selector.prefix },
342
+ ...detected.annotation.selector.suffix && { suffix: detected.annotation.selector.suffix }
343
+ }
344
+ ]
345
+ },
346
+ body: (detected.annotation.entityTypes || []).map((et) => ({
347
+ type: "TextualBody",
348
+ value: et,
349
+ purpose: "tagging"
350
+ })),
351
+ modified: (/* @__PURE__ */ new Date()).toISOString()
352
+ }
353
+ }
354
+ });
355
+ totalEmitted++;
356
+ if ((idx + 1) % 10 === 0 || idx === detectedAnnotations.length - 1) {
357
+ console.log(`[ReferenceDetectionWorker] \u{1F4E4} Emitted ${idx + 1}/${detectedAnnotations.length} events for ${entityType}`);
358
+ }
359
+ } catch (error) {
360
+ totalErrors++;
361
+ console.error(`[ReferenceDetectionWorker] \u274C Failed to emit event for ${referenceId}:`, error);
362
+ }
363
+ }
364
+ console.log(`[ReferenceDetectionWorker] \u2705 Completed ${entityType}: ${detectedAnnotations.length} found, ${detectedAnnotations.length - (totalErrors - (totalFound - totalEmitted))} emitted`);
365
+ updatedJob = {
366
+ ...updatedJob,
367
+ progress: {
368
+ totalEntityTypes: job.params.entityTypes.length,
369
+ processedEntityTypes: i + 1,
370
+ currentEntityType: entityType,
371
+ entitiesFound: totalFound,
372
+ entitiesEmitted: totalEmitted
373
+ }
374
+ };
375
+ await this.updateJobProgress(updatedJob);
376
+ }
377
+ console.log(`[ReferenceDetectionWorker] \u2705 Detection complete: ${totalFound} entities found, ${totalEmitted} events emitted, ${totalErrors} errors`);
378
+ }
379
+ async handleJobFailure(job, error) {
380
+ await super.handleJobFailure(job, error);
381
+ if (job.status === "failed" && job.metadata.type === "detection") {
382
+ const detJob = job;
383
+ await this.eventStore.appendEvent({
384
+ type: "job.failed",
385
+ resourceId: detJob.params.resourceId,
386
+ userId: detJob.metadata.userId,
387
+ version: 1,
388
+ payload: {
389
+ jobId: detJob.metadata.id,
390
+ jobType: detJob.metadata.type,
391
+ error: "Entity detection failed. Please try again later."
392
+ }
393
+ });
394
+ }
71
395
  }
72
- };
73
-
74
- // src/annotation-context.ts
75
- import { generateResourceSummary, generateText } from "@semiont/inference";
76
- import {
77
- getBodySource,
78
- getTargetSource,
79
- getTargetSelector,
80
- getResourceEntityTypes,
81
- getTextPositionSelector,
82
- getPrimaryRepresentation as getPrimaryRepresentation2,
83
- decodeRepresentation as decodeRepresentation2
84
- } from "@semiont/api-client";
85
- import { FilesystemRepresentationStore as FilesystemRepresentationStore2 } from "@semiont/content";
86
- import { FilesystemViewStorage as FilesystemViewStorage2 } from "@semiont/event-sourcing";
87
- import { resourceId as createResourceId, uriToResourceId } from "@semiont/core";
88
- import { getEntityTypes } from "@semiont/ontology";
89
- var AnnotationContext = class {
90
396
  /**
91
- * Build LLM context for an annotation
92
- *
93
- * @param annotationUri - Full annotation URI (e.g., http://localhost:4000/annotations/abc123)
94
- * @param resourceId - Source resource ID
95
- * @param config - Application configuration
96
- * @param options - Context building options
97
- * @returns Rich context for LLM processing
98
- * @throws Error if annotation or resource not found
397
+ * Update job progress and emit events to Event Store
398
+ * Overrides base class to also emit job progress events
99
399
  */
100
- static async buildLLMContext(annotationUri2, resourceId2, config, options = {}) {
101
- const {
102
- includeSourceContext = true,
103
- includeTargetContext = true,
104
- contextWindow = 1e3
105
- } = options;
106
- if (contextWindow < 100 || contextWindow > 5e3) {
107
- throw new Error("contextWindow must be between 100 and 5000");
108
- }
109
- console.log(`[AnnotationContext] buildLLMContext called with annotationUri=${annotationUri2}, resourceId=${resourceId2}`);
110
- const basePath = config.services.filesystem.path;
111
- console.log(`[AnnotationContext] basePath=${basePath}`);
112
- const projectRoot = config._metadata?.projectRoot;
113
- const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
114
- const repStore = new FilesystemRepresentationStore2({ basePath }, projectRoot);
115
- console.log(`[AnnotationContext] Getting view for resourceId=${resourceId2}`);
116
- let sourceView;
117
- try {
118
- sourceView = await viewStorage.get(resourceId2);
119
- console.log(`[AnnotationContext] Got view:`, !!sourceView);
120
- if (!sourceView) {
121
- throw new Error("Source resource not found");
122
- }
123
- } catch (error) {
124
- console.error(`[AnnotationContext] Error getting view:`, error);
125
- throw error;
126
- }
127
- console.log(`[AnnotationContext] Looking for annotation ${annotationUri2} in resource ${resourceId2}`);
128
- console.log(`[AnnotationContext] View has ${sourceView.annotations.annotations.length} annotations`);
129
- console.log(`[AnnotationContext] First 5 annotation IDs:`, sourceView.annotations.annotations.slice(0, 5).map((a) => a.id));
130
- const annotation = sourceView.annotations.annotations.find((a) => a.id === annotationUri2);
131
- console.log(`[AnnotationContext] Found annotation:`, !!annotation);
132
- if (!annotation) {
133
- throw new Error("Annotation not found in view");
134
- }
135
- const targetSource = getTargetSource(annotation.target);
136
- const targetResourceId = targetSource.split("/").pop();
137
- console.log(`[AnnotationContext] Target source: ${targetSource}, Expected resource ID: ${resourceId2}, Extracted ID: ${targetResourceId}`);
138
- if (targetResourceId !== resourceId2) {
139
- throw new Error(`Annotation target resource ID (${targetResourceId}) does not match expected resource ID (${resourceId2})`);
400
+ async updateJobProgress(job) {
401
+ await super.updateJobProgress(job);
402
+ if (job.metadata.type !== "detection") {
403
+ return;
140
404
  }
141
- const sourceDoc = sourceView.resource;
142
- const bodySource = getBodySource(annotation.body);
143
- let targetDoc = null;
144
- if (bodySource) {
145
- const parts = bodySource.split("/");
146
- const lastPart = parts[parts.length - 1];
147
- if (!lastPart) {
148
- throw new Error(`Invalid body source URI: ${bodySource}`);
149
- }
150
- const targetResourceId2 = createResourceId(lastPart);
151
- const targetView = await viewStorage.get(targetResourceId2);
152
- targetDoc = targetView?.resource || null;
405
+ if (job.status !== "running") {
406
+ return;
153
407
  }
154
- let sourceContext;
155
- if (includeSourceContext) {
156
- const primaryRep = getPrimaryRepresentation2(sourceDoc);
157
- if (!primaryRep?.checksum || !primaryRep?.mediaType) {
158
- throw new Error("Source content not found");
159
- }
160
- const sourceContent = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
161
- const contentStr = decodeRepresentation2(sourceContent, primaryRep.mediaType);
162
- const targetSelectorRaw = getTargetSelector(annotation.target);
163
- const targetSelector = Array.isArray(targetSelectorRaw) ? targetSelectorRaw[0] : targetSelectorRaw;
164
- console.log(`[AnnotationContext] Target selector type:`, targetSelector?.type);
165
- if (!targetSelector) {
166
- console.warn(`[AnnotationContext] No target selector found`);
167
- } else if (targetSelector.type === "TextPositionSelector") {
168
- const selector = targetSelector;
169
- const start = selector.start;
170
- const end = selector.end;
171
- const before = contentStr.slice(Math.max(0, start - contextWindow), start);
172
- const selected = contentStr.slice(start, end);
173
- const after = contentStr.slice(end, Math.min(contentStr.length, end + contextWindow));
174
- sourceContext = { before, selected, after };
175
- console.log(`[AnnotationContext] Built source context using TextPositionSelector (${start}-${end})`);
176
- } else if (targetSelector.type === "TextQuoteSelector") {
177
- const selector = targetSelector;
178
- const exact = selector.exact;
179
- const index = contentStr.indexOf(exact);
180
- if (index !== -1) {
181
- const start = index;
182
- const end = index + exact.length;
183
- const before = contentStr.slice(Math.max(0, start - contextWindow), start);
184
- const selected = exact;
185
- const after = contentStr.slice(end, Math.min(contentStr.length, end + contextWindow));
186
- sourceContext = { before, selected, after };
187
- console.log(`[AnnotationContext] Built source context using TextQuoteSelector (found at ${index})`);
188
- } else {
189
- console.warn(`[AnnotationContext] TextQuoteSelector exact text not found in content: "${exact.substring(0, 50)}..."`);
408
+ const detJob = job;
409
+ const baseEvent = {
410
+ resourceId: detJob.params.resourceId,
411
+ userId: detJob.metadata.userId,
412
+ version: 1
413
+ };
414
+ const isFirstUpdate = detJob.progress.processedEntityTypes === 0;
415
+ const isFinalUpdate = detJob.progress.processedEntityTypes === detJob.progress.totalEntityTypes && detJob.progress.totalEntityTypes > 0;
416
+ if (isFirstUpdate) {
417
+ await this.eventStore.appendEvent({
418
+ type: "job.started",
419
+ ...baseEvent,
420
+ payload: {
421
+ jobId: detJob.metadata.id,
422
+ jobType: detJob.metadata.type,
423
+ totalSteps: detJob.params.entityTypes.length
190
424
  }
191
- } else {
192
- console.warn(`[AnnotationContext] Unknown selector type: ${targetSelector.type}`);
193
- }
425
+ });
426
+ } else if (isFinalUpdate) {
427
+ await this.eventStore.appendEvent({
428
+ type: "job.completed",
429
+ ...baseEvent,
430
+ payload: {
431
+ jobId: detJob.metadata.id,
432
+ jobType: detJob.metadata.type,
433
+ foundCount: detJob.progress.entitiesFound
434
+ }
435
+ });
436
+ } else {
437
+ const percentage = Math.round(detJob.progress.processedEntityTypes / detJob.progress.totalEntityTypes * 100);
438
+ await this.eventStore.appendEvent({
439
+ type: "job.progress",
440
+ ...baseEvent,
441
+ payload: {
442
+ jobId: detJob.metadata.id,
443
+ jobType: detJob.metadata.type,
444
+ percentage,
445
+ currentStep: detJob.progress.currentEntityType,
446
+ processedSteps: detJob.progress.processedEntityTypes,
447
+ totalSteps: detJob.progress.totalEntityTypes,
448
+ foundCount: detJob.progress.entitiesFound
449
+ }
450
+ });
194
451
  }
195
- let targetContext;
196
- if (includeTargetContext && targetDoc) {
197
- const targetRep = getPrimaryRepresentation2(targetDoc);
198
- if (targetRep?.checksum && targetRep?.mediaType) {
199
- const targetContent = await repStore.retrieve(targetRep.checksum, targetRep.mediaType);
200
- const contentStr = decodeRepresentation2(targetContent, targetRep.mediaType);
201
- targetContext = {
202
- content: contentStr.slice(0, contextWindow * 2),
203
- summary: await generateResourceSummary(targetDoc.name, contentStr, getResourceEntityTypes(targetDoc), config)
204
- };
452
+ }
453
+ };
454
+
455
+ // src/jobs/generation-worker.ts
456
+ import { JobWorker as JobWorker2 } from "@semiont/jobs";
457
+ import { FilesystemRepresentationStore as FilesystemRepresentationStore2 } from "@semiont/content";
458
+
459
+ // src/generation/resource-generation.ts
460
+ import { getLocaleEnglishName } from "@semiont/api-client";
461
+ function getLanguageName(locale) {
462
+ return getLocaleEnglishName(locale) || locale;
463
+ }
464
+ async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens) {
465
+ console.log("generateResourceFromTopic called with:", {
466
+ topic: topic.substring(0, 100),
467
+ entityTypes,
468
+ hasUserPrompt: !!userPrompt,
469
+ locale,
470
+ hasContext: !!context,
471
+ temperature,
472
+ maxTokens
473
+ });
474
+ const finalTemperature = temperature ?? 0.7;
475
+ const finalMaxTokens = maxTokens ?? 500;
476
+ const languageInstruction = locale && locale !== "en" ? `
477
+
478
+ IMPORTANT: Write the entire resource in ${getLanguageName(locale)}.` : "";
479
+ let contextSection = "";
480
+ if (context?.sourceContext) {
481
+ const { before, selected, after } = context.sourceContext;
482
+ contextSection = `
483
+
484
+ Source document context:
485
+ ---
486
+ ${before ? `...${before}` : ""}
487
+ **[${selected}]**
488
+ ${after ? `${after}...` : ""}
489
+ ---
490
+ `;
491
+ }
492
+ const prompt = `Generate a concise, informative resource about "${topic}".
493
+ ${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
494
+ ${userPrompt ? `Additional context: ${userPrompt}` : ""}${contextSection}${languageInstruction}
495
+
496
+ Requirements:
497
+ - Start with a clear heading (# Title)
498
+ - Write 2-3 paragraphs of substantive content
499
+ - Be factual and informative
500
+ - Use markdown formatting
501
+ - Return ONLY the markdown content, no JSON, no code fences, no additional wrapper`;
502
+ const parseResponse = (response2) => {
503
+ let content = response2.trim();
504
+ if (content.startsWith("```markdown") || content.startsWith("```md")) {
505
+ content = content.slice(content.indexOf("\n") + 1);
506
+ const endIndex = content.lastIndexOf("```");
507
+ if (endIndex !== -1) {
508
+ content = content.slice(0, endIndex);
205
509
  }
206
- }
207
- const suggestedResolution = void 0;
208
- const generationContext = sourceContext ? {
209
- sourceContext: {
210
- before: sourceContext.before || "",
211
- selected: sourceContext.selected,
212
- after: sourceContext.after || ""
213
- },
214
- metadata: {
215
- resourceType: "document",
216
- language: sourceDoc.language,
217
- entityTypes: getEntityTypes(annotation)
510
+ } else if (content.startsWith("```")) {
511
+ content = content.slice(3);
512
+ const endIndex = content.lastIndexOf("```");
513
+ if (endIndex !== -1) {
514
+ content = content.slice(0, endIndex);
218
515
  }
219
- } : void 0;
220
- const response = {
221
- annotation,
222
- sourceResource: sourceDoc,
223
- targetResource: targetDoc,
224
- ...generationContext ? { context: generationContext } : {},
225
- ...sourceContext ? { sourceContext } : {},
226
- // Keep for backward compatibility
227
- ...targetContext ? { targetContext } : {},
228
- ...suggestedResolution ? { suggestedResolution } : {}
516
+ }
517
+ content = content.trim();
518
+ return {
519
+ title: topic,
520
+ content
229
521
  };
230
- return response;
522
+ };
523
+ console.log("Sending prompt to inference (length:", prompt.length, "chars)", "temp:", finalTemperature, "maxTokens:", finalMaxTokens);
524
+ const response = await client.generateText(prompt, finalMaxTokens, finalTemperature);
525
+ console.log("Got raw response (length:", response.length, "chars)");
526
+ const result = parseResponse(response);
527
+ console.log("Parsed result:", {
528
+ hasTitle: !!result.title,
529
+ titleLength: result.title?.length,
530
+ hasContent: !!result.content,
531
+ contentLength: result.content?.length
532
+ });
533
+ return result;
534
+ }
535
+ async function generateResourceSummary(resourceName, content, entityTypes, client) {
536
+ const truncatedContent = content.length > 2e3 ? content.substring(0, 2e3) + "..." : content;
537
+ const prompt = `Create a brief, intelligent summary of this resource titled "${resourceName}".
538
+ ${entityTypes.length > 0 ? `Key entity types: ${entityTypes.join(", ")}` : ""}
539
+
540
+ Resource content:
541
+ ${truncatedContent}
542
+
543
+ Write a 2-3 sentence summary that captures the key points and would help someone understand what this resource contains.`;
544
+ return await client.generateText(prompt, 150, 0.5);
545
+ }
546
+ async function generateReferenceSuggestions(referenceTitle, client, entityType, currentContent) {
547
+ const prompt = `For a reference titled "${referenceTitle}"${entityType ? ` (type: ${entityType})` : ""}${currentContent ? ` with current stub: "${currentContent}"` : ""}, suggest 3 specific, actionable next steps or related topics to explore.
548
+
549
+ Format as a simple list, one suggestion per line.`;
550
+ const response = await client.generateText(prompt, 200, 0.8);
551
+ if (!response) {
552
+ return null;
231
553
  }
232
- /**
233
- * Get resource annotations from view storage (fast path)
234
- * Throws if view missing
235
- */
236
- static async getResourceAnnotations(resourceId2, config) {
237
- if (!config.services?.filesystem?.path) {
238
- throw new Error("Filesystem path not found in configuration");
239
- }
240
- const basePath = config.services.filesystem.path;
241
- const projectRoot = config._metadata?.projectRoot;
242
- const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
243
- const view = await viewStorage.get(resourceId2);
244
- if (!view) {
245
- throw new Error(`Resource ${resourceId2} not found in view storage`);
246
- }
247
- return view.annotations;
554
+ return response.split("\n").map((line) => line.replace(/^[-*•]\s*/, "").trim()).filter((line) => line.length > 0).slice(0, 3);
555
+ }
556
+
557
+ // src/jobs/generation-worker.ts
558
+ import {
559
+ getTargetSelector,
560
+ getExactText,
561
+ resourceUri,
562
+ annotationUri
563
+ } from "@semiont/api-client";
564
+ import { getEntityTypes } from "@semiont/ontology";
565
+ import {
566
+ CREATION_METHODS,
567
+ generateUuid,
568
+ resourceId,
569
+ annotationId
570
+ } from "@semiont/core";
571
/**
 * Job worker that handles jobs whose `metadata.type` is "generation".
 *
 * For one job it: looks up the linking annotation on the source resource,
 * asks the inference client to generate markdown for it, stores that markdown
 * as a new representation, appends a `resource.created` event for the new
 * resource, and appends an `annotation.body.updated` event that adds a link
 * from the annotation to the new resource. Progress is mirrored to the event
 * store through `updateJobProgress`.
 */
var GenerationWorker = class extends JobWorker2 {
  /**
   * @param jobQueue - Passed to the base worker constructor.
   * @param config - Configuration object; `services.filesystem.path`,
   *   `services.backend.publicURL` and `_metadata.projectRoot` are read from it.
   * @param eventStore - Object exposing `appendEvent(event)`.
   * @param inferenceClient - Client handed to `generateResourceFromTopic`.
   */
  constructor(jobQueue, config, eventStore, inferenceClient) {
    super(jobQueue);
    this.config = config;
    this.eventStore = eventStore;
    this.inferenceClient = inferenceClient;
  }
  /** @returns The fixed worker name "GenerationWorker". */
  getWorkerName() {
    return "GenerationWorker";
  }
  /** @returns `true` only for jobs whose `metadata.type` is "generation". */
  canProcessJob(job) {
    return job.metadata.type === "generation";
  }
  /**
   * Validate the job and run the generation pipeline.
   *
   * @throws Error when the job is not a generation job or its status is not "running".
   */
  async executeJob(job) {
    if (job.metadata.type !== "generation") {
      throw new Error(`Invalid job type: ${job.metadata.type}`);
    }
    if (job.status !== "running") {
      throw new Error(`Job must be in running state to execute, got: ${job.status}`);
    }
    await this.processGenerationJob(job);
  }
  /**
   * Run the full generation pipeline for one job.
   *
   * `job.params.referenceId` may be a bare annotation id or a URI ending in
   * the id; only its last path segment is used when building annotation URIs
   * and the `annotation.body.updated` payload.
   *
   * @throws Error when the filesystem path or backend public URL is not
   *   configured, when the source view, annotation or source resource cannot
   *   be found, or when `job.params.context` is missing.
   */
  async processGenerationJob(job) {
    console.log(`[GenerationWorker] Processing generation for reference ${job.params.referenceId} (job: ${job.metadata.id})`);
    const basePath = this.config.services?.filesystem?.path;
    if (!basePath) {
      throw new Error("Filesystem path not found in configuration");
    }
    const projectRoot = this.config._metadata?.projectRoot;
    const repStore = new FilesystemRepresentationStore2({ basePath }, projectRoot);
    // Reduce the reference id to its last path segment once, so that the
    // annotation lookup and the emitted events agree on the same id.
    const referenceShortId = job.params.referenceId.split("/").pop();

    let updatedJob = job;
    // Replace the job's progress wholesale, optionally log the message with
    // an icon, and publish the update. The stage/percentage pairs used below
    // are also what updateJobProgress inspects to pick the event type.
    const advance = async (progress, icon) => {
      updatedJob = { ...updatedJob, progress };
      if (icon) {
        console.log(`[GenerationWorker] ${icon} ${updatedJob.progress.message}`);
      }
      await this.updateJobProgress(updatedJob);
    };

    await advance({
      stage: "fetching",
      percentage: 20,
      message: "Fetching source resource..."
    }, "\u{1F4E5}");
    const { FilesystemViewStorage: FilesystemViewStorage3 } = await import("@semiont/event-sourcing");
    const viewStorage = new FilesystemViewStorage3(basePath, projectRoot);
    const view = await viewStorage.get(job.params.sourceResourceId);
    if (!view) {
      throw new Error(`Resource ${job.params.sourceResourceId} not found`);
    }
    const backendUrl = this.config.services.backend?.publicURL;
    if (!backendUrl) {
      throw new Error("Backend publicURL not configured");
    }
    const projection = view.annotations;
    const expectedAnnotationUri = `${backendUrl}/annotations/${referenceShortId}`;
    const annotation = projection.annotations.find(
      (a) => a.id === expectedAnnotationUri && a.motivation === "linking"
    );
    if (!annotation) {
      throw new Error(`Annotation ${job.params.referenceId} not found in resource ${job.params.sourceResourceId}`);
    }
    // Only the existence of the source resource is checked; its metadata is not used further.
    const sourceResource = await ResourceContext.getResourceMetadata(job.params.sourceResourceId, this.config);
    if (!sourceResource) {
      throw new Error(`Source resource ${job.params.sourceResourceId} not found`);
    }
    // Name preference: explicit job title, then the annotation's exact text, then a fixed fallback.
    const targetSelector = getTargetSelector(annotation.target);
    const resourceName = job.params.title || (targetSelector ? getExactText(targetSelector) : "") || "New Resource";
    console.log(`[GenerationWorker] Generating resource: "${resourceName}"`);
    if (!job.params.context) {
      throw new Error("Generation context is required but was not provided in job");
    }
    const sourceContext = job.params.context.sourceContext;
    console.log(`[GenerationWorker] Using pre-fetched context: ${sourceContext?.before?.length || 0} chars before, ${sourceContext?.selected?.length || 0} chars selected, ${sourceContext?.after?.length || 0} chars after`);

    await advance({
      stage: "generating",
      percentage: 40,
      message: "Creating content with AI..."
    }, "\u{1F916}");
    const prompt = job.params.prompt || `Create a comprehensive resource about "${resourceName}"`;
    const annotationEntityTypes = getEntityTypes({ body: annotation.body });
    // Entity types from the job take precedence over those on the annotation body.
    const entityTypes = job.params.entityTypes || annotationEntityTypes;
    const generatedContent = await generateResourceFromTopic(
      resourceName,
      entityTypes,
      this.inferenceClient,
      prompt,
      job.params.language,
      job.params.context,
      job.params.temperature,
      job.params.maxTokens
    );
    console.log(`[GenerationWorker] \u2705 Generated ${generatedContent.content.length} bytes of content`);

    await advance({
      stage: "generating",
      percentage: 70,
      message: "Content ready, creating resource..."
    });
    const rId = resourceId(generateUuid());
    await advance({
      stage: "creating",
      percentage: 85,
      message: "Saving resource..."
    }, "\u{1F4BE}");
    const storedRep = await repStore.store(Buffer.from(generatedContent.content), {
      mediaType: "text/markdown",
      rel: "original"
    });
    console.log(`[GenerationWorker] \u2705 Saved resource representation to filesystem: ${rId}`);
    await this.eventStore.appendEvent({
      type: "resource.created",
      resourceId: rId,
      userId: job.metadata.userId,
      version: 1,
      payload: {
        name: resourceName,
        format: "text/markdown",
        contentChecksum: storedRep.checksum,
        creationMethod: CREATION_METHODS.GENERATED,
        entityTypes,
        language: job.params.language,
        isDraft: true,
        generatedFrom: job.params.referenceId,
        generationPrompt: void 0
        // Could be added if we track the prompt
      }
    });
    console.log(`[GenerationWorker] Emitted resource.created event for ${rId}`);

    await advance({
      stage: "linking",
      percentage: 95,
      message: "Linking reference...",
      resultResourceId: rId
      // Store for job.completed event
    }, "\u{1F517}");
    const newResourceUri = resourceUri(`${backendUrl}/resources/${rId}`);
    const operations = [{
      op: "add",
      item: {
        type: "SpecificResource",
        source: newResourceUri,
        purpose: "linking"
      }
    }];
    await this.eventStore.appendEvent({
      type: "annotation.body.updated",
      resourceId: job.params.sourceResourceId,
      userId: job.metadata.userId,
      version: 1,
      payload: {
        annotationId: annotationId(referenceShortId),
        operations
      }
    });
    console.log(`[GenerationWorker] \u2705 Emitted annotation.body.updated event linking ${job.params.referenceId} \u2192 ${rId}`);

    await advance({
      stage: "linking",
      percentage: 100,
      message: "Complete!",
      resultResourceId: rId
      // Store for job.completed event
    });
    console.log(`[GenerationWorker] \u2705 Generation complete: created resource ${rId}`);
  }
  /**
   * Update job progress and emit events to Event Store
   * Overrides base class to also emit job progress events
   *
   * After delegating to the base implementation, running generation jobs get
   * one event appended per call:
   * - `job.started` when progress is stage "fetching" at 20%,
   * - `job.completed` when progress is stage "linking" at 100%,
   * - `job.progress` for every other progress value.
   * Jobs of another type, or not in the "running" state, emit nothing here.
   */
  async updateJobProgress(job) {
    await super.updateJobProgress(job);
    if (job.metadata.type !== "generation") {
      return;
    }
    if (job.status !== "running") {
      return;
    }
    const genJob = job;
    const baseEvent = {
      resourceId: genJob.params.sourceResourceId,
      userId: genJob.metadata.userId,
      version: 1
    };
    if (genJob.progress.stage === "fetching" && genJob.progress.percentage === 20) {
      await this.eventStore.appendEvent({
        type: "job.started",
        ...baseEvent,
        payload: {
          jobId: genJob.metadata.id,
          jobType: genJob.metadata.type,
          totalSteps: 5
          // fetching, generating, creating, linking, complete
        }
      });
    } else if (genJob.progress.stage === "linking" && genJob.progress.percentage === 100) {
      // Use the last path segment so a URI-form referenceId is not appended
      // to the base URL a second time.
      const referenceShortId = genJob.params.referenceId.split("/").pop();
      await this.eventStore.appendEvent({
        type: "job.completed",
        ...baseEvent,
        payload: {
          jobId: genJob.metadata.id,
          jobType: genJob.metadata.type,
          resultResourceId: genJob.progress.resultResourceId,
          annotationUri: annotationUri(`${this.config.services.backend.publicURL}/annotations/${referenceShortId}`)
        }
      });
    } else {
      await this.eventStore.appendEvent({
        type: "job.progress",
        ...baseEvent,
        payload: {
          jobId: genJob.metadata.id,
          jobType: genJob.metadata.type,
          currentStep: genJob.progress.stage,
          percentage: genJob.progress.percentage,
          message: genJob.progress.message
        }
      });
    }
  }
};
804
+
805
+ // src/jobs/highlight-detection-worker.ts
806
+ import { JobWorker as JobWorker3 } from "@semiont/jobs";
807
+ import { generateAnnotationId as generateAnnotationId2 } from "@semiont/event-sourcing";
808
+ import { resourceIdToURI as resourceIdToURI2 } from "@semiont/core";
809
+ import { userId } from "@semiont/core";
810
/**
 * Job worker for jobs whose `metadata.type` is "highlight-detection".
 *
 * It asks `AnnotationDetection.detectHighlights` for highlight spans in a
 * resource and appends one `annotation.added` event per span, while
 * mirroring job lifecycle (started / progress / completed / failed) to the
 * event store.
 */
var HighlightDetectionWorker = class extends JobWorker3 {
  /**
   * @param jobQueue - Passed to the base worker constructor.
   * @param config - Configuration object; `services.backend.publicURL` is read from it.
   * @param eventStore - Object exposing `appendEvent(event)`.
   * @param inferenceClient - Client handed to `AnnotationDetection.detectHighlights`.
   */
  constructor(jobQueue, config, eventStore, inferenceClient) {
    super(jobQueue);
    this.config = config;
    this.eventStore = eventStore;
    this.inferenceClient = inferenceClient;
  }
  // True until the first progress update of the current job has been published;
  // reset at the start of every executeJob call.
  isFirstProgress = true;
  /** @returns The fixed worker name "HighlightDetectionWorker". */
  getWorkerName() {
    return "HighlightDetectionWorker";
  }
  /** @returns `true` only for jobs whose `metadata.type` is "highlight-detection". */
  canProcessJob(job) {
    const { type } = job.metadata;
    return type === "highlight-detection";
  }
  /**
   * Validate the job, reset the first-progress flag and run detection.
   *
   * @throws Error when the job has another type or is not in the "running" state.
   */
  async executeJob(job) {
    const jobType = job.metadata.type;
    if (jobType !== "highlight-detection") {
      throw new Error(`Invalid job type: ${jobType}`);
    }
    const jobStatus = job.status;
    if (jobStatus !== "running") {
      throw new Error(`Job must be in running state to execute, got: ${jobStatus}`);
    }
    this.isFirstProgress = true;
    await this.processHighlightDetectionJob(job);
  }
  /**
   * Delegate to the base implementation, then append one lifecycle event for
   * running highlight-detection jobs: `job.started` on the first update,
   * `job.completed` when percentage is 100, otherwise `job.progress`.
   */
  async updateJobProgress(job) {
    await super.updateJobProgress(job);
    if (job.metadata.type !== "highlight-detection" || job.status !== "running") {
      return;
    }
    const { params, metadata, progress } = job;
    const resourceIdForEvent = params.resourceId;
    const actingUser = metadata.userId;
    const reachedEnd = progress.percentage === 100;

    let eventType;
    let eventPayload;
    if (this.isFirstProgress) {
      // Flip the flag before awaiting so a later update is never treated as the first.
      this.isFirstProgress = false;
      eventType = "job.started";
      eventPayload = {
        jobId: metadata.id,
        jobType: metadata.type
      };
    } else if (reachedEnd) {
      eventType = "job.completed";
      eventPayload = {
        jobId: metadata.id,
        jobType: metadata.type
        // Note: result would come from job.result, but that's handled by base class
      };
    } else {
      eventType = "job.progress";
      eventPayload = {
        jobId: metadata.id,
        jobType: metadata.type,
        progress
      };
    }
    await this.eventStore.appendEvent({
      type: eventType,
      resourceId: resourceIdForEvent,
      userId: actingUser,
      version: 1,
      payload: eventPayload
    });
  }
  /**
   * Delegate to the base failure handling, then append a `job.failed` event
   * with a fixed, generic error message when the job ended up "failed".
   */
  async handleJobFailure(job, error) {
    await super.handleJobFailure(job, error);
    const isFailedHighlightJob = job.status === "failed" && job.metadata.type === "highlight-detection";
    if (!isFailedHighlightJob) {
      return;
    }
    const failureEvent = {
      type: "job.failed",
      resourceId: job.params.resourceId,
      userId: job.metadata.userId,
      version: 1,
      payload: {
        jobId: job.metadata.id,
        jobType: job.metadata.type,
        error: "Highlight detection failed. Please try again later."
      }
    };
    await this.eventStore.appendEvent(failureEvent);
  }
  /**
   * Detect highlights for the job's resource and create one annotation per
   * highlight. A failure to create an individual annotation is logged and
   * does not stop the remaining ones.
   *
   * @throws Error when the resource metadata cannot be found.
   */
  async processHighlightDetectionJob(job) {
    const targetResourceId = job.params.resourceId;
    console.log(`[HighlightDetectionWorker] Processing highlight detection for resource ${targetResourceId} (job: ${job.metadata.id})`);
    const resource = await ResourceContext.getResourceMetadata(targetResourceId, this.config);
    if (!resource) {
      throw new Error(`Resource ${job.params.resourceId} not found`);
    }

    let currentJob = job;
    // Swap in a new progress object and publish it.
    const report = async (stage, percentage, message) => {
      currentJob = { ...currentJob, progress: { stage, percentage, message } };
      await this.updateJobProgress(currentJob);
    };

    await report("analyzing", 10, "Loading resource...");
    await report("analyzing", 30, "Analyzing text...");
    const highlights = await AnnotationDetection.detectHighlights(
      job.params.resourceId,
      this.config,
      this.inferenceClient,
      job.params.instructions,
      job.params.density
    );
    console.log(`[HighlightDetectionWorker] Found ${highlights.length} highlights to create`);
    await report("creating", 60, `Creating ${highlights.length} annotations...`);

    let created = 0;
    for (const span of highlights) {
      try {
        await this.createHighlightAnnotation(job.params.resourceId, job.metadata.userId, span);
        created += 1;
      } catch (error) {
        console.error(`[HighlightDetectionWorker] Failed to create highlight:`, error);
      }
    }

    await report("creating", 100, `Complete! Created ${created} highlights`);
    console.log(`[HighlightDetectionWorker] \u2705 Created ${created}/${highlights.length} highlights`);
  }
  /**
   * Build a "highlighting" annotation with a position selector and a quote
   * selector for one detected span, and append it as an `annotation.added` event.
   *
   * @param resourceId2 - Id of the resource the highlight targets.
   * @param creatorUserId - User recorded as creator and as the event's user.
   * @param highlight - Object with `start`, `end`, `exact` and optional `prefix` / `suffix`.
   * @throws Error when the backend public URL is not configured.
   */
  async createHighlightAnnotation(resourceId2, creatorUserId, highlight) {
    const backendUrl = this.config.services.backend?.publicURL;
    if (!backendUrl) {
      throw new Error("Backend publicURL not configured");
    }
    const annotationId2 = generateAnnotationId2(backendUrl);
    const resourceUri3 = resourceIdToURI2(resourceId2, backendUrl);

    const positionSelector = {
      type: "TextPositionSelector",
      start: highlight.start,
      end: highlight.end
    };
    const quoteSelector = {
      type: "TextQuoteSelector",
      exact: highlight.exact
    };
    // prefix / suffix are only included when truthy.
    if (highlight.prefix) {
      quoteSelector.prefix = highlight.prefix;
    }
    if (highlight.suffix) {
      quoteSelector.suffix = highlight.suffix;
    }

    const annotation = {
      "@context": "http://www.w3.org/ns/anno.jsonld",
      "type": "Annotation",
      "id": annotationId2,
      "motivation": "highlighting",
      "creator": userId(creatorUserId),
      "created": new Date().toISOString(),
      "target": {
        type: "SpecificResource",
        source: resourceUri3,
        selector: [positionSelector, quoteSelector]
      },
      "body": []
      // Empty body for highlights
    };
    await this.eventStore.appendEvent({
      type: "annotation.added",
      resourceId: resourceId2,
      userId: userId(creatorUserId),
      version: 1,
      payload: { annotation }
    });
  }
};
493
1001
 
494
- // src/graph-context.ts
495
- import { getGraphDatabase } from "@semiont/graph";
496
- import { resourceIdToURI } from "@semiont/core";
497
- var GraphContext = class {
498
- /**
499
- * Get all resources referencing this resource (backlinks)
500
- * Requires graph traversal - must use graph database
501
- */
502
- static async getBacklinks(resourceId2, config) {
503
- const graphDb = await getGraphDatabase(config);
504
- const resourceUri2 = resourceIdToURI(resourceId2, config.services.backend.publicURL);
505
- return await graphDb.getResourceReferencedBy(resourceUri2);
506
- }
507
- /**
508
- * Find shortest path between two resources
509
- * Requires graph traversal - must use graph database
510
- */
511
- static async findPath(fromResourceId, toResourceId, config, maxDepth) {
512
- const graphDb = await getGraphDatabase(config);
513
- return await graphDb.findPath(fromResourceId, toResourceId, maxDepth);
1002
+ // src/jobs/assessment-detection-worker.ts
1003
+ import { JobWorker as JobWorker4 } from "@semiont/jobs";
1004
+ import { generateAnnotationId as generateAnnotationId3 } from "@semiont/event-sourcing";
1005
+ import { resourceIdToURI as resourceIdToURI3 } from "@semiont/core";
1006
+ import { userId as userId2 } from "@semiont/core";
1007
+ var AssessmentDetectionWorker = class extends JobWorker4 {
1008
+ constructor(jobQueue, config, eventStore, inferenceClient) {
1009
+ super(jobQueue);
1010
+ this.config = config;
1011
+ this.eventStore = eventStore;
1012
+ this.inferenceClient = inferenceClient;
514
1013
  }
515
- /**
516
- * Get resource connections (graph edges)
517
- * Requires graph traversal - must use graph database
518
- */
519
- static async getResourceConnections(resourceId2, config) {
520
- const graphDb = await getGraphDatabase(config);
521
- return await graphDb.getResourceConnections(resourceId2);
1014
+ isFirstProgress = true;
1015
+ getWorkerName() {
1016
+ return "AssessmentDetectionWorker";
522
1017
  }
523
- /**
524
- * Search resources by name (cross-resource query)
525
- * Requires full-text search - must use graph database
526
- */
527
- static async searchResources(query, config, limit) {
528
- const graphDb = await getGraphDatabase(config);
529
- return await graphDb.searchResources(query, limit);
1018
+ canProcessJob(job) {
1019
+ return job.metadata.type === "assessment-detection";
530
1020
  }
531
- };
532
-
533
- // src/annotation-detection.ts
534
- import { FilesystemRepresentationStore as FilesystemRepresentationStore3 } from "@semiont/content";
535
- import { getPrimaryRepresentation as getPrimaryRepresentation3, decodeRepresentation as decodeRepresentation3 } from "@semiont/api-client";
536
- import {
537
- MotivationPrompts,
538
- MotivationParsers,
539
- generateText as generateText2
540
- } from "@semiont/inference";
541
- import { getTagSchema, getSchemaCategory } from "@semiont/ontology";
542
- var AnnotationDetection = class {
543
- /**
544
- * Detect comments in a resource
545
- *
546
- * @param resourceId - The resource to analyze
547
- * @param config - Environment configuration
548
- * @param instructions - Optional user instructions for comment generation
549
- * @param tone - Optional tone guidance (e.g., "academic", "conversational")
550
- * @param density - Optional target number of comments per 2000 words
551
- * @returns Array of validated comment matches
552
- */
553
- static async detectComments(resourceId2, config, instructions, tone, density) {
554
- const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
555
- if (!resource) {
556
- throw new Error(`Resource ${resourceId2} not found`);
1021
+ async executeJob(job) {
1022
+ if (job.metadata.type !== "assessment-detection") {
1023
+ throw new Error(`Invalid job type: ${job.metadata.type}`);
557
1024
  }
558
- const content = await this.loadResourceContent(resourceId2, config);
559
- if (!content) {
560
- throw new Error(`Could not load content for resource ${resourceId2}`);
1025
+ if (job.status !== "running") {
1026
+ throw new Error(`Job must be in running state to execute, got: ${job.status}`);
561
1027
  }
562
- const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density);
563
- const response = await generateText2(
564
- prompt,
565
- config,
566
- 3e3,
567
- // maxTokens: Higher than highlights/assessments due to comment text
568
- 0.4
569
- // temperature: Slightly higher to allow creative context
570
- );
571
- return MotivationParsers.parseComments(response, content);
1028
+ this.isFirstProgress = true;
1029
+ await this.processAssessmentDetectionJob(job);
572
1030
  }
573
1031
  /**
574
- * Detect highlights in a resource
575
- *
576
- * @param resourceId - The resource to analyze
577
- * @param config - Environment configuration
578
- * @param instructions - Optional user instructions for highlight selection
579
- * @param density - Optional target number of highlights per 2000 words
580
- * @returns Array of validated highlight matches
1032
+ * Override updateJobProgress to emit events to Event Store
581
1033
  */
582
- static async detectHighlights(resourceId2, config, instructions, density) {
583
- const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
584
- if (!resource) {
585
- throw new Error(`Resource ${resourceId2} not found`);
1034
+ async updateJobProgress(job) {
1035
+ await super.updateJobProgress(job);
1036
+ if (job.metadata.type !== "assessment-detection") return;
1037
+ if (job.status !== "running") {
1038
+ return;
586
1039
  }
587
- const content = await this.loadResourceContent(resourceId2, config);
588
- if (!content) {
589
- throw new Error(`Could not load content for resource ${resourceId2}`);
1040
+ const assJob = job;
1041
+ const baseEvent = {
1042
+ resourceId: assJob.params.resourceId,
1043
+ userId: assJob.metadata.userId,
1044
+ version: 1
1045
+ };
1046
+ const isComplete = assJob.progress.percentage === 100;
1047
+ if (this.isFirstProgress) {
1048
+ this.isFirstProgress = false;
1049
+ await this.eventStore.appendEvent({
1050
+ type: "job.started",
1051
+ ...baseEvent,
1052
+ payload: {
1053
+ jobId: assJob.metadata.id,
1054
+ jobType: assJob.metadata.type
1055
+ }
1056
+ });
1057
+ } else if (isComplete) {
1058
+ await this.eventStore.appendEvent({
1059
+ type: "job.completed",
1060
+ ...baseEvent,
1061
+ payload: {
1062
+ jobId: assJob.metadata.id,
1063
+ jobType: assJob.metadata.type
1064
+ // Note: result would come from job.result, but that's handled by base class
1065
+ }
1066
+ });
1067
+ } else {
1068
+ await this.eventStore.appendEvent({
1069
+ type: "job.progress",
1070
+ ...baseEvent,
1071
+ payload: {
1072
+ jobId: assJob.metadata.id,
1073
+ jobType: assJob.metadata.type,
1074
+ progress: assJob.progress
1075
+ }
1076
+ });
590
1077
  }
591
- const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density);
592
- const response = await generateText2(
593
- prompt,
594
- config,
595
- 2e3,
596
- // maxTokens: Lower than comments/assessments (no body text)
597
- 0.3
598
- // temperature: Low for consistent importance judgments
599
- );
600
- return MotivationParsers.parseHighlights(response, content);
601
1078
  }
602
- /**
603
- * Detect assessments in a resource
604
- *
605
- * @param resourceId - The resource to analyze
606
- * @param config - Environment configuration
607
- * @param instructions - Optional user instructions for assessment generation
608
- * @param tone - Optional tone guidance (e.g., "critical", "supportive")
609
- * @param density - Optional target number of assessments per 2000 words
610
- * @returns Array of validated assessment matches
611
- */
612
- static async detectAssessments(resourceId2, config, instructions, tone, density) {
613
- const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
614
- if (!resource) {
615
- throw new Error(`Resource ${resourceId2} not found`);
616
- }
617
- const content = await this.loadResourceContent(resourceId2, config);
618
- if (!content) {
619
- throw new Error(`Could not load content for resource ${resourceId2}`);
1079
+ async handleJobFailure(job, error) {
1080
+ await super.handleJobFailure(job, error);
1081
+ if (job.status === "failed" && job.metadata.type === "assessment-detection") {
1082
+ const aJob = job;
1083
+ await this.eventStore.appendEvent({
1084
+ type: "job.failed",
1085
+ resourceId: aJob.params.resourceId,
1086
+ userId: aJob.metadata.userId,
1087
+ version: 1,
1088
+ payload: {
1089
+ jobId: aJob.metadata.id,
1090
+ jobType: aJob.metadata.type,
1091
+ error: "Assessment detection failed. Please try again later."
1092
+ }
1093
+ });
620
1094
  }
621
- const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density);
622
- const response = await generateText2(
623
- prompt,
624
- config,
625
- 3e3,
626
- // maxTokens: Higher for assessment text
627
- 0.3
628
- // temperature: Lower for analytical consistency
629
- );
630
- return MotivationParsers.parseAssessments(response, content);
631
1095
  }
632
- /**
633
- * Detect tags in a resource for a specific category
634
- *
635
- * @param resourceId - The resource to analyze
636
- * @param config - Environment configuration
637
- * @param schemaId - The tag schema identifier (e.g., "irac", "imrad")
638
- * @param category - The specific category to detect
639
- * @returns Array of validated tag matches
640
- */
641
- static async detectTags(resourceId2, config, schemaId, category) {
642
- const schema = getTagSchema(schemaId);
643
- if (!schema) {
644
- throw new Error(`Invalid tag schema: ${schemaId}`);
645
- }
646
- const categoryInfo = getSchemaCategory(schemaId, category);
647
- if (!categoryInfo) {
648
- throw new Error(`Invalid category "${category}" for schema ${schemaId}`);
649
- }
650
- const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
1096
+ async processAssessmentDetectionJob(job) {
1097
+ console.log(`[AssessmentDetectionWorker] Processing assessment detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
1098
+ const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
651
1099
  if (!resource) {
652
- throw new Error(`Resource ${resourceId2} not found`);
653
- }
654
- const content = await this.loadResourceContent(resourceId2, config);
655
- if (!content) {
656
- throw new Error(`Could not load content for resource ${resourceId2}`);
1100
+ throw new Error(`Resource ${job.params.resourceId} not found`);
657
1101
  }
658
- const prompt = MotivationPrompts.buildTagPrompt(
659
- content,
660
- category,
661
- schema.name,
662
- schema.description,
663
- schema.domain,
664
- categoryInfo.description,
665
- categoryInfo.examples
666
- );
667
- const response = await generateText2(
668
- prompt,
669
- config,
670
- 4e3,
671
- // maxTokens: Higher for full document analysis
672
- 0.2
673
- // temperature: Lower for structural consistency
1102
+ let updatedJob = {
1103
+ ...job,
1104
+ progress: {
1105
+ stage: "analyzing",
1106
+ percentage: 10,
1107
+ message: "Loading resource..."
1108
+ }
1109
+ };
1110
+ await this.updateJobProgress(updatedJob);
1111
+ updatedJob = {
1112
+ ...updatedJob,
1113
+ progress: {
1114
+ stage: "analyzing",
1115
+ percentage: 30,
1116
+ message: "Analyzing text..."
1117
+ }
1118
+ };
1119
+ await this.updateJobProgress(updatedJob);
1120
+ const assessments = await AnnotationDetection.detectAssessments(
1121
+ job.params.resourceId,
1122
+ this.config,
1123
+ this.inferenceClient,
1124
+ job.params.instructions,
1125
+ job.params.tone,
1126
+ job.params.density
674
1127
  );
675
- const parsedTags = MotivationParsers.parseTags(response);
676
- return MotivationParsers.validateTagOffsets(parsedTags, content, category);
677
- }
678
- /**
679
- * Load resource content from representation store
680
- * Helper method used by all detection methods
681
- *
682
- * @param resourceId - The resource ID to load
683
- * @param config - Environment configuration
684
- * @returns Resource content as string, or null if not available
685
- */
686
- static async loadResourceContent(resourceId2, config) {
687
- const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
688
- if (!resource) return null;
689
- const primaryRep = getPrimaryRepresentation3(resource);
690
- if (!primaryRep) return null;
691
- const baseMediaType = primaryRep.mediaType?.split(";")[0]?.trim() || "";
692
- if (baseMediaType !== "text/plain" && baseMediaType !== "text/markdown") {
693
- return null;
1128
+ console.log(`[AssessmentDetectionWorker] Found ${assessments.length} assessments to create`);
1129
+ updatedJob = {
1130
+ ...updatedJob,
1131
+ progress: {
1132
+ stage: "creating",
1133
+ percentage: 60,
1134
+ message: `Creating ${assessments.length} annotations...`
1135
+ }
1136
+ };
1137
+ await this.updateJobProgress(updatedJob);
1138
+ let created = 0;
1139
+ for (const assessment of assessments) {
1140
+ try {
1141
+ await this.createAssessmentAnnotation(job.params.resourceId, job.metadata.userId, assessment);
1142
+ created++;
1143
+ } catch (error) {
1144
+ console.error(`[AssessmentDetectionWorker] Failed to create assessment:`, error);
1145
+ }
694
1146
  }
695
- if (!primaryRep.checksum || !primaryRep.mediaType) return null;
696
- const basePath = config.services.filesystem.path;
697
- const projectRoot = config._metadata?.projectRoot;
698
- const repStore = new FilesystemRepresentationStore3({ basePath }, projectRoot);
699
- const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
700
- return decodeRepresentation3(contentBuffer, primaryRep.mediaType);
1147
+ updatedJob = {
1148
+ ...updatedJob,
1149
+ progress: {
1150
+ stage: "creating",
1151
+ percentage: 100,
1152
+ message: `Complete! Created ${created} assessments`
1153
+ }
1154
+ };
1155
+ await this.updateJobProgress(updatedJob);
1156
+ console.log(`[AssessmentDetectionWorker] \u2705 Created ${created}/${assessments.length} assessments`);
1157
+ }
1158
+ async createAssessmentAnnotation(resourceId2, creatorUserId, assessment) {
1159
+ const backendUrl = this.config.services.backend?.publicURL;
1160
+ if (!backendUrl) throw new Error("Backend publicURL not configured");
1161
+ const annotationId2 = generateAnnotationId3(backendUrl);
1162
+ const resourceUri3 = resourceIdToURI3(resourceId2, backendUrl);
1163
+ const annotation = {
1164
+ "@context": "http://www.w3.org/ns/anno.jsonld",
1165
+ "type": "Annotation",
1166
+ "id": annotationId2,
1167
+ "motivation": "assessing",
1168
+ "creator": userId2(creatorUserId),
1169
+ "created": (/* @__PURE__ */ new Date()).toISOString(),
1170
+ "target": {
1171
+ type: "SpecificResource",
1172
+ source: resourceUri3,
1173
+ selector: [
1174
+ {
1175
+ type: "TextPositionSelector",
1176
+ start: assessment.start,
1177
+ end: assessment.end
1178
+ },
1179
+ {
1180
+ type: "TextQuoteSelector",
1181
+ exact: assessment.exact,
1182
+ ...assessment.prefix && { prefix: assessment.prefix },
1183
+ ...assessment.suffix && { suffix: assessment.suffix }
1184
+ }
1185
+ ]
1186
+ },
1187
+ "body": {
1188
+ type: "TextualBody",
1189
+ value: assessment.assessment,
1190
+ format: "text/plain"
1191
+ }
1192
+ };
1193
+ await this.eventStore.appendEvent({
1194
+ type: "annotation.added",
1195
+ resourceId: resourceId2,
1196
+ userId: userId2(creatorUserId),
1197
+ version: 1,
1198
+ payload: { annotation }
1199
+ });
701
1200
  }
702
1201
  };
703
1202
 
704
- // src/jobs/workers/comment-detection-worker.ts
705
- import { JobWorker } from "@semiont/jobs";
706
- import { generateAnnotationId } from "@semiont/event-sourcing";
707
- import { resourceIdToURI as resourceIdToURI2 } from "@semiont/core";
708
- import { userId } from "@semiont/core";
709
- var CommentDetectionWorker = class extends JobWorker {
710
- constructor(jobQueue, config, eventStore) {
1203
+ // src/jobs/comment-detection-worker.ts
1204
+ import { JobWorker as JobWorker5 } from "@semiont/jobs";
1205
+ import { generateAnnotationId as generateAnnotationId4 } from "@semiont/event-sourcing";
1206
+ import { resourceIdToURI as resourceIdToURI4 } from "@semiont/core";
1207
+ import { userId as userId3 } from "@semiont/core";
1208
+ var CommentDetectionWorker = class extends JobWorker5 {
1209
+ constructor(jobQueue, config, eventStore, inferenceClient) {
711
1210
  super(jobQueue);
712
1211
  this.config = config;
713
1212
  this.eventStore = eventStore;
1213
+ this.inferenceClient = inferenceClient;
714
1214
  }
715
1215
  isFirstProgress = true;
716
1216
  getWorkerName() {
717
1217
  return "CommentDetectionWorker";
718
1218
  }
719
1219
  canProcessJob(job) {
720
- return job.type === "comment-detection";
1220
+ return job.metadata.type === "comment-detection";
721
1221
  }
722
1222
  async executeJob(job) {
723
- if (job.type !== "comment-detection") {
724
- throw new Error(`Invalid job type: ${job.type}`);
1223
+ if (job.metadata.type !== "comment-detection") {
1224
+ throw new Error(`Invalid job type: ${job.metadata.type}`);
1225
+ }
1226
+ if (job.status !== "running") {
1227
+ throw new Error(`Job must be in running state to execute, got: ${job.status}`);
725
1228
  }
726
1229
  this.isFirstProgress = true;
727
1230
  await this.processCommentDetectionJob(job);
@@ -731,23 +1234,25 @@ var CommentDetectionWorker = class extends JobWorker {
731
1234
  */
732
1235
  async updateJobProgress(job) {
733
1236
  await super.updateJobProgress(job);
734
- if (job.type !== "comment-detection") return;
1237
+ if (job.metadata.type !== "comment-detection") return;
1238
+ if (job.status !== "running") {
1239
+ return;
1240
+ }
735
1241
  const cdJob = job;
736
- if (!cdJob.progress) return;
737
1242
  const baseEvent = {
738
- resourceId: cdJob.resourceId,
739
- userId: cdJob.userId,
1243
+ resourceId: cdJob.params.resourceId,
1244
+ userId: cdJob.metadata.userId,
740
1245
  version: 1
741
1246
  };
742
- const isComplete = cdJob.progress.percentage === 100 && cdJob.result;
1247
+ const isComplete = cdJob.progress.percentage === 100;
743
1248
  if (this.isFirstProgress) {
744
1249
  this.isFirstProgress = false;
745
1250
  await this.eventStore.appendEvent({
746
1251
  type: "job.started",
747
1252
  ...baseEvent,
748
1253
  payload: {
749
- jobId: cdJob.id,
750
- jobType: cdJob.type
1254
+ jobId: cdJob.metadata.id,
1255
+ jobType: cdJob.metadata.type
751
1256
  }
752
1257
  });
753
1258
  } else if (isComplete) {
@@ -755,9 +1260,9 @@ var CommentDetectionWorker = class extends JobWorker {
755
1260
  type: "job.completed",
756
1261
  ...baseEvent,
757
1262
  payload: {
758
- jobId: cdJob.id,
759
- jobType: cdJob.type,
760
- result: cdJob.result
1263
+ jobId: cdJob.metadata.id,
1264
+ jobType: cdJob.metadata.type
1265
+ // Note: result would come from job.result, but that's handled by base class
761
1266
  }
762
1267
  });
763
1268
  } else {
@@ -765,8 +1270,8 @@ var CommentDetectionWorker = class extends JobWorker {
765
1270
  type: "job.progress",
766
1271
  ...baseEvent,
767
1272
  payload: {
768
- jobId: cdJob.id,
769
- jobType: cdJob.type,
1273
+ jobId: cdJob.metadata.id,
1274
+ jobType: cdJob.metadata.type,
770
1275
  progress: cdJob.progress
771
1276
  }
772
1277
  });
@@ -774,72 +1279,81 @@ var CommentDetectionWorker = class extends JobWorker {
774
1279
  }
775
1280
  async handleJobFailure(job, error) {
776
1281
  await super.handleJobFailure(job, error);
777
- if (job.status === "failed" && job.type === "comment-detection") {
1282
+ if (job.status === "failed" && job.metadata.type === "comment-detection") {
778
1283
  const cdJob = job;
779
1284
  await this.eventStore.appendEvent({
780
1285
  type: "job.failed",
781
- resourceId: cdJob.resourceId,
782
- userId: cdJob.userId,
1286
+ resourceId: cdJob.params.resourceId,
1287
+ userId: cdJob.metadata.userId,
783
1288
  version: 1,
784
1289
  payload: {
785
- jobId: cdJob.id,
786
- jobType: cdJob.type,
1290
+ jobId: cdJob.metadata.id,
1291
+ jobType: cdJob.metadata.type,
787
1292
  error: "Comment detection failed. Please try again later."
788
1293
  }
789
1294
  });
790
1295
  }
791
1296
  }
792
1297
  async processCommentDetectionJob(job) {
793
- console.log(`[CommentDetectionWorker] Processing comment detection for resource ${job.resourceId} (job: ${job.id})`);
794
- const resource = await ResourceContext.getResourceMetadata(job.resourceId, this.config);
1298
+ console.log(`[CommentDetectionWorker] Processing comment detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
1299
+ const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
795
1300
  if (!resource) {
796
- throw new Error(`Resource ${job.resourceId} not found`);
1301
+ throw new Error(`Resource ${job.params.resourceId} not found`);
797
1302
  }
798
- job.progress = {
799
- stage: "analyzing",
800
- percentage: 10,
801
- message: "Loading resource..."
1303
+ let updatedJob = {
1304
+ ...job,
1305
+ progress: {
1306
+ stage: "analyzing",
1307
+ percentage: 10,
1308
+ message: "Loading resource..."
1309
+ }
802
1310
  };
803
- await this.updateJobProgress(job);
804
- job.progress = {
805
- stage: "analyzing",
806
- percentage: 30,
807
- message: "Analyzing text and generating comments..."
1311
+ await this.updateJobProgress(updatedJob);
1312
+ updatedJob = {
1313
+ ...updatedJob,
1314
+ progress: {
1315
+ stage: "analyzing",
1316
+ percentage: 30,
1317
+ message: "Analyzing text and generating comments..."
1318
+ }
808
1319
  };
809
- await this.updateJobProgress(job);
1320
+ await this.updateJobProgress(updatedJob);
810
1321
  const comments = await AnnotationDetection.detectComments(
811
- job.resourceId,
1322
+ job.params.resourceId,
812
1323
  this.config,
813
- job.instructions,
814
- job.tone,
815
- job.density
1324
+ this.inferenceClient,
1325
+ job.params.instructions,
1326
+ job.params.tone,
1327
+ job.params.density
816
1328
  );
817
1329
  console.log(`[CommentDetectionWorker] Found ${comments.length} comments to create`);
818
- job.progress = {
819
- stage: "creating",
820
- percentage: 60,
821
- message: `Creating ${comments.length} annotations...`
1330
+ updatedJob = {
1331
+ ...updatedJob,
1332
+ progress: {
1333
+ stage: "creating",
1334
+ percentage: 60,
1335
+ message: `Creating ${comments.length} annotations...`
1336
+ }
822
1337
  };
823
- await this.updateJobProgress(job);
1338
+ await this.updateJobProgress(updatedJob);
824
1339
  let created = 0;
825
1340
  for (const comment of comments) {
826
1341
  try {
827
- await this.createCommentAnnotation(job.resourceId, job.userId, comment);
1342
+ await this.createCommentAnnotation(job.params.resourceId, job.metadata.userId, comment);
828
1343
  created++;
829
1344
  } catch (error) {
830
1345
  console.error(`[CommentDetectionWorker] Failed to create comment:`, error);
831
1346
  }
832
1347
  }
833
- job.result = {
834
- commentsFound: comments.length,
835
- commentsCreated: created
836
- };
837
- job.progress = {
838
- stage: "creating",
839
- percentage: 100,
840
- message: `Complete! Created ${created} comments`
1348
+ updatedJob = {
1349
+ ...updatedJob,
1350
+ progress: {
1351
+ stage: "creating",
1352
+ percentage: 100,
1353
+ message: `Complete! Created ${created} comments`
1354
+ }
841
1355
  };
842
- await this.updateJobProgress(job);
1356
+ await this.updateJobProgress(updatedJob);
843
1357
  console.log(`[CommentDetectionWorker] \u2705 Created ${created}/${comments.length} comments`);
844
1358
  }
845
1359
  async createCommentAnnotation(resourceId2, userId_, comment) {
@@ -847,8 +1361,8 @@ var CommentDetectionWorker = class extends JobWorker {
847
1361
  if (!backendUrl) {
848
1362
  throw new Error("Backend publicURL not configured");
849
1363
  }
850
- const resourceUri2 = resourceIdToURI2(resourceId2, backendUrl);
851
- const annotationId2 = generateAnnotationId(backendUrl);
1364
+ const resourceUri3 = resourceIdToURI4(resourceId2, backendUrl);
1365
+ const annotationId2 = generateAnnotationId4(backendUrl);
852
1366
  const annotation = {
853
1367
  "@context": "http://www.w3.org/ns/anno.jsonld",
854
1368
  type: "Annotation",
@@ -856,7 +1370,7 @@ var CommentDetectionWorker = class extends JobWorker {
856
1370
  motivation: "commenting",
857
1371
  target: {
858
1372
  type: "SpecificResource",
859
- source: resourceUri2,
1373
+ source: resourceUri3,
860
1374
  selector: [
861
1375
  {
862
1376
  type: "TextPositionSelector",
@@ -884,7 +1398,7 @@ var CommentDetectionWorker = class extends JobWorker {
884
1398
  await this.eventStore.appendEvent({
885
1399
  type: "annotation.added",
886
1400
  resourceId: resourceId2,
887
- userId: userId(userId_),
1401
+ userId: userId3(userId_),
888
1402
  version: 1,
889
1403
  payload: {
890
1404
  annotation
@@ -894,53 +1408,60 @@ var CommentDetectionWorker = class extends JobWorker {
894
1408
  }
895
1409
  };
896
1410
 
897
- // src/jobs/workers/highlight-detection-worker.ts
898
- import { JobWorker as JobWorker2 } from "@semiont/jobs";
899
- import { generateAnnotationId as generateAnnotationId2 } from "@semiont/event-sourcing";
900
- import { resourceIdToURI as resourceIdToURI3 } from "@semiont/core";
901
- import { userId as userId2 } from "@semiont/core";
902
- var HighlightDetectionWorker = class extends JobWorker2 {
903
- constructor(jobQueue, config, eventStore) {
1411
+ // src/jobs/tag-detection-worker.ts
1412
+ import { JobWorker as JobWorker6 } from "@semiont/jobs";
1413
+ import { generateAnnotationId as generateAnnotationId5 } from "@semiont/event-sourcing";
1414
+ import { resourceIdToURI as resourceIdToURI5 } from "@semiont/core";
1415
+ import { getTagSchema } from "@semiont/ontology";
1416
+ import { userId as userId4 } from "@semiont/core";
1417
+ var TagDetectionWorker = class extends JobWorker6 {
1418
+ constructor(jobQueue, config, eventStore, inferenceClient) {
904
1419
  super(jobQueue);
905
1420
  this.config = config;
906
1421
  this.eventStore = eventStore;
1422
+ this.inferenceClient = inferenceClient;
907
1423
  }
908
1424
  isFirstProgress = true;
909
1425
  getWorkerName() {
910
- return "HighlightDetectionWorker";
1426
+ return "TagDetectionWorker";
911
1427
  }
912
1428
  canProcessJob(job) {
913
- return job.type === "highlight-detection";
1429
+ return job.metadata.type === "tag-detection";
914
1430
  }
915
1431
  async executeJob(job) {
916
- if (job.type !== "highlight-detection") {
917
- throw new Error(`Invalid job type: ${job.type}`);
1432
+ if (job.metadata.type !== "tag-detection") {
1433
+ throw new Error(`Invalid job type: ${job.metadata.type}`);
1434
+ }
1435
+ if (job.status !== "running") {
1436
+ throw new Error(`Job must be in running state to execute, got: ${job.status}`);
918
1437
  }
919
1438
  this.isFirstProgress = true;
920
- await this.processHighlightDetectionJob(job);
1439
+ await this.processTagDetectionJob(job);
921
1440
  }
922
1441
  /**
923
1442
  * Override updateJobProgress to emit events to Event Store
924
1443
  */
925
1444
  async updateJobProgress(job) {
926
1445
  await super.updateJobProgress(job);
927
- if (job.type !== "highlight-detection") return;
928
- const hlJob = job;
929
- if (!hlJob.progress) return;
1446
+ if (job.metadata.type !== "tag-detection") return;
1447
+ if (job.status !== "running") {
1448
+ return;
1449
+ }
1450
+ const tdJob = job;
930
1451
  const baseEvent = {
931
- resourceId: hlJob.resourceId,
932
- userId: hlJob.userId,
1452
+ resourceId: tdJob.params.resourceId,
1453
+ userId: tdJob.metadata.userId,
933
1454
  version: 1
934
1455
  };
935
- const isComplete = hlJob.progress.percentage === 100 && hlJob.result;
1456
+ const isComplete = tdJob.progress.percentage === 100;
936
1457
  if (this.isFirstProgress) {
937
1458
  this.isFirstProgress = false;
938
1459
  await this.eventStore.appendEvent({
939
1460
  type: "job.started",
940
1461
  ...baseEvent,
941
1462
  payload: {
942
- jobId: hlJob.id,
943
- jobType: hlJob.type
1463
+ jobId: tdJob.metadata.id,
1464
+ jobType: tdJob.metadata.type
944
1465
  }
945
1466
  });
946
1467
  } else if (isComplete) {
@@ -948,9 +1469,9 @@ var HighlightDetectionWorker = class extends JobWorker2 {
948
1469
  type: "job.completed",
949
1470
  ...baseEvent,
950
1471
  payload: {
951
- jobId: hlJob.id,
952
- jobType: hlJob.type,
953
- result: hlJob.result
1472
+ jobId: tdJob.metadata.id,
1473
+ jobType: tdJob.metadata.type
1474
+ // Note: result would come from job.result, but that's handled by base class
954
1475
  }
955
1476
  });
956
1477
  } else {
@@ -958,1020 +1479,1804 @@ var HighlightDetectionWorker = class extends JobWorker2 {
958
1479
  type: "job.progress",
959
1480
  ...baseEvent,
960
1481
  payload: {
961
- jobId: hlJob.id,
962
- jobType: hlJob.type,
963
- progress: hlJob.progress
1482
+ jobId: tdJob.metadata.id,
1483
+ jobType: tdJob.metadata.type,
1484
+ progress: tdJob.progress
964
1485
  }
965
1486
  });
966
1487
  }
967
1488
  }
968
1489
  async handleJobFailure(job, error) {
969
1490
  await super.handleJobFailure(job, error);
970
- if (job.status === "failed" && job.type === "highlight-detection") {
971
- const hlJob = job;
1491
+ if (job.status === "failed" && job.metadata.type === "tag-detection") {
1492
+ const tdJob = job;
972
1493
  await this.eventStore.appendEvent({
973
1494
  type: "job.failed",
974
- resourceId: hlJob.resourceId,
975
- userId: hlJob.userId,
1495
+ resourceId: tdJob.params.resourceId,
1496
+ userId: tdJob.metadata.userId,
976
1497
  version: 1,
977
1498
  payload: {
978
- jobId: hlJob.id,
979
- jobType: hlJob.type,
980
- error: "Highlight detection failed. Please try again later."
1499
+ jobId: tdJob.metadata.id,
1500
+ jobType: tdJob.metadata.type,
1501
+ error: "Tag detection failed. Please try again later."
981
1502
  }
982
1503
  });
983
1504
  }
984
1505
  }
985
- async processHighlightDetectionJob(job) {
986
- console.log(`[HighlightDetectionWorker] Processing highlight detection for resource ${job.resourceId} (job: ${job.id})`);
987
- const resource = await ResourceContext.getResourceMetadata(job.resourceId, this.config);
1506
+ async processTagDetectionJob(job) {
1507
+ console.log(`[TagDetectionWorker] Processing tag detection for resource ${job.params.resourceId} (job: ${job.metadata.id})`);
1508
+ const schema = getTagSchema(job.params.schemaId);
1509
+ if (!schema) {
1510
+ throw new Error(`Invalid tag schema: ${job.params.schemaId}`);
1511
+ }
1512
+ for (const category of job.params.categories) {
1513
+ if (!schema.tags.some((t) => t.name === category)) {
1514
+ throw new Error(`Invalid category "${category}" for schema ${job.params.schemaId}`);
1515
+ }
1516
+ }
1517
+ const resource = await ResourceContext.getResourceMetadata(job.params.resourceId, this.config);
988
1518
  if (!resource) {
989
- throw new Error(`Resource ${job.resourceId} not found`);
1519
+ throw new Error(`Resource ${job.params.resourceId} not found`);
990
1520
  }
991
- job.progress = {
992
- stage: "analyzing",
993
- percentage: 10,
994
- message: "Loading resource..."
995
- };
996
- await this.updateJobProgress(job);
997
- job.progress = {
998
- stage: "analyzing",
999
- percentage: 30,
1000
- message: "Analyzing text..."
1521
+ let updatedJob = {
1522
+ ...job,
1523
+ progress: {
1524
+ stage: "analyzing",
1525
+ percentage: 10,
1526
+ processedCategories: 0,
1527
+ totalCategories: job.params.categories.length,
1528
+ message: "Loading resource..."
1529
+ }
1001
1530
  };
1002
- await this.updateJobProgress(job);
1003
- const highlights = await AnnotationDetection.detectHighlights(
1004
- job.resourceId,
1005
- this.config,
1006
- job.instructions,
1007
- job.density
1008
- );
1009
- console.log(`[HighlightDetectionWorker] Found ${highlights.length} highlights to create`);
1010
- job.progress = {
1011
- stage: "creating",
1012
- percentage: 60,
1013
- message: `Creating ${highlights.length} annotations...`
1531
+ await this.updateJobProgress(updatedJob);
1532
+ const allTags = [];
1533
+ const byCategory = {};
1534
+ for (let i = 0; i < job.params.categories.length; i++) {
1535
+ const category = job.params.categories[i];
1536
+ updatedJob = {
1537
+ ...updatedJob,
1538
+ progress: {
1539
+ stage: "analyzing",
1540
+ percentage: 10 + Math.floor(i / job.params.categories.length * 50),
1541
+ currentCategory: category,
1542
+ processedCategories: i + 1,
1543
+ totalCategories: job.params.categories.length,
1544
+ message: `Analyzing ${category}...`
1545
+ }
1546
+ };
1547
+ await this.updateJobProgress(updatedJob);
1548
+ const tags = await AnnotationDetection.detectTags(
1549
+ job.params.resourceId,
1550
+ this.config,
1551
+ this.inferenceClient,
1552
+ job.params.schemaId,
1553
+ category
1554
+ );
1555
+ console.log(`[TagDetectionWorker] Found ${tags.length} tags for category "${category}"`);
1556
+ allTags.push(...tags);
1557
+ byCategory[category] = tags.length;
1558
+ }
1559
+ updatedJob = {
1560
+ ...updatedJob,
1561
+ progress: {
1562
+ stage: "creating",
1563
+ percentage: 60,
1564
+ processedCategories: job.params.categories.length,
1565
+ totalCategories: job.params.categories.length,
1566
+ message: `Creating ${allTags.length} tag annotations...`
1567
+ }
1014
1568
  };
1015
- await this.updateJobProgress(job);
1569
+ await this.updateJobProgress(updatedJob);
1016
1570
  let created = 0;
1017
- for (const highlight of highlights) {
1571
+ for (const tag of allTags) {
1018
1572
  try {
1019
- await this.createHighlightAnnotation(job.resourceId, job.userId, highlight);
1573
+ await this.createTagAnnotation(job.params.resourceId, job.metadata.userId, job.params.schemaId, tag);
1020
1574
  created++;
1021
1575
  } catch (error) {
1022
- console.error(`[HighlightDetectionWorker] Failed to create highlight:`, error);
1576
+ console.error(`[TagDetectionWorker] Failed to create tag:`, error);
1023
1577
  }
1024
1578
  }
1025
- job.result = {
1026
- highlightsFound: highlights.length,
1027
- highlightsCreated: created
1028
- };
1029
- job.progress = {
1030
- stage: "creating",
1031
- percentage: 100,
1032
- message: `Complete! Created ${created} highlights`
1579
+ updatedJob = {
1580
+ ...updatedJob,
1581
+ progress: {
1582
+ stage: "creating",
1583
+ percentage: 100,
1584
+ processedCategories: job.params.categories.length,
1585
+ totalCategories: job.params.categories.length,
1586
+ message: `Complete! Created ${created} tags`
1587
+ }
1033
1588
  };
1034
- await this.updateJobProgress(job);
1035
- console.log(`[HighlightDetectionWorker] \u2705 Created ${created}/${highlights.length} highlights`);
1589
+ await this.updateJobProgress(updatedJob);
1590
+ console.log(`[TagDetectionWorker] \u2705 Created ${created}/${allTags.length} tags across ${job.params.categories.length} categories`);
1036
1591
  }
1037
- async createHighlightAnnotation(resourceId2, creatorUserId, highlight) {
1592
+ async createTagAnnotation(resourceId2, userId_, schemaId, tag) {
1038
1593
  const backendUrl = this.config.services.backend?.publicURL;
1039
- if (!backendUrl) throw new Error("Backend publicURL not configured");
1040
- const annotationId2 = generateAnnotationId2(backendUrl);
1041
- const resourceUri2 = resourceIdToURI3(resourceId2, backendUrl);
1594
+ if (!backendUrl) {
1595
+ throw new Error("Backend publicURL not configured");
1596
+ }
1597
+ const resourceUri3 = resourceIdToURI5(resourceId2, backendUrl);
1598
+ const annotationId2 = generateAnnotationId5(backendUrl);
1042
1599
  const annotation = {
1043
1600
  "@context": "http://www.w3.org/ns/anno.jsonld",
1044
- "type": "Annotation",
1045
- "id": annotationId2,
1046
- "motivation": "highlighting",
1047
- "creator": userId2(creatorUserId),
1048
- "created": (/* @__PURE__ */ new Date()).toISOString(),
1049
- "target": {
1601
+ type: "Annotation",
1602
+ id: annotationId2,
1603
+ motivation: "tagging",
1604
+ target: {
1050
1605
  type: "SpecificResource",
1051
- source: resourceUri2,
1606
+ source: resourceUri3,
1052
1607
  selector: [
1053
1608
  {
1054
1609
  type: "TextPositionSelector",
1055
- start: highlight.start,
1056
- end: highlight.end
1610
+ start: tag.start,
1611
+ end: tag.end
1057
1612
  },
1058
1613
  {
1059
1614
  type: "TextQuoteSelector",
1060
- exact: highlight.exact,
1061
- ...highlight.prefix && { prefix: highlight.prefix },
1062
- ...highlight.suffix && { suffix: highlight.suffix }
1615
+ exact: tag.exact,
1616
+ prefix: tag.prefix || "",
1617
+ suffix: tag.suffix || ""
1063
1618
  }
1064
1619
  ]
1065
1620
  },
1066
- "body": []
1067
- // Empty body for highlights
1068
- };
1069
- await this.eventStore.appendEvent({
1070
- type: "annotation.added",
1071
- resourceId: resourceId2,
1072
- userId: userId2(creatorUserId),
1073
- version: 1,
1074
- payload: { annotation }
1075
- });
1076
- }
1077
- };
1621
+ body: [
1622
+ {
1623
+ type: "TextualBody",
1624
+ value: tag.category,
1625
+ purpose: "tagging",
1626
+ format: "text/plain",
1627
+ language: "en"
1628
+ },
1629
+ {
1630
+ type: "TextualBody",
1631
+ value: schemaId,
1632
+ purpose: "classifying",
1633
+ format: "text/plain"
1634
+ }
1635
+ ]
1636
+ };
1637
+ await this.eventStore.appendEvent({
1638
+ type: "annotation.added",
1639
+ resourceId: resourceId2,
1640
+ userId: userId4(userId_),
1641
+ version: 1,
1642
+ payload: {
1643
+ annotation
1644
+ }
1645
+ });
1646
+ console.log(`[TagDetectionWorker] Created tag annotation ${annotationId2} for "${tag.category}": "${tag.exact.substring(0, 50)}..."`);
1647
+ }
1648
+ };
1078
1649
 
1079
- // src/jobs/workers/assessment-detection-worker.ts
1080
- import { JobWorker as JobWorker3 } from "@semiont/jobs";
1081
- import { generateAnnotationId as generateAnnotationId3 } from "@semiont/event-sourcing";
1082
- import { resourceIdToURI as resourceIdToURI4 } from "@semiont/core";
1083
- import { userId as userId3 } from "@semiont/core";
1084
- var AssessmentDetectionWorker = class extends JobWorker3 {
1085
- constructor(jobQueue, config, eventStore) {
1086
- super(jobQueue);
1650
+ // src/graph/consumer.ts
1651
+ import { EventQuery } from "@semiont/event-sourcing";
1652
+ import { didToAgent } from "@semiont/core";
1653
+ import { resourceId as makeResourceId, findBodyItem } from "@semiont/core";
1654
+ import { toResourceUri, toAnnotationUri } from "@semiont/event-sourcing";
1655
+ import { resourceUri as resourceUri2 } from "@semiont/api-client";
1656
/**
 * Projects event-store events into the graph database.
 *
 * The consumer subscribes to the event bus (globally and per resource) and
 * applies each stored event to the graph via `applyEventToGraph`. Events that
 * carry a resourceId are applied strictly one at a time per resource, in the
 * order `processEvent` was called.
 */
var GraphDBConsumer = class {
  /**
   * @param config - application config; reads services.backend.publicURL
   * @param eventStore - event store; reads `bus.subscriptions` and `log`
   * @param graphDb - graph database client that receives the projections
   */
  constructor(config, eventStore, graphDb) {
    this.config = config;
    this.eventStore = eventStore;
    this.graphDb = graphDb;
  }
  // resourceId -> subscription handle returned by the event bus
  subscriptions = /* @__PURE__ */ new Map();
  // Subscription to system-level events (kept for cleanup)
  _globalSubscription = null;
  // resourceId -> promise of the most recently queued event (tail of the chain)
  processing = /* @__PURE__ */ new Map();
  // resourceId -> sequence number of the last successfully applied event
  lastProcessed = /* @__PURE__ */ new Map();

  /** Log startup and subscribe to global (system-level) events. */
  async initialize() {
    console.log("[GraphDBConsumer] Initialized");
    await this.subscribeToGlobalEvents();
  }

  /**
   * Subscribe to global system-level events (no resourceId).
   * This allows the consumer to react to events like entitytype.added.
   * Calling it again while a global subscription is held is a no-op, so the
   * earlier handle is never lost.
   */
  async subscribeToGlobalEvents() {
    if (this._globalSubscription) {
      return;
    }
    this._globalSubscription = this.eventStore.bus.subscriptions.subscribeGlobal(async (storedEvent) => {
      console.log(`[GraphDBConsumer] Received global event: ${storedEvent.event.type}`);
      await this.processEvent(storedEvent);
    });
    console.log("[GraphDBConsumer] Subscribed to global system events");
  }

  /**
   * Return the graph database client.
   * @throws Error when the consumer was constructed without one
   */
  ensureInitialized() {
    if (!this.graphDb) {
      throw new Error("GraphDBConsumer requires a graph database instance");
    }
    return this.graphDb;
  }

  /**
   * Subscribe to events for a resource and apply each one to GraphDB.
   * Subscribing to an already-subscribed resource is a no-op; replacing the
   * handle would leak the old subscription and deliver every event twice.
   */
  async subscribeToResource(resourceId2) {
    this.ensureInitialized();
    if (this.subscriptions.has(resourceId2)) {
      console.log(`[GraphDBConsumer] Already subscribed to ${resourceId2}`);
      return;
    }
    const publicURL = this.config.services.backend.publicURL;
    const rUri = resourceUri2(`${publicURL}/resources/${resourceId2}`);
    const subscription = this.eventStore.bus.subscriptions.subscribe(rUri, async (storedEvent) => {
      await this.processEvent(storedEvent);
    });
    this.subscriptions.set(resourceId2, subscription);
    console.log(`[GraphDBConsumer] Subscribed to ${resourceId2}`);
  }

  /** Stop the consumer and unsubscribe from all events. */
  async stop() {
    console.log("[GraphDBConsumer] Stopping...");
    for (const subscription of this.subscriptions.values()) {
      this._release(subscription);
    }
    this.subscriptions.clear();
    this._release(this._globalSubscription);
    this._globalSubscription = null;
    console.log("[GraphDBConsumer] Stopped");
  }

  /**
   * Process event with ordering guarantee (sequential per resource).
   *
   * Events without a resourceId are applied immediately. Events for a
   * resource are chained onto the tail promise stored in `processing`, so any
   * number of concurrent callers run one after another. A failed predecessor
   * does not block its successors; its error was already thrown to its own
   * caller.
   *
   * @throws whatever applyEventToGraph throws for this event
   */
  async processEvent(storedEvent) {
    const { resourceId: resourceId2 } = storedEvent.event;
    if (!resourceId2) {
      await this.applyEventToGraph(storedEvent);
      return;
    }
    const tail = this.processing.get(resourceId2) ?? Promise.resolve();
    const current = tail
      .catch(() => void 0)
      .then(() => this.applyEventToGraph(storedEvent));
    this.processing.set(resourceId2, current);
    try {
      await current;
      this.lastProcessed.set(resourceId2, storedEvent.metadata.sequenceNumber);
    } catch (error) {
      console.error(`[GraphDBConsumer] Failed to process event:`, error);
      throw error;
    } finally {
      // Only clear the slot if no later event has queued behind this one.
      if (this.processing.get(resourceId2) === current) {
        this.processing.delete(resourceId2);
      }
    }
  }

  /**
   * Apply a single stored event to GraphDB.
   * Unknown event types are logged and ignored.
   */
  async applyEventToGraph(storedEvent) {
    const graphDb = this.ensureInitialized();
    const event = storedEvent.event;
    console.log(`[GraphDBConsumer] Applying ${event.type} to GraphDB (seq=${storedEvent.metadata.sequenceNumber})`);
    switch (event.type) {
      case "resource.created":
      case "resource.cloned": {
        const cloned = event.type === "resource.cloned";
        const resourceUri3 = this._requireResourceUri(event);
        const resource = this._buildGraphResource(event, resourceUri3, cloned ? "clone" : "api");
        console.log(`[GraphDBConsumer] Creating ${cloned ? "cloned " : ""}resource in graph: ${resourceUri3}`);
        await graphDb.createResource(resource);
        console.log(`[GraphDBConsumer] \u2705 ${cloned ? "Cloned resource" : "Resource"} created in graph: ${resourceUri3}`);
        break;
      }
      case "resource.archived": {
        await graphDb.updateResource(this._requireResourceUri(event), { archived: true });
        break;
      }
      case "resource.unarchived": {
        await graphDb.updateResource(this._requireResourceUri(event), { archived: false });
        break;
      }
      case "annotation.added": {
        console.log(`[GraphDBConsumer] \u{1F50D} ENTERED annotation.added case block`);
        console.log(`[GraphDBConsumer] Annotation ID: ${event.payload.annotation.id}`);
        await graphDb.createAnnotation({
          ...event.payload.annotation,
          creator: didToAgent(event.userId)
        });
        console.log(`[GraphDBConsumer] \u2705 Annotation created in graph: ${event.payload.annotation.id}`);
        break;
      }
      case "annotation.removed": {
        await graphDb.deleteAnnotation(this._annotationUriFor(event.payload.annotationId));
        break;
      }
      case "annotation.body.updated": {
        await this._applyBodyUpdate(graphDb, event);
        break;
      }
      case "entitytag.added": {
        const uri = this._requireResourceUri(event);
        const doc = await graphDb.getResource(uri);
        if (doc) {
          const existing = doc.entityTypes || [];
          // Skip the write when the tag is already present so a redelivered
          // event cannot append a duplicate.
          if (!existing.includes(event.payload.entityType)) {
            await graphDb.updateResource(uri, {
              entityTypes: [...existing, event.payload.entityType]
            });
          }
        }
        break;
      }
      case "entitytag.removed": {
        const uri = this._requireResourceUri(event);
        const doc = await graphDb.getResource(uri);
        if (doc) {
          await graphDb.updateResource(uri, {
            entityTypes: (doc.entityTypes || []).filter((t) => t !== event.payload.entityType)
          });
        }
        break;
      }
      case "entitytype.added": {
        await graphDb.addEntityType(event.payload.entityType);
        break;
      }
      default:
        console.warn(`[GraphDBConsumer] Unknown event type: ${event.type}`);
    }
  }

  /**
   * Rebuild entire resource from events.
   * Useful for recovery or initial sync. A failing delete is logged and
   * ignored because the resource may simply not exist yet.
   */
  async rebuildResource(resourceId2) {
    const graphDb = this.ensureInitialized();
    console.log(`[GraphDBConsumer] Rebuilding resource ${resourceId2} from events`);
    try {
      await graphDb.deleteResource(this._resourceUriFor(makeResourceId(resourceId2)));
    } catch (error) {
      console.log(`[GraphDBConsumer] No existing resource to delete: ${resourceId2}`, error);
    }
    const query = new EventQuery(this.eventStore.log.storage);
    const events = await query.getResourceEvents(resourceId2);
    for (const storedEvent of events) {
      await this.applyEventToGraph(storedEvent);
    }
    console.log(`[GraphDBConsumer] Rebuilt ${resourceId2} from ${events.length} events`);
  }

  /**
   * Rebuild entire GraphDB from all events.
   * Uses two-pass approach to ensure all resources exist before creating
   * REFERENCES edges: pass 1 applies everything except
   * annotation.body.updated, pass 2 applies only annotation.body.updated.
   */
  async rebuildAll() {
    const graphDb = this.ensureInitialized();
    console.log("[GraphDBConsumer] Rebuilding entire GraphDB from events...");
    console.log("[GraphDBConsumer] Using two-pass approach: nodes first, then edges\n");
    await graphDb.clearDatabase();
    const query = new EventQuery(this.eventStore.log.storage);
    const allResourceIds = await this.eventStore.log.getAllResourceIds();
    console.log(`[GraphDBConsumer] Found ${allResourceIds.length} resources to rebuild`);
    console.log("\n[GraphDBConsumer] === PASS 1: Creating all nodes (resources + annotations) ===");
    for (const resourceId2 of allResourceIds) {
      const events = await query.getResourceEvents(makeResourceId(resourceId2));
      for (const storedEvent of events) {
        if (storedEvent.event.type === "annotation.body.updated") {
          continue;
        }
        await this.applyEventToGraph(storedEvent);
      }
    }
    console.log("[GraphDBConsumer] \u2705 Pass 1 complete - all nodes created\n");
    console.log("[GraphDBConsumer] === PASS 2: Creating all REFERENCES edges ===");
    for (const resourceId2 of allResourceIds) {
      const events = await query.getResourceEvents(makeResourceId(resourceId2));
      for (const storedEvent of events) {
        if (storedEvent.event.type === "annotation.body.updated") {
          await this.applyEventToGraph(storedEvent);
        }
      }
    }
    console.log("[GraphDBConsumer] \u2705 Pass 2 complete - all edges created\n");
    console.log("[GraphDBConsumer] Rebuild complete");
  }

  /**
   * Get consumer health metrics: subscription count, last applied sequence
   * number per resource, and resource ids with an event currently in flight.
   */
  getHealthMetrics() {
    return {
      subscriptions: this.subscriptions.size,
      lastProcessed: Object.fromEntries(this.lastProcessed),
      processing: Array.from(this.processing.keys())
    };
  }

  /** Unsubscribe from a single resource (no-op when not subscribed). */
  async unsubscribeFromResource(resourceId2) {
    const subscription = this.subscriptions.get(resourceId2);
    if (subscription) {
      this._release(subscription);
      this.subscriptions.delete(resourceId2);
      console.log(`[GraphDBConsumer] Unsubscribed from ${resourceId2}`);
    }
  }

  /** Unsubscribe from all resources (the global subscription is kept). */
  async unsubscribeAll() {
    for (const subscription of this.subscriptions.values()) {
      this._release(subscription);
    }
    this.subscriptions.clear();
    console.log("[GraphDBConsumer] Unsubscribed from all resources");
  }

  /** Shutdown consumer: drop resource subscriptions, then the global one. */
  async shutdown() {
    await this.unsubscribeAll();
    if (this._globalSubscription) {
      this._release(this._globalSubscription);
      this._globalSubscription = null;
      console.log("[GraphDBConsumer] Unsubscribed from global events");
    }
    console.log("[GraphDBConsumer] Shut down");
  }

  /** Call `unsubscribe()` on a handle if it exists and exposes that method. */
  _release(subscription) {
    if (subscription && typeof subscription.unsubscribe === "function") {
      subscription.unsubscribe();
    }
  }

  /** Full resource URI for a resource id, based on the backend publicURL. */
  _resourceUriFor(resourceId2) {
    return toResourceUri({ baseUrl: this.config.services.backend.publicURL }, resourceId2);
  }

  /** Full annotation URI for an annotation id, based on the backend publicURL. */
  _annotationUriFor(annotationId2) {
    return toAnnotationUri({ baseUrl: this.config.services.backend.publicURL }, annotationId2);
  }

  /** Resource URI of an event, throwing "<type> requires resourceId" if absent. */
  _requireResourceUri(event) {
    if (!event.resourceId) throw new Error(`${event.type} requires resourceId`);
    return this._resourceUriFor(event.resourceId);
  }

  /** Graph node for a resource.created / resource.cloned event. */
  _buildGraphResource(event, resourceUri3, creationMethod) {
    return {
      "@context": "https://schema.org/",
      "@id": resourceUri3,
      name: event.payload.name,
      entityTypes: event.payload.entityTypes || [],
      representations: [{
        mediaType: event.payload.format,
        checksum: event.payload.contentChecksum,
        rel: "original"
      }],
      archived: false,
      // NOTE(review): this is the time the event is applied, not the time it
      // was recorded, so a rebuild stamps every resource with "now".
      dateCreated: (/* @__PURE__ */ new Date()).toISOString(),
      wasAttributedTo: didToAgent(event.userId),
      creationMethod
    };
  }

  /**
   * Apply an annotation.body.updated event: load the annotation, replay the
   * add/remove/replace operations on its body, and write the body back.
   * Failures are logged and swallowed (best effort), matching the handler's
   * established behavior.
   */
  async _applyBodyUpdate(graphDb, event) {
    console.log(`[GraphDBConsumer] \u{1F50D} ENTERED annotation.body.updated case block`);
    console.log(`[GraphDBConsumer] Event payload:`, JSON.stringify(event.payload));
    try {
      console.log(`[GraphDBConsumer] Creating annotation URI for: ${event.payload.annotationId}`);
      const annotationUri2 = this._annotationUriFor(event.payload.annotationId);
      console.log(`[GraphDBConsumer] \u2705 Annotation URI created: ${annotationUri2}`);
      console.log(`[GraphDBConsumer] Processing annotation.body.updated for ${annotationUri2}`);
      console.log(`[GraphDBConsumer] Operations:`, JSON.stringify(event.payload.operations));
      const currentAnnotation = await graphDb.getAnnotation(annotationUri2);
      console.log(`[GraphDBConsumer] Current annotation in graph:`, currentAnnotation ? "FOUND" : "NOT FOUND");
      if (!currentAnnotation) {
        console.log(`[GraphDBConsumer] \u26A0\uFE0F Annotation not found in graph, skipping update`);
        return;
      }
      console.log(`[GraphDBConsumer] Current body:`, JSON.stringify(currentAnnotation.body));
      // Normalize body to a fresh array: it may be an array, a single item, or absent.
      const bodyArray = Array.isArray(currentAnnotation.body) ? [...currentAnnotation.body] : currentAnnotation.body ? [currentAnnotation.body] : [];
      for (const op of event.payload.operations) {
        console.log(`[GraphDBConsumer] Applying operation:`, JSON.stringify(op));
        if (op.op === "add") {
          if (findBodyItem(bodyArray, op.item) === -1) {
            bodyArray.push(op.item);
            console.log(`[GraphDBConsumer] Added item to body`);
          } else {
            console.log(`[GraphDBConsumer] Item already exists, skipping`);
          }
        } else if (op.op === "remove") {
          const index = findBodyItem(bodyArray, op.item);
          if (index !== -1) {
            bodyArray.splice(index, 1);
            console.log(`[GraphDBConsumer] Removed item from body`);
          }
        } else if (op.op === "replace") {
          const index = findBodyItem(bodyArray, op.oldItem);
          if (index !== -1) {
            bodyArray[index] = op.newItem;
            console.log(`[GraphDBConsumer] Replaced item in body`);
          }
        }
      }
      console.log(`[GraphDBConsumer] New body array:`, JSON.stringify(bodyArray));
      console.log(`[GraphDBConsumer] Calling updateAnnotation...`);
      await graphDb.updateAnnotation(annotationUri2, {
        body: bodyArray
      });
      console.log(`[GraphDBConsumer] \u2705 updateAnnotation completed successfully`);
    } catch (error) {
      console.error(`[GraphDBConsumer] \u274C ERROR in annotation.body.updated handler`);
      console.error(`[GraphDBConsumer] Annotation ID: ${event.payload.annotationId}`);
      console.error(`[GraphDBConsumer] Error:`, error);
      console.error(`[GraphDBConsumer] Error stack:`, error instanceof Error ? error.stack : "N/A");
    }
  }
};
1996
+
1997
+ // src/service.ts
1998
/**
 * Start the Make-Meaning service.
 *
 * Resolves the data directory, then creates in order: the job queue, the
 * event store, the representation store, the inference client, the graph
 * database connection and the graph consumer (subscribed to every resource
 * id currently in the event log). Finally creates and starts the six
 * detection/generation workers. Worker `start()` promises are not awaited;
 * a rejection is only logged.
 *
 * @param config - application config; requires services.filesystem.path and
 *   services.backend.publicURL, optionally _metadata.projectRoot (used to
 *   resolve a relative filesystem path)
 * @returns the created components plus `stop()`, which stops the workers and
 *   the graph consumer and disconnects the graph database. `stop()` always
 *   attempts every shutdown step and rethrows the first failure afterwards.
 * @throws Error when a required config value is missing
 */
async function startMakeMeaning(config) {
  console.log("\u{1F9E0} Starting Make-Meaning service...");
  const configuredPath = config.services?.filesystem?.path;
  if (!configuredPath) {
    throw new Error("services.filesystem.path is required for make-meaning service");
  }
  const baseUrl = config.services?.backend?.publicURL;
  if (!baseUrl) {
    throw new Error("services.backend.publicURL is required for make-meaning service");
  }
  const projectRoot = config._metadata?.projectRoot;
  // Absolute paths are used as-is; relative ones resolve against the project
  // root when known, otherwise against the current working directory.
  let basePath;
  if (path.isAbsolute(configuredPath)) {
    basePath = configuredPath;
  } else if (projectRoot) {
    basePath = path.resolve(projectRoot, configuredPath);
  } else {
    basePath = path.resolve(configuredPath);
  }
  console.log("\u{1F4BC} Initializing job queue...");
  const jobQueue = new JobQueue({ dataDir: basePath });
  await jobQueue.initialize();
  console.log("\u2705 Job queue initialized");
  console.log("\u{1F4CA} Creating event store connection...");
  const eventStore = createEventStoreCore(basePath, baseUrl);
  console.log("\u{1F4E6} Creating representation store...");
  const repStore = new FilesystemRepresentationStore3({ basePath }, projectRoot);
  console.log("\u2705 Representation store created");
  console.log("\u{1F916} Creating inference client...");
  const inferenceClient = await getInferenceClient(config);
  console.log("\u2705 Inference client created");
  console.log("\u{1F4CA} Connecting to graph database...");
  const graphDb = await getGraphDatabase(config);
  console.log("\u2705 Graph database connected");
  console.log("\u{1F504} Starting graph consumer...");
  const graphConsumer = new GraphDBConsumer(config, eventStore, graphDb);
  await graphConsumer.initialize();
  const allResourceIds = await eventStore.log.getAllResourceIds();
  console.log(`[GraphDBConsumer] Subscribing to ${allResourceIds.length} resources`);
  for (const resourceId2 of allResourceIds) {
    await graphConsumer.subscribeToResource(makeResourceId2(resourceId2));
  }
  console.log("\u2705 Graph consumer started");
  console.log("\u{1F477} Creating workers...");
  const workers = {
    detection: new ReferenceDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    generation: new GenerationWorker(jobQueue, config, eventStore, inferenceClient),
    highlight: new HighlightDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    assessment: new AssessmentDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    comment: new CommentDetectionWorker(jobQueue, config, eventStore, inferenceClient),
    tag: new TagDetectionWorker(jobQueue, config, eventStore, inferenceClient)
  };
  // Human-readable names used in the "worker stopped" log lines.
  const workerLabels = {
    detection: "Detection",
    generation: "Generation",
    highlight: "Highlight",
    assessment: "Assessment",
    comment: "Comment",
    tag: "Tag"
  };
  console.log("\u{1F680} Starting workers...");
  for (const [key, worker] of Object.entries(workers)) {
    // Fire and forget: the start() promise is not awaited, only its failure is logged.
    worker.start().catch((error) => {
      console.error(`\u26A0\uFE0F ${workerLabels[key]} worker stopped:`, error);
    });
  }
  console.log("\u2705 All workers started");
  console.log("\u2705 Make-Meaning service started");
  return {
    jobQueue,
    eventStore,
    repStore,
    inferenceClient,
    graphDb,
    workers,
    graphConsumer,
    stop: async () => {
      console.log("\u23F9\uFE0F Stopping Make-Meaning service...");
      const failures = [];
      // allSettled (not all): one worker failing to stop must not prevent the
      // others, the graph consumer, or the database connection from closing.
      const workerResults = await Promise.allSettled(
        Object.values(workers).map(async (worker) => worker.stop())
      );
      for (const result of workerResults) {
        if (result.status === "rejected") {
          failures.push(result.reason);
        }
      }
      try {
        await graphConsumer.stop();
      } catch (error) {
        failures.push(error);
      }
      try {
        await graphDb.disconnect();
      } catch (error) {
        failures.push(error);
      }
      if (failures.length > 0) {
        console.error("\u26A0\uFE0F Make-Meaning service stopped with errors:", failures);
        throw failures[0];
      }
      console.log("\u2705 Make-Meaning service stopped");
    }
  };
}
2095
+
2096
+ // src/resource-context.ts
2097
+ import { FilesystemViewStorage } from "@semiont/event-sourcing";
2098
+ import { FilesystemRepresentationStore as FilesystemRepresentationStore4 } from "@semiont/content";
2099
+ import { getPrimaryRepresentation as getPrimaryRepresentation2, decodeRepresentation as decodeRepresentation2 } from "@semiont/api-client";
2100
/**
 * Read-side helpers for resources, backed by the filesystem view storage and
 * the filesystem representation store. All methods are static and build
 * their storage objects from `config.services.filesystem.path` and
 * `config._metadata?.projectRoot` on every call.
 */
var ResourceContext = class {
  /**
   * Look up a resource's metadata in view storage.
   * @returns the view's `resource`, or null when no view exists for the id
   */
  static async getResourceMetadata(resourceId2, config) {
    const storage = new FilesystemViewStorage(
      config.services.filesystem.path,
      config._metadata?.projectRoot
    );
    const view = await storage.get(resourceId2);
    return view ? view.resource : null;
  }

  /**
   * List resources by scanning every view in view storage.
   *
   * @param filters - optional; `archived` keeps only resources whose archived
   *   flag equals it, `search` keeps only resources whose name contains it
   *   (case-insensitive)
   * @returns matching resources, newest `dateCreated` first (resources
   *   without a date are treated as time 0)
   */
  static async listResources(filters, config) {
    const storage = new FilesystemViewStorage(
      config.services.filesystem.path,
      config._metadata?.projectRoot
    );
    const views = await storage.getAll();

    const passesFilters = (doc) => {
      if (filters?.archived !== void 0 && doc.archived !== filters.archived) {
        return false;
      }
      if (filters?.search) {
        const needle = filters.search.toLowerCase();
        return doc.name.toLowerCase().includes(needle);
      }
      return true;
    };
    const createdAt = (doc) => doc.dateCreated ? new Date(doc.dateCreated).getTime() : 0;

    const resources = Array.from(views, (view) => view.resource).filter(passesFilters);
    resources.sort((a, b) => createdAt(b) - createdAt(a));
    return resources;
  }

  /**
   * Add content previews to resources (for search results).
   * Each result is a copy of the resource with `content` set to the first
   * 200 characters of its decoded primary representation, or "" when there
   * is no usable representation or retrieval/decoding fails.
   */
  static async addContentPreviews(resources, config) {
    const store = new FilesystemRepresentationStore4(
      { basePath: config.services.filesystem.path },
      config._metadata?.projectRoot
    );
    const withPreview = async (doc) => {
      let content = "";
      try {
        const rep = getPrimaryRepresentation2(doc);
        if (rep?.checksum && rep?.mediaType) {
          const raw = await store.retrieve(rep.checksum, rep.mediaType);
          content = decodeRepresentation2(raw, rep.mediaType).slice(0, 200);
        }
      } catch {
        // Best effort: an unreadable representation yields an empty preview.
        content = "";
      }
      return { ...doc, content };
    };
    return await Promise.all(resources.map((doc) => withPreview(doc)));
  }
};
2168
+
2169
+ // src/annotation-context.ts
2170
+ import { getInferenceClient as getInferenceClient2 } from "@semiont/inference";
2171
+ import {
2172
+ getBodySource,
2173
+ getTargetSource,
2174
+ getTargetSelector as getTargetSelector2,
2175
+ getResourceEntityTypes,
2176
+ getTextPositionSelector,
2177
+ getPrimaryRepresentation as getPrimaryRepresentation3,
2178
+ decodeRepresentation as decodeRepresentation3
2179
+ } from "@semiont/api-client";
2180
+ import { FilesystemRepresentationStore as FilesystemRepresentationStore5 } from "@semiont/content";
2181
+ import { FilesystemViewStorage as FilesystemViewStorage2 } from "@semiont/event-sourcing";
2182
+ import { resourceId as createResourceId, uriToResourceId } from "@semiont/core";
2183
+ import { getEntityTypes as getEntityTypes2 } from "@semiont/ontology";
2184
+ var AnnotationContext = class {
2185
+ /**
2186
+ * Build LLM context for an annotation
2187
+ *
2188
+ * @param annotationUri - Full annotation URI (e.g., http://localhost:4000/annotations/abc123)
2189
+ * @param resourceId - Source resource ID
2190
+ * @param config - Application configuration
2191
+ * @param options - Context building options
2192
+ * @returns Rich context for LLM processing
2193
+ * @throws Error if annotation or resource not found
2194
+ */
2195
+ static async buildLLMContext(annotationUri2, resourceId2, config, options = {}) {
2196
+ const {
2197
+ includeSourceContext = true,
2198
+ includeTargetContext = true,
2199
+ contextWindow = 1e3
2200
+ } = options;
2201
+ if (contextWindow < 100 || contextWindow > 5e3) {
2202
+ throw new Error("contextWindow must be between 100 and 5000");
2203
+ }
2204
+ console.log(`[AnnotationContext] buildLLMContext called with annotationUri=${annotationUri2}, resourceId=${resourceId2}`);
2205
+ const basePath = config.services.filesystem.path;
2206
+ console.log(`[AnnotationContext] basePath=${basePath}`);
2207
+ const projectRoot = config._metadata?.projectRoot;
2208
+ const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
2209
+ const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
2210
+ console.log(`[AnnotationContext] Getting view for resourceId=${resourceId2}`);
2211
+ let sourceView;
2212
+ try {
2213
+ sourceView = await viewStorage.get(resourceId2);
2214
+ console.log(`[AnnotationContext] Got view:`, !!sourceView);
2215
+ if (!sourceView) {
2216
+ throw new Error("Source resource not found");
2217
+ }
2218
+ } catch (error) {
2219
+ console.error(`[AnnotationContext] Error getting view:`, error);
2220
+ throw error;
2221
+ }
2222
+ console.log(`[AnnotationContext] Looking for annotation ${annotationUri2} in resource ${resourceId2}`);
2223
+ console.log(`[AnnotationContext] View has ${sourceView.annotations.annotations.length} annotations`);
2224
+ console.log(`[AnnotationContext] First 5 annotation IDs:`, sourceView.annotations.annotations.slice(0, 5).map((a) => a.id));
2225
+ const annotation = sourceView.annotations.annotations.find((a) => a.id === annotationUri2);
2226
+ console.log(`[AnnotationContext] Found annotation:`, !!annotation);
2227
+ if (!annotation) {
2228
+ throw new Error("Annotation not found in view");
2229
+ }
2230
+ const targetSource = getTargetSource(annotation.target);
2231
+ const targetResourceId = targetSource.split("/").pop();
2232
+ console.log(`[AnnotationContext] Target source: ${targetSource}, Expected resource ID: ${resourceId2}, Extracted ID: ${targetResourceId}`);
2233
+ if (targetResourceId !== resourceId2) {
2234
+ throw new Error(`Annotation target resource ID (${targetResourceId}) does not match expected resource ID (${resourceId2})`);
2235
+ }
2236
+ const sourceDoc = sourceView.resource;
2237
+ const bodySource = getBodySource(annotation.body);
2238
+ let targetDoc = null;
2239
+ if (bodySource) {
2240
+ const parts = bodySource.split("/");
2241
+ const lastPart = parts[parts.length - 1];
2242
+ if (!lastPart) {
2243
+ throw new Error(`Invalid body source URI: ${bodySource}`);
2244
+ }
2245
+ const targetResourceId2 = createResourceId(lastPart);
2246
+ const targetView = await viewStorage.get(targetResourceId2);
2247
+ targetDoc = targetView?.resource || null;
2248
+ }
2249
+ let sourceContext;
2250
+ if (includeSourceContext) {
2251
+ const primaryRep = getPrimaryRepresentation3(sourceDoc);
2252
+ if (!primaryRep?.checksum || !primaryRep?.mediaType) {
2253
+ throw new Error("Source content not found");
2254
+ }
2255
+ const sourceContent = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
2256
+ const contentStr = decodeRepresentation3(sourceContent, primaryRep.mediaType);
2257
+ const targetSelectorRaw = getTargetSelector2(annotation.target);
2258
+ const targetSelector = Array.isArray(targetSelectorRaw) ? targetSelectorRaw[0] : targetSelectorRaw;
2259
+ console.log(`[AnnotationContext] Target selector type:`, targetSelector?.type);
2260
+ if (!targetSelector) {
2261
+ console.warn(`[AnnotationContext] No target selector found`);
2262
+ } else if (targetSelector.type === "TextPositionSelector") {
2263
+ const selector = targetSelector;
2264
+ const start = selector.start;
2265
+ const end = selector.end;
2266
+ const before = contentStr.slice(Math.max(0, start - contextWindow), start);
2267
+ const selected = contentStr.slice(start, end);
2268
+ const after = contentStr.slice(end, Math.min(contentStr.length, end + contextWindow));
2269
+ sourceContext = { before, selected, after };
2270
+ console.log(`[AnnotationContext] Built source context using TextPositionSelector (${start}-${end})`);
2271
+ } else if (targetSelector.type === "TextQuoteSelector") {
2272
+ const selector = targetSelector;
2273
+ const exact = selector.exact;
2274
+ const index = contentStr.indexOf(exact);
2275
+ if (index !== -1) {
2276
+ const start = index;
2277
+ const end = index + exact.length;
2278
+ const before = contentStr.slice(Math.max(0, start - contextWindow), start);
2279
+ const selected = exact;
2280
+ const after = contentStr.slice(end, Math.min(contentStr.length, end + contextWindow));
2281
+ sourceContext = { before, selected, after };
2282
+ console.log(`[AnnotationContext] Built source context using TextQuoteSelector (found at ${index})`);
2283
+ } else {
2284
+ console.warn(`[AnnotationContext] TextQuoteSelector exact text not found in content: "${exact.substring(0, 50)}..."`);
2285
+ }
2286
+ } else {
2287
+ console.warn(`[AnnotationContext] Unknown selector type: ${targetSelector.type}`);
2288
+ }
2289
+ }
2290
+ let targetContext;
2291
+ if (includeTargetContext && targetDoc) {
2292
+ const targetRep = getPrimaryRepresentation3(targetDoc);
2293
+ if (targetRep?.checksum && targetRep?.mediaType) {
2294
+ const targetContent = await repStore.retrieve(targetRep.checksum, targetRep.mediaType);
2295
+ const contentStr = decodeRepresentation3(targetContent, targetRep.mediaType);
2296
+ const client = await getInferenceClient2(config);
2297
+ targetContext = {
2298
+ content: contentStr.slice(0, contextWindow * 2),
2299
+ summary: await generateResourceSummary(targetDoc.name, contentStr, getResourceEntityTypes(targetDoc), client)
2300
+ };
2301
+ }
2302
+ }
2303
+ const suggestedResolution = void 0;
2304
+ const generationContext = sourceContext ? {
2305
+ sourceContext: {
2306
+ before: sourceContext.before || "",
2307
+ selected: sourceContext.selected,
2308
+ after: sourceContext.after || ""
2309
+ },
2310
+ metadata: {
2311
+ resourceType: "document",
2312
+ language: sourceDoc.language,
2313
+ entityTypes: getEntityTypes2(annotation)
2314
+ }
2315
+ } : void 0;
2316
+ const response = {
2317
+ annotation,
2318
+ sourceResource: sourceDoc,
2319
+ targetResource: targetDoc,
2320
+ ...generationContext ? { context: generationContext } : {},
2321
+ ...sourceContext ? { sourceContext } : {},
2322
+ // Keep for backward compatibility
2323
+ ...targetContext ? { targetContext } : {},
2324
+ ...suggestedResolution ? { suggestedResolution } : {}
2325
+ };
2326
+ return response;
2327
+ }
2328
+ /**
2329
+ * Get resource annotations from view storage (fast path)
2330
+ * Throws if view missing
2331
+ */
2332
+ static async getResourceAnnotations(resourceId2, config) {
2333
+ if (!config.services?.filesystem?.path) {
2334
+ throw new Error("Filesystem path not found in configuration");
1100
2335
  }
1101
- this.isFirstProgress = true;
1102
- await this.processAssessmentDetectionJob(job);
2336
+ const basePath = config.services.filesystem.path;
2337
+ const projectRoot = config._metadata?.projectRoot;
2338
+ const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
2339
+ const view = await viewStorage.get(resourceId2);
2340
+ if (!view) {
2341
+ throw new Error(`Resource ${resourceId2} not found in view storage`);
2342
+ }
2343
+ return view.annotations;
1103
2344
  }
1104
2345
  /**
1105
- * Override updateJobProgress to emit events to Event Store
2346
+ * Get all annotations of a resource, enriched with resolved document names
2347
+ * @returns Array of all annotation objects
1106
2348
  */
1107
- async updateJobProgress(job) {
1108
- await super.updateJobProgress(job);
1109
- if (job.type !== "assessment-detection") return;
1110
- const assJob = job;
1111
- if (!assJob.progress) return;
1112
- const baseEvent = {
1113
- resourceId: assJob.resourceId,
1114
- userId: assJob.userId,
1115
- version: 1
1116
- };
1117
- const isComplete = assJob.progress.percentage === 100 && assJob.result;
1118
- if (this.isFirstProgress) {
1119
- this.isFirstProgress = false;
1120
- await this.eventStore.appendEvent({
1121
- type: "job.started",
1122
- ...baseEvent,
1123
- payload: {
1124
- jobId: assJob.id,
1125
- jobType: assJob.type
1126
- }
1127
- });
1128
- } else if (isComplete) {
1129
- await this.eventStore.appendEvent({
1130
- type: "job.completed",
1131
- ...baseEvent,
1132
- payload: {
1133
- jobId: assJob.id,
1134
- jobType: assJob.type,
1135
- result: assJob.result
1136
- }
1137
- });
1138
- } else {
1139
- await this.eventStore.appendEvent({
1140
- type: "job.progress",
1141
- ...baseEvent,
1142
- payload: {
1143
- jobId: assJob.id,
1144
- jobType: assJob.type,
1145
- progress: assJob.progress
1146
- }
1147
- });
1148
- }
2349
+ static async getAllAnnotations(resourceId2, config) {
2350
+ const annotations = await this.getResourceAnnotations(resourceId2, config);
2351
+ return await this.enrichResolvedReferences(annotations.annotations, config);
1149
2352
  }
1150
- async handleJobFailure(job, error) {
1151
- await super.handleJobFailure(job, error);
1152
- if (job.status === "failed" && job.type === "assessment-detection") {
1153
- const aJob = job;
1154
- await this.eventStore.appendEvent({
1155
- type: "job.failed",
1156
- resourceId: aJob.resourceId,
1157
- userId: aJob.userId,
1158
- version: 1,
1159
- payload: {
1160
- jobId: aJob.id,
1161
- jobType: aJob.type,
1162
- error: "Assessment detection failed. Please try again later."
2353
+ /**
2354
+ * Enrich reference annotations with resolved document names
2355
+ * Adds _resolvedDocumentName property to annotations that link to documents
2356
+ * @private
2357
+ */
2358
+ static async enrichResolvedReferences(annotations, config) {
2359
+ if (!config.services?.filesystem?.path) {
2360
+ return annotations;
2361
+ }
2362
+ const resolvedUris = /* @__PURE__ */ new Set();
2363
+ for (const ann of annotations) {
2364
+ if (ann.motivation === "linking" && ann.body) {
2365
+ const body = Array.isArray(ann.body) ? ann.body : [ann.body];
2366
+ for (const item of body) {
2367
+ if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
2368
+ resolvedUris.add(item.source);
2369
+ }
1163
2370
  }
1164
- });
2371
+ }
1165
2372
  }
1166
- }
1167
- async processAssessmentDetectionJob(job) {
1168
- console.log(`[AssessmentDetectionWorker] Processing assessment detection for resource ${job.resourceId} (job: ${job.id})`);
1169
- const resource = await ResourceContext.getResourceMetadata(job.resourceId, this.config);
1170
- if (!resource) {
1171
- throw new Error(`Resource ${job.resourceId} not found`);
2373
+ if (resolvedUris.size === 0) {
2374
+ return annotations;
1172
2375
  }
1173
- job.progress = {
1174
- stage: "analyzing",
1175
- percentage: 10,
1176
- message: "Loading resource..."
1177
- };
1178
- await this.updateJobProgress(job);
1179
- job.progress = {
1180
- stage: "analyzing",
1181
- percentage: 30,
1182
- message: "Analyzing text..."
1183
- };
1184
- await this.updateJobProgress(job);
1185
- const assessments = await AnnotationDetection.detectAssessments(
1186
- job.resourceId,
1187
- this.config,
1188
- job.instructions,
1189
- job.tone,
1190
- job.density
1191
- );
1192
- console.log(`[AssessmentDetectionWorker] Found ${assessments.length} assessments to create`);
1193
- job.progress = {
1194
- stage: "creating",
1195
- percentage: 60,
1196
- message: `Creating ${assessments.length} annotations...`
1197
- };
1198
- await this.updateJobProgress(job);
1199
- let created = 0;
1200
- for (const assessment of assessments) {
2376
+ const basePath = config.services.filesystem.path;
2377
+ const projectRoot = config._metadata?.projectRoot;
2378
+ const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
2379
+ const metadataPromises = Array.from(resolvedUris).map(async (uri) => {
2380
+ const docId = uri.split("/resources/")[1];
2381
+ if (!docId) return null;
1201
2382
  try {
1202
- await this.createAssessmentAnnotation(job.resourceId, job.userId, assessment);
1203
- created++;
1204
- } catch (error) {
1205
- console.error(`[AssessmentDetectionWorker] Failed to create assessment:`, error);
2383
+ const view = await viewStorage.get(docId);
2384
+ if (view?.resource?.name) {
2385
+ return {
2386
+ uri,
2387
+ metadata: {
2388
+ name: view.resource.name,
2389
+ mediaType: view.resource.mediaType
2390
+ }
2391
+ };
2392
+ }
2393
+ } catch (e) {
2394
+ }
2395
+ return null;
2396
+ });
2397
+ const results = await Promise.all(metadataPromises);
2398
+ const uriToMetadata = /* @__PURE__ */ new Map();
2399
+ for (const result of results) {
2400
+ if (result) {
2401
+ uriToMetadata.set(result.uri, result.metadata);
1206
2402
  }
1207
2403
  }
1208
- job.result = {
1209
- assessmentsFound: assessments.length,
1210
- assessmentsCreated: created
1211
- };
1212
- job.progress = {
1213
- stage: "creating",
1214
- percentage: 100,
1215
- message: `Complete! Created ${created} assessments`
1216
- };
1217
- await this.updateJobProgress(job);
1218
- console.log(`[AssessmentDetectionWorker] \u2705 Created ${created}/${assessments.length} assessments`);
1219
- }
1220
- async createAssessmentAnnotation(resourceId2, creatorUserId, assessment) {
1221
- const backendUrl = this.config.services.backend?.publicURL;
1222
- if (!backendUrl) throw new Error("Backend publicURL not configured");
1223
- const annotationId2 = generateAnnotationId3(backendUrl);
1224
- const resourceUri2 = resourceIdToURI4(resourceId2, backendUrl);
1225
- const annotation = {
1226
- "@context": "http://www.w3.org/ns/anno.jsonld",
1227
- "type": "Annotation",
1228
- "id": annotationId2,
1229
- "motivation": "assessing",
1230
- "creator": userId3(creatorUserId),
1231
- "created": (/* @__PURE__ */ new Date()).toISOString(),
1232
- "target": {
1233
- type: "SpecificResource",
1234
- source: resourceUri2,
1235
- selector: [
1236
- {
1237
- type: "TextPositionSelector",
1238
- start: assessment.start,
1239
- end: assessment.end
1240
- },
1241
- {
1242
- type: "TextQuoteSelector",
1243
- exact: assessment.exact,
1244
- ...assessment.prefix && { prefix: assessment.prefix },
1245
- ...assessment.suffix && { suffix: assessment.suffix }
2404
+ return annotations.map((ann) => {
2405
+ if (ann.motivation === "linking" && ann.body) {
2406
+ const body = Array.isArray(ann.body) ? ann.body : [ann.body];
2407
+ for (const item of body) {
2408
+ if (item.type === "SpecificResource" && item.purpose === "linking" && item.source) {
2409
+ const metadata = uriToMetadata.get(item.source);
2410
+ if (metadata) {
2411
+ return {
2412
+ ...ann,
2413
+ _resolvedDocumentName: metadata.name,
2414
+ _resolvedDocumentMediaType: metadata.mediaType
2415
+ };
2416
+ }
1246
2417
  }
1247
- ]
1248
- },
1249
- "body": {
1250
- type: "TextualBody",
1251
- value: assessment.assessment,
1252
- format: "text/plain"
2418
+ }
1253
2419
  }
1254
- };
1255
- await this.eventStore.appendEvent({
1256
- type: "annotation.added",
1257
- resourceId: resourceId2,
1258
- userId: userId3(creatorUserId),
1259
- version: 1,
1260
- payload: { annotation }
2420
+ return ann;
1261
2421
  });
1262
2422
  }
1263
- };
1264
-
1265
- // src/jobs/workers/tag-detection-worker.ts
1266
- import { JobWorker as JobWorker4 } from "@semiont/jobs";
1267
- import { generateAnnotationId as generateAnnotationId4 } from "@semiont/event-sourcing";
1268
- import { resourceIdToURI as resourceIdToURI5 } from "@semiont/core";
1269
- import { getTagSchema as getTagSchema2 } from "@semiont/ontology";
1270
- import { userId as userId4 } from "@semiont/core";
1271
- var TagDetectionWorker = class extends JobWorker4 {
1272
- constructor(jobQueue, config, eventStore) {
1273
- super(jobQueue);
1274
- this.config = config;
1275
- this.eventStore = eventStore;
1276
- }
1277
- isFirstProgress = true;
1278
- getWorkerName() {
1279
- return "TagDetectionWorker";
1280
- }
1281
- canProcessJob(job) {
1282
- return job.type === "tag-detection";
2423
+ /**
2424
+ * Get resource stats (version info)
2425
+ * @returns Version and timestamp info for the annotations
2426
+ */
2427
+ static async getResourceStats(resourceId2, config) {
2428
+ const annotations = await this.getResourceAnnotations(resourceId2, config);
2429
+ return {
2430
+ resourceId: annotations.resourceId,
2431
+ version: annotations.version,
2432
+ updatedAt: annotations.updatedAt
2433
+ };
1283
2434
  }
1284
- async executeJob(job) {
1285
- if (job.type !== "tag-detection") {
1286
- throw new Error(`Invalid job type: ${job.type}`);
2435
+ /**
2436
+ * Check if resource exists in view storage
2437
+ */
2438
+ static async resourceExists(resourceId2, config) {
2439
+ if (!config.services?.filesystem?.path) {
2440
+ throw new Error("Filesystem path not found in configuration");
1287
2441
  }
1288
- this.isFirstProgress = true;
1289
- await this.processTagDetectionJob(job);
2442
+ const basePath = config.services.filesystem.path;
2443
+ const projectRoot = config._metadata?.projectRoot;
2444
+ const viewStorage = new FilesystemViewStorage2(basePath, projectRoot);
2445
+ return await viewStorage.exists(resourceId2);
1290
2446
  }
1291
2447
  /**
1292
- * Override updateJobProgress to emit events to Event Store
2448
+ * Get a single annotation by ID
2449
+ * Loads the resource's view by resource ID, then scans its annotations linearly for a matching short ID (last path segment of the annotation URI)
1293
2450
  */
1294
- async updateJobProgress(job) {
1295
- await super.updateJobProgress(job);
1296
- if (job.type !== "tag-detection") return;
1297
- const tdJob = job;
1298
- if (!tdJob.progress) return;
1299
- const baseEvent = {
1300
- resourceId: tdJob.resourceId,
1301
- userId: tdJob.userId,
1302
- version: 1
1303
- };
1304
- const isComplete = tdJob.progress.percentage === 100 && tdJob.result;
1305
- if (this.isFirstProgress) {
1306
- this.isFirstProgress = false;
1307
- await this.eventStore.appendEvent({
1308
- type: "job.started",
1309
- ...baseEvent,
1310
- payload: {
1311
- jobId: tdJob.id,
1312
- jobType: tdJob.type
1313
- }
1314
- });
1315
- } else if (isComplete) {
1316
- await this.eventStore.appendEvent({
1317
- type: "job.completed",
1318
- ...baseEvent,
1319
- payload: {
1320
- jobId: tdJob.id,
1321
- jobType: tdJob.type,
1322
- result: tdJob.result
1323
- }
1324
- });
1325
- } else {
1326
- await this.eventStore.appendEvent({
1327
- type: "job.progress",
1328
- ...baseEvent,
1329
- payload: {
1330
- jobId: tdJob.id,
1331
- jobType: tdJob.type,
1332
- progress: tdJob.progress
1333
- }
1334
- });
2451
+ static async getAnnotation(annotationId2, resourceId2, config) {
2452
+ const annotations = await this.getResourceAnnotations(resourceId2, config);
2453
+ return annotations.annotations.find((a) => {
2454
+ const shortId = a.id.split("/").pop();
2455
+ return shortId === annotationId2;
2456
+ }) || null;
2457
+ }
2458
+ /**
2459
+ * List annotations with optional filtering
2460
+ * @param filters - Optional filters like resourceId and type
2461
+ * @throws Error if resourceId not provided (cross-resource queries not supported in view storage)
2462
+ */
2463
+ static async listAnnotations(filters, config) {
2464
+ if (!filters?.resourceId) {
2465
+ throw new Error("resourceId is required for annotation listing - cross-resource queries not supported in view storage");
1335
2466
  }
2467
+ return await this.getAllAnnotations(filters.resourceId, config);
1336
2468
  }
1337
- async handleJobFailure(job, error) {
1338
- await super.handleJobFailure(job, error);
1339
- if (job.status === "failed" && job.type === "tag-detection") {
1340
- const tdJob = job;
1341
- await this.eventStore.appendEvent({
1342
- type: "job.failed",
1343
- resourceId: tdJob.resourceId,
1344
- userId: tdJob.userId,
1345
- version: 1,
1346
- payload: {
1347
- jobId: tdJob.id,
1348
- jobType: tdJob.type,
1349
- error: "Tag detection failed. Please try again later."
1350
- }
1351
- });
2469
+ /**
2470
+ * Get annotation context (selected text with surrounding context)
2471
+ */
2472
+ static async getAnnotationContext(annotationId2, resourceId2, contextBefore, contextAfter, config) {
2473
+ const basePath = config.services.filesystem.path;
2474
+ const projectRoot = config._metadata?.projectRoot;
2475
+ const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
2476
+ const annotation = await this.getAnnotation(annotationId2, resourceId2, config);
2477
+ if (!annotation) {
2478
+ throw new Error("Annotation not found");
1352
2479
  }
1353
- }
1354
- async processTagDetectionJob(job) {
1355
- console.log(`[TagDetectionWorker] Processing tag detection for resource ${job.resourceId} (job: ${job.id})`);
1356
- const schema = getTagSchema2(job.schemaId);
1357
- if (!schema) {
1358
- throw new Error(`Invalid tag schema: ${job.schemaId}`);
2480
+ const resource = await ResourceContext.getResourceMetadata(
2481
+ uriToResourceId(getTargetSource(annotation.target)),
2482
+ config
2483
+ );
2484
+ if (!resource) {
2485
+ throw new Error("Resource not found");
1359
2486
  }
1360
- for (const category of job.categories) {
1361
- if (!schema.tags.some((t) => t.name === category)) {
1362
- throw new Error(`Invalid category "${category}" for schema ${job.schemaId}`);
2487
+ const contentStr = await this.getResourceContent(resource, repStore);
2488
+ const context = this.extractAnnotationContext(annotation, contentStr, contextBefore, contextAfter);
2489
+ return {
2490
+ annotation,
2491
+ context,
2492
+ resource: {
2493
+ "@context": resource["@context"],
2494
+ "@id": resource["@id"],
2495
+ name: resource.name,
2496
+ entityTypes: resource.entityTypes,
2497
+ representations: resource.representations,
2498
+ archived: resource.archived,
2499
+ creationMethod: resource.creationMethod,
2500
+ wasAttributedTo: resource.wasAttributedTo,
2501
+ dateCreated: resource.dateCreated
1363
2502
  }
2503
+ };
2504
+ }
2505
+ /**
2506
+ * Generate AI summary of annotation in context
2507
+ */
2508
+ static async generateAnnotationSummary(annotationId2, resourceId2, config) {
2509
+ const basePath = config.services.filesystem.path;
2510
+ const projectRoot = config._metadata?.projectRoot;
2511
+ const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
2512
+ const annotation = await this.getAnnotation(annotationId2, resourceId2, config);
2513
+ if (!annotation) {
2514
+ throw new Error("Annotation not found");
1364
2515
  }
1365
- const resource = await ResourceContext.getResourceMetadata(job.resourceId, this.config);
2516
+ const resource = await ResourceContext.getResourceMetadata(
2517
+ uriToResourceId(getTargetSource(annotation.target)),
2518
+ config
2519
+ );
1366
2520
  if (!resource) {
1367
- throw new Error(`Resource ${job.resourceId} not found`);
1368
- }
1369
- job.progress = {
1370
- stage: "analyzing",
1371
- percentage: 10,
1372
- processedCategories: 0,
1373
- totalCategories: job.categories.length,
1374
- message: "Loading resource..."
1375
- };
1376
- await this.updateJobProgress(job);
1377
- const allTags = [];
1378
- const byCategory = {};
1379
- for (let i = 0; i < job.categories.length; i++) {
1380
- const category = job.categories[i];
1381
- job.progress = {
1382
- stage: "analyzing",
1383
- percentage: 10 + Math.floor(i / job.categories.length * 50),
1384
- currentCategory: category,
1385
- processedCategories: i + 1,
1386
- totalCategories: job.categories.length,
1387
- message: `Analyzing ${category}...`
1388
- };
1389
- await this.updateJobProgress(job);
1390
- const tags = await AnnotationDetection.detectTags(
1391
- job.resourceId,
1392
- this.config,
1393
- job.schemaId,
1394
- category
1395
- );
1396
- console.log(`[TagDetectionWorker] Found ${tags.length} tags for category "${category}"`);
1397
- allTags.push(...tags);
1398
- byCategory[category] = tags.length;
2521
+ throw new Error("Resource not found");
1399
2522
  }
1400
- job.progress = {
1401
- stage: "creating",
1402
- percentage: 60,
1403
- processedCategories: job.categories.length,
1404
- totalCategories: job.categories.length,
1405
- message: `Creating ${allTags.length} tag annotations...`
1406
- };
1407
- await this.updateJobProgress(job);
1408
- let created = 0;
1409
- for (const tag of allTags) {
1410
- try {
1411
- await this.createTagAnnotation(job.resourceId, job.userId, job.schemaId, tag);
1412
- created++;
1413
- } catch (error) {
1414
- console.error(`[TagDetectionWorker] Failed to create tag:`, error);
2523
+ const contentStr = await this.getResourceContent(resource, repStore);
2524
+ const contextSize = 500;
2525
+ const context = this.extractAnnotationContext(annotation, contentStr, contextSize, contextSize);
2526
+ const annotationEntityTypes = getEntityTypes2(annotation);
2527
+ const summary = await this.generateSummary(resource, context, annotationEntityTypes, config);
2528
+ return {
2529
+ summary,
2530
+ relevantFields: {
2531
+ resourceId: resource.id,
2532
+ resourceName: resource.name,
2533
+ entityTypes: annotationEntityTypes
2534
+ },
2535
+ context: {
2536
+ before: context.before.substring(Math.max(0, context.before.length - 200)),
2537
+ // Last 200 chars
2538
+ selected: context.selected,
2539
+ after: context.after.substring(0, 200)
2540
+ // First 200 chars
1415
2541
  }
1416
- }
1417
- job.result = {
1418
- tagsFound: allTags.length,
1419
- tagsCreated: created,
1420
- byCategory
1421
2542
  };
1422
- job.progress = {
1423
- stage: "creating",
1424
- percentage: 100,
1425
- processedCategories: job.categories.length,
1426
- totalCategories: job.categories.length,
1427
- message: `Complete! Created ${created} tags`
1428
- };
1429
- await this.updateJobProgress(job);
1430
- console.log(`[TagDetectionWorker] \u2705 Created ${created}/${allTags.length} tags across ${job.categories.length} categories`);
1431
2543
  }
1432
- async createTagAnnotation(resourceId2, userId_, schemaId, tag) {
1433
- const backendUrl = this.config.services.backend?.publicURL;
1434
- if (!backendUrl) {
1435
- throw new Error("Backend publicURL not configured");
2544
+ /**
2545
+ * Get resource content as string
2546
+ */
2547
+ static async getResourceContent(resource, repStore) {
2548
+ const primaryRep = getPrimaryRepresentation3(resource);
2549
+ if (!primaryRep?.checksum || !primaryRep?.mediaType) {
2550
+ throw new Error("Resource content not found");
1436
2551
  }
1437
- const resourceUri2 = resourceIdToURI5(resourceId2, backendUrl);
1438
- const annotationId2 = generateAnnotationId4(backendUrl);
1439
- const annotation = {
1440
- "@context": "http://www.w3.org/ns/anno.jsonld",
1441
- type: "Annotation",
1442
- id: annotationId2,
1443
- motivation: "tagging",
1444
- target: {
1445
- type: "SpecificResource",
1446
- source: resourceUri2,
1447
- selector: [
1448
- {
1449
- type: "TextPositionSelector",
1450
- start: tag.start,
1451
- end: tag.end
1452
- },
1453
- {
1454
- type: "TextQuoteSelector",
1455
- exact: tag.exact,
1456
- prefix: tag.prefix || "",
1457
- suffix: tag.suffix || ""
1458
- }
1459
- ]
1460
- },
1461
- body: [
1462
- {
1463
- type: "TextualBody",
1464
- value: tag.category,
1465
- purpose: "tagging",
1466
- format: "text/plain",
1467
- language: "en"
1468
- },
1469
- {
1470
- type: "TextualBody",
1471
- value: schemaId,
1472
- purpose: "classifying",
1473
- format: "text/plain"
1474
- }
1475
- ]
2552
+ const content = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
2553
+ return decodeRepresentation3(content, primaryRep.mediaType);
2554
+ }
2555
+ /**
2556
+ * Extract annotation context from resource content
2557
+ */
2558
+ static extractAnnotationContext(annotation, contentStr, contextBefore, contextAfter) {
2559
+ const targetSelector = getTargetSelector2(annotation.target);
2560
+ const posSelector = targetSelector ? getTextPositionSelector(targetSelector) : null;
2561
+ if (!posSelector) {
2562
+ throw new Error("TextPositionSelector required for context");
2563
+ }
2564
+ const selStart = posSelector.start;
2565
+ const selEnd = posSelector.end;
2566
+ const start = Math.max(0, selStart - contextBefore);
2567
+ const end = Math.min(contentStr.length, selEnd + contextAfter);
2568
+ return {
2569
+ before: contentStr.substring(start, selStart),
2570
+ selected: contentStr.substring(selStart, selEnd),
2571
+ after: contentStr.substring(selEnd, end)
1476
2572
  };
1477
- await this.eventStore.appendEvent({
1478
- type: "annotation.added",
1479
- resourceId: resourceId2,
1480
- userId: userId4(userId_),
1481
- version: 1,
1482
- payload: {
1483
- annotation
1484
- }
1485
- });
1486
- console.log(`[TagDetectionWorker] Created tag annotation ${annotationId2} for "${tag.category}": "${tag.exact.substring(0, 50)}..."`);
2573
+ }
2574
+ /**
2575
+ * Generate LLM summary of annotation in context
2576
+ * Creates inference client per-request (HTTP handler context)
+ *
+ * The prompt embeds at most the last 200 characters of context.before, the
+ * full context.selected, at most the first 200 characters of context.after,
+ * the resource name, and the entity types joined with ", ".
+ *
+ * @param resource - Resource descriptor; only `name` is read here
+ * @param context - Object with `before`, `selected` and `after` strings
+ * @param entityTypes - Array of entity type labels
+ * @param config - Passed to getInferenceClient2 to obtain the client
+ * @returns Result of client.generateText(prompt, 500, 0.5)
+ *   (presumably max tokens and temperature; confirm against the inference client API)
2577
+ */
2578
+ static async generateSummary(resource, context, entityTypes, config) {
2579
+ const summaryPrompt = `Summarize this text in context:
2580
+
2581
+ Context before: "${context.before.substring(Math.max(0, context.before.length - 200))}"
2582
+ Selected exact: "${context.selected}"
2583
+ Context after: "${context.after.substring(0, 200)}"
2584
+
2585
+ Resource: ${resource.name}
2586
+ Entity types: ${entityTypes.join(", ")}`;
2587
+ const client = await getInferenceClient2(config);
2588
+ return await client.generateText(summaryPrompt, 500, 0.5);
1487
2589
  }
1488
2590
  };
1489
2591
 
1490
- // src/jobs/workers/reference-detection-worker.ts
1491
- import { JobWorker as JobWorker5 } from "@semiont/jobs";
1492
- import { generateAnnotationId as generateAnnotationId5 } from "@semiont/event-sourcing";
2592
+ // src/graph-context.ts
2593
+ import { getGraphDatabase as getGraphDatabase2 } from "@semiont/graph";
1493
2594
  import { resourceIdToURI as resourceIdToURI6 } from "@semiont/core";
1494
- import {
1495
- getPrimaryRepresentation as getPrimaryRepresentation4,
1496
- decodeRepresentation as decodeRepresentation4,
1497
- validateAndCorrectOffsets
1498
- } from "@semiont/api-client";
1499
- import { extractEntities } from "@semiont/inference";
1500
- import { FilesystemRepresentationStore as FilesystemRepresentationStore4 } from "@semiont/content";
1501
- var ReferenceDetectionWorker = class extends JobWorker5 {
1502
- constructor(jobQueue, config, eventStore) {
1503
- super(jobQueue);
1504
- this.config = config;
1505
- this.eventStore = eventStore;
2595
+ var GraphContext = class {
2596
+ /**
2597
+ * Get all resources referencing this resource (backlinks)
2598
+ * Requires graph traversal - must use graph database
2599
+ */
2600
+ static async getBacklinks(resourceId2, config) {
2601
+ const graphDb = await getGraphDatabase2(config);
2602
+ const resourceUri3 = resourceIdToURI6(resourceId2, config.services.backend.publicURL);
2603
+ return await graphDb.getResourceReferencedBy(resourceUri3);
1506
2604
  }
1507
- getWorkerName() {
1508
- return "ReferenceDetectionWorker";
2605
+ /**
2606
+ * Find shortest path between two resources
2607
+ * Requires graph traversal - must use graph database
2608
+ */
2609
+ static async findPath(fromResourceId, toResourceId, config, maxDepth) {
2610
+ const graphDb = await getGraphDatabase2(config);
2611
+ return await graphDb.findPath(fromResourceId, toResourceId, maxDepth);
1509
2612
  }
1510
- canProcessJob(job) {
1511
- return job.type === "detection";
2613
+ /**
2614
+ * Get resource connections (graph edges)
2615
+ * Requires graph traversal - must use graph database
2616
+ */
2617
+ static async getResourceConnections(resourceId2, config) {
2618
+ const graphDb = await getGraphDatabase2(config);
2619
+ return await graphDb.getResourceConnections(resourceId2);
2620
+ }
2621
+ /**
2622
+ * Search resources by name (cross-resource query)
2623
+ * Requires full-text search - must use graph database
2624
+ */
2625
+ static async searchResources(query, config, limit) {
2626
+ const graphDb = await getGraphDatabase2(config);
2627
+ return await graphDb.searchResources(query, limit);
2628
+ }
2629
+ };
2630
+
2631
+ // src/annotation-detection.ts
2632
+ import { FilesystemRepresentationStore as FilesystemRepresentationStore6 } from "@semiont/content";
2633
+ import { getPrimaryRepresentation as getPrimaryRepresentation4, decodeRepresentation as decodeRepresentation4 } from "@semiont/api-client";
2634
+
2635
+ // src/detection/motivation-prompts.ts
2636
+ var MotivationPrompts = class {
2637
+ /**
2638
+ * Build a prompt for detecting comment-worthy passages
2639
+ *
+ * Two prompt variants exist: one driven by caller instructions, and a default
+ * one with built-in guidelines used when `instructions` is falsy.
+ * `tone` and `density` are tested for truthiness, so an empty tone or a
+ * density of 0 is treated as not provided.
+ *
+ * NOTE(review): in the instruction-driven example, "the quarterly review meeting"
+ * is 28 characters long but the offsets 142-169 span 27; confirm the intended
+ * end offset so the example does not teach an inconsistent convention.
+ *
2640
+ * @param content - The text content to analyze (will be truncated to 8000 chars)
2641
+ * @param instructions - Optional user-provided instructions
2642
+ * @param tone - Optional tone guidance (e.g., "academic", "conversational")
2643
+ * @param density - Optional target number of comments per 2000 words
2644
+ * @returns Formatted prompt string
2645
+ */
2646
+ static buildCommentPrompt(content, instructions, tone, density) {
2647
+ let prompt;
2648
+ if (instructions) {
2649
+ const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
2650
+ const densityGuidance = density ? `
2651
+
2652
+ Aim for approximately ${density} comments per 2000 words of text.` : "";
2653
+ prompt = `Add comments to passages in this text following these instructions:
2654
+
2655
+ ${instructions}${toneGuidance}${densityGuidance}
2656
+
2657
+ Text to analyze:
2658
+ ---
2659
+ ${content.substring(0, 8e3)}
2660
+ ---
2661
+
2662
+ Return a JSON array of comments. Each comment must have:
2663
+ - "exact": the exact text passage being commented on (quoted verbatim from source)
2664
+ - "start": character offset where the passage starts
2665
+ - "end": character offset where the passage ends
2666
+ - "prefix": up to 32 characters of text immediately before the passage
2667
+ - "suffix": up to 32 characters of text immediately after the passage
2668
+ - "comment": your comment following the instructions above
2669
+
2670
+ Return ONLY a valid JSON array, no additional text or explanation.
2671
+
2672
+ Example:
2673
+ [
2674
+ {"exact": "the quarterly review meeting", "start": 142, "end": 169, "prefix": "We need to schedule ", "suffix": " for next month.", "comment": "Who will lead this? Should we invite the external auditors?"}
2675
+ ]`;
2676
+ } else {
2677
+ const toneGuidance = tone ? `
2678
+
2679
+ Tone: Use a ${tone} style in your comments.` : "";
2680
+ const densityGuidance = density ? `
2681
+ - Aim for approximately ${density} comments per 2000 words` : `
2682
+ - Aim for 3-8 comments per 2000 words (not too sparse or dense)`;
2683
+ prompt = `Identify passages in this text that would benefit from explanatory comments.
2684
+ For each passage, provide contextual information, clarification, or background.${toneGuidance}
2685
+
2686
+ Guidelines:
2687
+ - Select passages that reference technical terms, historical figures, complex concepts, or unclear references
2688
+ - Provide comments that ADD VALUE beyond restating the text
2689
+ - Focus on explanation, background, or connections to other ideas
2690
+ - Avoid obvious or trivial comments
2691
+ - Keep comments concise (1-3 sentences typically)${densityGuidance}
2692
+
2693
+ Text to analyze:
2694
+ ---
2695
+ ${content.substring(0, 8e3)}
2696
+ ---
2697
+
2698
+ Return a JSON array of comments. Each comment should have:
2699
+ - "exact": the exact text passage being commented on (quoted verbatim from source)
2700
+ - "start": character offset where the passage starts
2701
+ - "end": character offset where the passage ends
2702
+ - "prefix": up to 32 characters of text immediately before the passage
2703
+ - "suffix": up to 32 characters of text immediately after the passage
2704
+ - "comment": your explanatory comment (1-3 sentences, provide context/background/clarification)
2705
+
2706
+ Return ONLY a valid JSON array, no additional text or explanation.
2707
+
2708
+ Example format:
2709
+ [
2710
+ {"exact": "Ouranos", "start": 52, "end": 59, "prefix": "In the beginning, ", "suffix": " ruled the universe", "comment": "Ouranos (also spelled Uranus) is the primordial Greek deity personifying the sky. In Hesiod's Theogony, he is the son and husband of Gaia (Earth) and father of the Titans."}
2711
+ ]`;
2712
+ }
2713
+ return prompt;
2714
+ }
2715
+ /**
2716
+ * Build a prompt for detecting highlight-worthy passages
2717
+ *
+ * Two prompt variants exist: one driven by caller instructions, and a default
+ * one with built-in guidelines used when `instructions` is falsy.
+ * `density` is tested for truthiness, so a density of 0 is treated as not provided.
+ *
+ * NOTE(review): both embedded examples have offsets one larger than their text:
+ * "revenue grew 45% year-over-year" is 31 characters but 142-174 spans 32, and
+ * "we will discontinue support for legacy systems by March 2025" is 60
+ * characters but 52-113 spans 61. Confirm and align the end offsets.
+ *
2718
+ * @param content - The text content to analyze (will be truncated to 8000 chars)
2719
+ * @param instructions - Optional user-provided instructions
2720
+ * @param density - Optional target number of highlights per 2000 words
2721
+ * @returns Formatted prompt string
2722
+ */
2723
+ static buildHighlightPrompt(content, instructions, density) {
2724
+ let prompt;
2725
+ if (instructions) {
2726
+ const densityGuidance = density ? `
2727
+
2728
+ Aim for approximately ${density} highlights per 2000 words of text.` : "";
2729
+ prompt = `Identify passages in this text to highlight following these instructions:
2730
+
2731
+ ${instructions}${densityGuidance}
2732
+
2733
+ Text to analyze:
2734
+ ---
2735
+ ${content.substring(0, 8e3)}
2736
+ ---
2737
+
2738
+ Return a JSON array of highlights. Each highlight must have:
2739
+ - "exact": the exact text passage to highlight (quoted verbatim from source)
2740
+ - "start": character offset where the passage starts
2741
+ - "end": character offset where the passage ends
2742
+ - "prefix": up to 32 characters of text immediately before the passage
2743
+ - "suffix": up to 32 characters of text immediately after the passage
2744
+
2745
+ Return ONLY a valid JSON array, no additional text or explanation.
2746
+
2747
+ Example:
2748
+ [
2749
+ {"exact": "revenue grew 45% year-over-year", "start": 142, "end": 174, "prefix": "In Q3 2024, ", "suffix": ", exceeding all forecasts."}
2750
+ ]`;
2751
+ } else {
2752
+ const densityGuidance = density ? `
2753
+ - Aim for approximately ${density} highlights per 2000 words` : `
2754
+ - Aim for 3-8 highlights per 2000 words (be selective)`;
2755
+ prompt = `Identify passages in this text that merit highlighting for their importance or salience.
2756
+ Focus on content that readers should notice and remember.
2757
+
2758
+ Guidelines:
2759
+ - Highlight key claims, findings, or conclusions
2760
+ - Highlight important definitions, terminology, or concepts
2761
+ - Highlight notable quotes or particularly striking statements
2762
+ - Highlight critical decisions, action items, or turning points
2763
+ - Select passages that are SIGNIFICANT, not just interesting
2764
+ - Avoid trivial or obvious content${densityGuidance}
2765
+
2766
+ Text to analyze:
2767
+ ---
2768
+ ${content.substring(0, 8e3)}
2769
+ ---
2770
+
2771
+ Return a JSON array of highlights. Each highlight should have:
2772
+ - "exact": the exact text passage to highlight (quoted verbatim from source)
2773
+ - "start": character offset where the passage starts
2774
+ - "end": character offset where the passage ends
2775
+ - "prefix": up to 32 characters of text immediately before the passage
2776
+ - "suffix": up to 32 characters of text immediately after the passage
2777
+
2778
+ Return ONLY a valid JSON array, no additional text or explanation.
2779
+
2780
+ Example format:
2781
+ [
2782
+ {"exact": "we will discontinue support for legacy systems by March 2025", "start": 52, "end": 113, "prefix": "After careful consideration, ", "suffix": ". This decision affects"}
2783
+ ]`;
2784
+ }
2785
+ return prompt;
1512
2786
  }
1513
- async executeJob(job) {
1514
- if (job.type !== "detection") {
1515
- throw new Error(`Invalid job type: ${job.type}`);
2787
+ /**
2788
+ * Build a prompt for detecting assessment-worthy passages
2789
+ *
2790
+ * @param content - The text content to analyze (will be truncated to 8000 chars)
2791
+ * @param instructions - Optional user-provided instructions
2792
+ * @param tone - Optional tone guidance (e.g., "critical", "supportive")
2793
+ * @param density - Optional target number of assessments per 2000 words
2794
+ * @returns Formatted prompt string
2795
+ */
2796
+ static buildAssessmentPrompt(content, instructions, tone, density) {
2797
+ let prompt;
2798
+ if (instructions) {
2799
+ const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
2800
+ const densityGuidance = density ? `
2801
+
2802
+ Aim for approximately ${density} assessments per 2000 words of text.` : "";
2803
+ prompt = `Assess passages in this text following these instructions:
2804
+
2805
+ ${instructions}${toneGuidance}${densityGuidance}
2806
+
2807
+ Text to analyze:
2808
+ ---
2809
+ ${content.substring(0, 8e3)}
2810
+ ---
2811
+
2812
+ Return a JSON array of assessments. Each assessment must have:
2813
+ - "exact": the exact text passage being assessed (quoted verbatim from source)
2814
+ - "start": character offset where the passage starts
2815
+ - "end": character offset where the passage ends
2816
+ - "prefix": up to 32 characters of text immediately before the passage
2817
+ - "suffix": up to 32 characters of text immediately after the passage
2818
+ - "assessment": your assessment following the instructions above
2819
+
2820
+ Return ONLY a valid JSON array, no additional text or explanation.
2821
+
2822
+ Example:
2823
+ [
2824
+ {"exact": "the quarterly revenue target", "start": 142, "end": 169, "prefix": "We established ", "suffix": " for Q4 2024.", "assessment": "This target seems ambitious given market conditions. Consider revising based on recent trends."}
2825
+ ]`;
2826
+ } else {
2827
+ const toneGuidance = tone ? `
2828
+
2829
+ Tone: Use a ${tone} style in your assessments.` : "";
2830
+ const densityGuidance = density ? `
2831
+ - Aim for approximately ${density} assessments per 2000 words` : `
2832
+ - Aim for 2-6 assessments per 2000 words (focus on key passages)`;
2833
+ prompt = `Identify passages in this text that merit critical assessment or evaluation.
2834
+ For each passage, provide analysis of its validity, strength, or implications.${toneGuidance}
2835
+
2836
+ Guidelines:
2837
+ - Select passages containing claims, arguments, conclusions, or assertions
2838
+ - Assess evidence quality, logical soundness, or practical implications
2839
+ - Provide assessments that ADD INSIGHT beyond restating the text
2840
+ - Focus on passages where evaluation would help readers form judgments
2841
+ - Keep assessments concise yet substantive (1-3 sentences typically)${densityGuidance}
2842
+
2843
+ Text to analyze:
2844
+ ---
2845
+ ${content.substring(0, 8e3)}
2846
+ ---
2847
+
2848
+ Return a JSON array of assessments. Each assessment should have:
2849
+ - "exact": the exact text passage being assessed (quoted verbatim from source)
2850
+ - "start": character offset where the passage starts
2851
+ - "end": character offset where the passage ends
2852
+ - "prefix": up to 32 characters of text immediately before the passage
2853
+ - "suffix": up to 32 characters of text immediately after the passage
2854
+ - "assessment": your analytical assessment (1-3 sentences, evaluate validity/strength/implications)
2855
+
2856
+ Return ONLY a valid JSON array, no additional text or explanation.
2857
+
2858
+ Example format:
2859
+ [
2860
+ {"exact": "AI will replace most jobs by 2030", "start": 52, "end": 89, "prefix": "Many experts predict that ", "suffix": ", fundamentally reshaping", "assessment": "This claim lacks nuance and supporting evidence. Employment patterns historically show job transformation rather than wholesale replacement. The timeline appears speculative without specific sector analysis."}
2861
+ ]`;
1516
2862
  }
1517
- await this.processDetectionJob(job);
2863
+ return prompt;
1518
2864
  }
1519
2865
  /**
1520
- * Detect entity references in resource using AI
1521
- * Self-contained implementation for reference detection
2866
+ * Build a prompt for detecting structural tags
1522
2867
  *
1523
- * Public for testing charset handling - see entity-detection-charset.test.ts
2868
+ * @param content - The full text content to analyze (NOT truncated for structural analysis)
2869
+ * @param category - The specific category to detect
2870
+ * @param schemaName - Human-readable schema name
2871
+ * @param schemaDescription - Schema description
2872
+ * @param schemaDomain - Schema domain
2873
+ * @param categoryDescription - Category description
2874
+ * @param categoryExamples - Example questions/guidance for this category
2875
+ * @returns Formatted prompt string
1524
2876
  */
1525
- async detectReferences(resource, entityTypes, includeDescriptiveReferences = false) {
1526
- console.log(`Detecting entities of types: ${entityTypes.join(", ")}${includeDescriptiveReferences ? " (including descriptive references)" : ""}`);
1527
- const detectedAnnotations = [];
1528
- const primaryRep = getPrimaryRepresentation4(resource);
1529
- if (!primaryRep) return detectedAnnotations;
1530
- const mediaType = primaryRep.mediaType;
1531
- const baseMediaType = mediaType?.split(";")[0]?.trim() || "";
1532
- if (baseMediaType === "text/plain" || baseMediaType === "text/markdown") {
1533
- if (!primaryRep.checksum || !primaryRep.mediaType) return detectedAnnotations;
1534
- const basePath = this.config.services.filesystem.path;
1535
- const projectRoot = this.config._metadata?.projectRoot;
1536
- const repStore = new FilesystemRepresentationStore4({ basePath }, projectRoot);
1537
- const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
1538
- const content = decodeRepresentation4(contentBuffer, primaryRep.mediaType);
1539
- const extractedEntities = await extractEntities(content, entityTypes, this.config, includeDescriptiveReferences);
1540
- for (const entity of extractedEntities) {
2877
+ static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples) {
2878
+ const prompt = `You are analyzing a text using the ${schemaName} framework.
2879
+
2880
+ Schema: ${schemaDescription}
2881
+ Domain: ${schemaDomain}
2882
+
2883
+ Your task: Identify passages that serve the structural role of "${category}".
2884
+
2885
+ Category: ${category}
2886
+ Description: ${categoryDescription}
2887
+ Key questions:
2888
+ ${categoryExamples.map((ex) => `- ${ex}`).join("\n")}
2889
+
2890
+ Guidelines:
2891
+ - Focus on STRUCTURAL FUNCTION, not semantic content
2892
+ - A passage serves the "${category}" role if it performs this function in the document's structure
2893
+ - Look for passages that explicitly fulfill this role
2894
+ - Passages can be sentences, paragraphs, or sections
2895
+ - Aim for precision - only tag passages that clearly serve this structural role
2896
+ - Typical documents have 1-5 instances of each category (some may have 0)
2897
+
2898
+ Text to analyze:
2899
+ ---
2900
+ ${content}
2901
+ ---
2902
+
2903
+ Return a JSON array of tags. Each tag should have:
2904
+ - "exact": the exact text passage (quoted verbatim from source)
2905
+ - "start": character offset where the passage starts
2906
+ - "end": character offset where the passage ends
2907
+ - "prefix": up to 32 characters of text immediately before the passage
2908
+ - "suffix": up to 32 characters of text immediately after the passage
2909
+
2910
+ Return ONLY a valid JSON array, no additional text or explanation.
2911
+
2912
+ Example format:
2913
+ [
2914
+ {"exact": "What duty did the defendant owe?", "start": 142, "end": 175, "prefix": "The central question is: ", "suffix": " This question must be"},
2915
+ {"exact": "In tort law, a duty of care is established when...", "start": 412, "end": 520, "prefix": "Legal framework:\\n", "suffix": "\\n\\nApplying this standard"}
2916
+ ]`;
2917
+ return prompt;
2918
+ }
2919
+ };
2920
+
2921
+ // src/detection/motivation-parsers.ts
2922
+ import { validateAndCorrectOffsets as validateAndCorrectOffsets2 } from "@semiont/api-client";
2923
+ var MotivationParsers = class {
2924
+ /**
2925
+ * Parse and validate AI response for comment detection
2926
+ *
2927
+ * @param response - Raw AI response string (may include markdown code fences)
2928
+ * @param content - Original content to validate offsets against
2929
+ * @returns Array of validated comment matches
2930
+ */
2931
+ static parseComments(response, content) {
2932
+ try {
2933
+ let cleaned = response.trim();
2934
+ if (cleaned.startsWith("```")) {
2935
+ cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
2936
+ }
2937
+ const parsed = JSON.parse(cleaned);
2938
+ if (!Array.isArray(parsed)) {
2939
+ console.warn("[MotivationParsers] Comment response is not an array");
2940
+ return [];
2941
+ }
2942
+ const valid = parsed.filter(
2943
+ (c) => c && typeof c.exact === "string" && typeof c.start === "number" && typeof c.end === "number" && typeof c.comment === "string" && c.comment.trim().length > 0
2944
+ );
2945
+ console.log(`[MotivationParsers] Parsed ${valid.length} valid comments from ${parsed.length} total`);
2946
+ const validatedComments = [];
2947
+ for (const comment of valid) {
1541
2948
  try {
1542
- const validated = validateAndCorrectOffsets(
1543
- content,
1544
- entity.startOffset,
1545
- entity.endOffset,
1546
- entity.exact
1547
- );
1548
- const annotation = {
1549
- annotation: {
1550
- selector: {
1551
- start: validated.start,
1552
- end: validated.end,
1553
- exact: validated.exact,
1554
- prefix: validated.prefix,
1555
- suffix: validated.suffix
1556
- },
1557
- entityTypes: [entity.entityType]
1558
- }
1559
- };
1560
- detectedAnnotations.push(annotation);
2949
+ const validated = validateAndCorrectOffsets2(content, comment.start, comment.end, comment.exact);
2950
+ validatedComments.push({
2951
+ ...comment,
2952
+ start: validated.start,
2953
+ end: validated.end,
2954
+ prefix: validated.prefix,
2955
+ suffix: validated.suffix
2956
+ });
1561
2957
  } catch (error) {
1562
- console.warn(`[ReferenceDetectionWorker] Skipping invalid entity "${entity.exact}":`, error);
2958
+ console.warn(`[MotivationParsers] Skipping invalid comment "${comment.exact}":`, error);
1563
2959
  }
1564
2960
  }
2961
+ return validatedComments;
2962
+ } catch (error) {
2963
+ console.error("[MotivationParsers] Failed to parse AI comment response:", error);
2964
+ return [];
1565
2965
  }
1566
- return detectedAnnotations;
1567
2966
  }
1568
- async processDetectionJob(job) {
1569
- console.log(`[ReferenceDetectionWorker] Processing detection for resource ${job.resourceId} (job: ${job.id})`);
1570
- console.log(`[ReferenceDetectionWorker] \u{1F50D} Entity types: ${job.entityTypes.join(", ")}`);
1571
- const resource = await ResourceContext.getResourceMetadata(job.resourceId, this.config);
1572
- if (!resource) {
1573
- throw new Error(`Resource ${job.resourceId} not found`);
1574
- }
1575
- let totalFound = 0;
1576
- let totalEmitted = 0;
1577
- let totalErrors = 0;
1578
- job.progress = {
1579
- totalEntityTypes: job.entityTypes.length,
1580
- processedEntityTypes: 0,
1581
- entitiesFound: 0,
1582
- entitiesEmitted: 0
1583
- };
1584
- await this.updateJobProgress(job);
1585
- for (let i = 0; i < job.entityTypes.length; i++) {
1586
- const entityType = job.entityTypes[i];
1587
- if (!entityType) continue;
1588
- console.log(`[ReferenceDetectionWorker] \u{1F916} [${i + 1}/${job.entityTypes.length}] Detecting ${entityType}...`);
1589
- const detectedAnnotations = await this.detectReferences(resource, [entityType], job.includeDescriptiveReferences);
1590
- totalFound += detectedAnnotations.length;
1591
- console.log(`[ReferenceDetectionWorker] \u2705 Found ${detectedAnnotations.length} ${entityType} entities`);
1592
- for (let idx = 0; idx < detectedAnnotations.length; idx++) {
1593
- const detected = detectedAnnotations[idx];
1594
- if (!detected) {
1595
- console.warn(`[ReferenceDetectionWorker] Skipping undefined entity at index ${idx}`);
1596
- continue;
1597
- }
1598
- let referenceId;
1599
- try {
1600
- const backendUrl = this.config.services.backend?.publicURL;
1601
- if (!backendUrl) {
1602
- throw new Error("Backend publicURL not configured");
1603
- }
1604
- referenceId = generateAnnotationId5(backendUrl);
1605
- } catch (error) {
1606
- console.error(`[ReferenceDetectionWorker] Failed to generate annotation ID:`, error);
1607
- job.status = "failed";
1608
- job.error = "Configuration error: Backend publicURL not set";
1609
- await this.updateJobProgress(job);
1610
- return;
2967
+ /**
2968
+ * Parse and validate AI response for highlight detection
2969
+ *
2970
+ * @param response - Raw AI response string (may include markdown code fences)
2971
+ * @param content - Original content to validate offsets against
2972
+ * @returns Array of validated highlight matches
2973
+ */
2974
+ static parseHighlights(response, content) {
2975
+ try {
2976
+ let cleaned = response.trim();
2977
+ if (cleaned.startsWith("```json") || cleaned.startsWith("```")) {
2978
+ cleaned = cleaned.slice(cleaned.indexOf("\n") + 1);
2979
+ const endIndex = cleaned.lastIndexOf("```");
2980
+ if (endIndex !== -1) {
2981
+ cleaned = cleaned.slice(0, endIndex);
1611
2982
  }
2983
+ }
2984
+ const parsed = JSON.parse(cleaned);
2985
+ if (!Array.isArray(parsed)) {
2986
+ console.warn("[MotivationParsers] Highlight response was not an array");
2987
+ return [];
2988
+ }
2989
+ const highlights = parsed.filter(
2990
+ (h) => h && typeof h.exact === "string" && typeof h.start === "number" && typeof h.end === "number"
2991
+ );
2992
+ const validatedHighlights = [];
2993
+ for (const highlight of highlights) {
1612
2994
  try {
1613
- await this.eventStore.appendEvent({
1614
- type: "annotation.added",
1615
- resourceId: job.resourceId,
1616
- userId: job.userId,
1617
- version: 1,
1618
- payload: {
1619
- annotation: {
1620
- "@context": "http://www.w3.org/ns/anno.jsonld",
1621
- "type": "Annotation",
1622
- id: referenceId,
1623
- motivation: "linking",
1624
- target: {
1625
- source: resourceIdToURI6(job.resourceId, this.config.services.backend.publicURL),
1626
- // Convert to full URI
1627
- selector: [
1628
- {
1629
- type: "TextPositionSelector",
1630
- start: detected.annotation.selector.start,
1631
- end: detected.annotation.selector.end
1632
- },
1633
- {
1634
- type: "TextQuoteSelector",
1635
- exact: detected.annotation.selector.exact,
1636
- ...detected.annotation.selector.prefix && { prefix: detected.annotation.selector.prefix },
1637
- ...detected.annotation.selector.suffix && { suffix: detected.annotation.selector.suffix }
1638
- }
1639
- ]
1640
- },
1641
- body: (detected.annotation.entityTypes || []).map((et) => ({
1642
- type: "TextualBody",
1643
- value: et,
1644
- purpose: "tagging"
1645
- })),
1646
- modified: (/* @__PURE__ */ new Date()).toISOString()
1647
- }
1648
- }
2995
+ const validated = validateAndCorrectOffsets2(content, highlight.start, highlight.end, highlight.exact);
2996
+ validatedHighlights.push({
2997
+ ...highlight,
2998
+ start: validated.start,
2999
+ end: validated.end,
3000
+ prefix: validated.prefix,
3001
+ suffix: validated.suffix
1649
3002
  });
1650
- totalEmitted++;
1651
- if ((idx + 1) % 10 === 0 || idx === detectedAnnotations.length - 1) {
1652
- console.log(`[ReferenceDetectionWorker] \u{1F4E4} Emitted ${idx + 1}/${detectedAnnotations.length} events for ${entityType}`);
1653
- }
1654
3003
  } catch (error) {
1655
- totalErrors++;
1656
- console.error(`[ReferenceDetectionWorker] \u274C Failed to emit event for ${referenceId}:`, error);
3004
+ console.warn(`[MotivationParsers] Skipping invalid highlight "${highlight.exact}":`, error);
1657
3005
  }
1658
3006
  }
1659
- console.log(`[ReferenceDetectionWorker] \u2705 Completed ${entityType}: ${detectedAnnotations.length} found, ${detectedAnnotations.length - (totalErrors - (totalFound - totalEmitted))} emitted`);
1660
- job.progress = {
1661
- totalEntityTypes: job.entityTypes.length,
1662
- processedEntityTypes: i + 1,
1663
- currentEntityType: entityType,
1664
- entitiesFound: totalFound,
1665
- entitiesEmitted: totalEmitted
1666
- };
1667
- await this.updateJobProgress(job);
1668
- }
1669
- job.result = {
1670
- totalFound,
1671
- totalEmitted,
1672
- errors: totalErrors
1673
- };
1674
- console.log(`[ReferenceDetectionWorker] \u2705 Detection complete: ${totalFound} entities found, ${totalEmitted} events emitted, ${totalErrors} errors`);
1675
- }
1676
- async handleJobFailure(job, error) {
1677
- await super.handleJobFailure(job, error);
1678
- if (job.status === "failed" && job.type === "detection") {
1679
- const detJob = job;
1680
- await this.eventStore.appendEvent({
1681
- type: "job.failed",
1682
- resourceId: detJob.resourceId,
1683
- userId: detJob.userId,
1684
- version: 1,
1685
- payload: {
1686
- jobId: detJob.id,
1687
- jobType: detJob.type,
1688
- error: "Entity detection failed. Please try again later."
1689
- }
1690
- });
3007
+ return validatedHighlights;
3008
+ } catch (error) {
3009
+ console.error("[MotivationParsers] Failed to parse AI highlight response:", error);
3010
+ console.error("Raw response:", response);
3011
+ return [];
1691
3012
  }
1692
3013
  }
1693
3014
  /**
1694
- * Update job progress and emit events to Event Store
1695
- * Overrides base class to also emit job progress events
3015
+ * Parse and validate AI response for assessment detection
3016
+ *
3017
+ * @param response - Raw AI response string (may include markdown code fences)
3018
+ * @param content - Original content to validate offsets against
3019
+ * @returns Array of validated assessment matches
1696
3020
  */
1697
- async updateJobProgress(job) {
1698
- await super.updateJobProgress(job);
1699
- if (job.type !== "detection") {
1700
- return;
1701
- }
1702
- const detJob = job;
1703
- const baseEvent = {
1704
- resourceId: detJob.resourceId,
1705
- userId: detJob.userId,
1706
- version: 1
1707
- };
1708
- if (!detJob.progress) {
1709
- return;
1710
- }
1711
- const isFirstUpdate = detJob.progress.processedEntityTypes === 0;
1712
- const isFinalUpdate = detJob.progress.processedEntityTypes === detJob.progress.totalEntityTypes && detJob.progress.totalEntityTypes > 0;
1713
- if (isFirstUpdate) {
1714
- await this.eventStore.appendEvent({
1715
- type: "job.started",
1716
- ...baseEvent,
1717
- payload: {
1718
- jobId: detJob.id,
1719
- jobType: detJob.type,
1720
- totalSteps: detJob.entityTypes.length
1721
- }
1722
- });
1723
- } else if (isFinalUpdate) {
1724
- await this.eventStore.appendEvent({
1725
- type: "job.completed",
1726
- ...baseEvent,
1727
- payload: {
1728
- jobId: detJob.id,
1729
- jobType: detJob.type,
1730
- foundCount: detJob.progress.entitiesFound
3021
+ static parseAssessments(response, content) {
3022
+ try {
3023
+ let cleaned = response.trim();
3024
+ if (cleaned.startsWith("```json") || cleaned.startsWith("```")) {
3025
+ cleaned = cleaned.slice(cleaned.indexOf("\n") + 1);
3026
+ const endIndex = cleaned.lastIndexOf("```");
3027
+ if (endIndex !== -1) {
3028
+ cleaned = cleaned.slice(0, endIndex);
1731
3029
  }
1732
- });
1733
- } else {
1734
- const percentage = Math.round(detJob.progress.processedEntityTypes / detJob.progress.totalEntityTypes * 100);
1735
- await this.eventStore.appendEvent({
1736
- type: "job.progress",
1737
- ...baseEvent,
1738
- payload: {
1739
- jobId: detJob.id,
1740
- jobType: detJob.type,
1741
- percentage,
1742
- currentStep: detJob.progress.currentEntityType,
1743
- processedSteps: detJob.progress.processedEntityTypes,
1744
- totalSteps: detJob.progress.totalEntityTypes,
1745
- foundCount: detJob.progress.entitiesFound
3030
+ }
3031
+ const parsed = JSON.parse(cleaned);
3032
+ if (!Array.isArray(parsed)) {
3033
+ console.warn("[MotivationParsers] Assessment response was not an array");
3034
+ return [];
3035
+ }
3036
+ const assessments = parsed.filter(
3037
+ (a) => a && typeof a.exact === "string" && typeof a.start === "number" && typeof a.end === "number" && typeof a.assessment === "string"
3038
+ );
3039
+ const validatedAssessments = [];
3040
+ for (const assessment of assessments) {
3041
+ try {
3042
+ const validated = validateAndCorrectOffsets2(content, assessment.start, assessment.end, assessment.exact);
3043
+ validatedAssessments.push({
3044
+ ...assessment,
3045
+ start: validated.start,
3046
+ end: validated.end,
3047
+ prefix: validated.prefix,
3048
+ suffix: validated.suffix
3049
+ });
3050
+ } catch (error) {
3051
+ console.warn(`[MotivationParsers] Skipping invalid assessment "${assessment.exact}":`, error);
1746
3052
  }
1747
- });
3053
+ }
3054
+ return validatedAssessments;
3055
+ } catch (error) {
3056
+ console.error("[MotivationParsers] Failed to parse AI assessment response:", error);
3057
+ console.error("Raw response:", response);
3058
+ return [];
1748
3059
  }
1749
3060
  }
1750
- };
1751
-
1752
- // src/jobs/workers/generation-worker.ts
1753
- import { JobWorker as JobWorker6 } from "@semiont/jobs";
1754
- import { FilesystemRepresentationStore as FilesystemRepresentationStore5 } from "@semiont/content";
1755
- import { generateResourceFromTopic } from "@semiont/inference";
1756
- import {
1757
- getTargetSelector as getTargetSelector2,
1758
- getExactText,
1759
- resourceUri,
1760
- annotationUri
1761
- } from "@semiont/api-client";
1762
- import { getEntityTypes as getEntityTypes2 } from "@semiont/ontology";
1763
- import {
1764
- CREATION_METHODS,
1765
- generateUuid,
1766
- resourceId,
1767
- annotationId
1768
- } from "@semiont/core";
1769
- var GenerationWorker = class extends JobWorker6 {
1770
- constructor(jobQueue, config, eventStore) {
1771
- super(jobQueue);
1772
- this.config = config;
1773
- this.eventStore = eventStore;
3061
+ /**
3062
+ * Parse and validate AI response for tag detection
3063
+ * Note: Does NOT validate offsets - caller must do that with content
3064
+ *
3065
+ * @param response - Raw AI response string (may include markdown code fences)
3066
+ * @returns Array of tag matches (offsets not yet validated)
3067
+ */
3068
+ static parseTags(response) {
3069
+ try {
3070
+ let cleaned = response.trim();
3071
+ if (cleaned.startsWith("```")) {
3072
+ cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
3073
+ }
3074
+ const parsed = JSON.parse(cleaned);
3075
+ if (!Array.isArray(parsed)) {
3076
+ console.warn("[MotivationParsers] Tag response is not an array");
3077
+ return [];
3078
+ }
3079
+ const valid = parsed.filter(
3080
+ (t) => t && typeof t.exact === "string" && typeof t.start === "number" && typeof t.end === "number" && t.exact.trim().length > 0
3081
+ );
3082
+ console.log(`[MotivationParsers] Parsed ${valid.length} valid tags from ${parsed.length} total`);
3083
+ return valid;
3084
+ } catch (error) {
3085
+ console.error("[MotivationParsers] Failed to parse AI tag response:", error);
3086
+ return [];
3087
+ }
1774
3088
  }
1775
- getWorkerName() {
1776
- return "GenerationWorker";
3089
+ /**
3090
+ * Validate tag offsets against content and add category
3091
+ * Helper for tag detection after initial parsing
3092
+ *
3093
+ * @param tags - Parsed tags without validated offsets
3094
+ * @param content - Original content to validate against
3095
+ * @param category - Category to assign to validated tags
3096
+ * @returns Array of validated tag matches
3097
+ */
3098
+ static validateTagOffsets(tags, content, category) {
3099
+ const validatedTags = [];
3100
+ for (const tag of tags) {
3101
+ try {
3102
+ const validated = validateAndCorrectOffsets2(content, tag.start, tag.end, tag.exact);
3103
+ validatedTags.push({
3104
+ ...tag,
3105
+ category,
3106
+ start: validated.start,
3107
+ end: validated.end,
3108
+ prefix: validated.prefix,
3109
+ suffix: validated.suffix
3110
+ });
3111
+ } catch (error) {
3112
+ console.warn(`[MotivationParsers] Skipping invalid tag for category "${category}":`, error);
3113
+ }
3114
+ }
3115
+ return validatedTags;
1777
3116
  }
1778
- canProcessJob(job) {
1779
- return job.type === "generation";
3117
+ };
3118
+
3119
+ // src/annotation-detection.ts
3120
+ import { getTagSchema as getTagSchema2, getSchemaCategory } from "@semiont/ontology";
3121
+ var AnnotationDetection = class {
3122
+ /**
3123
+ * Detect comments in a resource
3124
+ *
3125
+ * @param resourceId - The resource to analyze
3126
+ * @param config - Environment configuration
3127
+ * @param client - Inference client for AI operations
3128
+ * @param instructions - Optional user instructions for comment generation
3129
+ * @param tone - Optional tone guidance (e.g., "academic", "conversational")
3130
+ * @param density - Optional target number of comments per 2000 words
3131
+ * @returns Array of validated comment matches
3132
+ */
3133
+ static async detectComments(resourceId2, config, client, instructions, tone, density) {
3134
+ const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
3135
+ if (!resource) {
3136
+ throw new Error(`Resource ${resourceId2} not found`);
3137
+ }
3138
+ const content = await this.loadResourceContent(resourceId2, config);
3139
+ if (!content) {
3140
+ throw new Error(`Could not load content for resource ${resourceId2}`);
3141
+ }
3142
+ const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density);
3143
+ const response = await client.generateText(
3144
+ prompt,
3145
+ 3e3,
3146
+ // maxTokens: Higher than highlights/assessments due to comment text
3147
+ 0.4
3148
+ // temperature: Slightly higher to allow creative context
3149
+ );
3150
+ return MotivationParsers.parseComments(response, content);
1780
3151
  }
1781
- async executeJob(job) {
1782
- if (job.type !== "generation") {
1783
- throw new Error(`Invalid job type: ${job.type}`);
3152
+ /**
3153
+ * Detect highlights in a resource
3154
+ *
3155
+ * @param resourceId - The resource to analyze
3156
+ * @param config - Environment configuration
3157
+ * @param client - Inference client for AI operations
3158
+ * @param instructions - Optional user instructions for highlight selection
3159
+ * @param density - Optional target number of highlights per 2000 words
3160
+ * @returns Array of validated highlight matches
3161
+ */
3162
+ static async detectHighlights(resourceId2, config, client, instructions, density) {
3163
+ const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
3164
+ if (!resource) {
3165
+ throw new Error(`Resource ${resourceId2} not found`);
1784
3166
  }
1785
- await this.processGenerationJob(job);
3167
+ const content = await this.loadResourceContent(resourceId2, config);
3168
+ if (!content) {
3169
+ throw new Error(`Could not load content for resource ${resourceId2}`);
3170
+ }
3171
+ const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density);
3172
+ const response = await client.generateText(
3173
+ prompt,
3174
+ 2e3,
3175
+ // maxTokens: Lower than comments/assessments (no body text)
3176
+ 0.3
3177
+ // temperature: Low for consistent importance judgments
3178
+ );
3179
+ return MotivationParsers.parseHighlights(response, content);
1786
3180
  }
1787
- async processGenerationJob(job) {
1788
- console.log(`[GenerationWorker] Processing generation for reference ${job.referenceId} (job: ${job.id})`);
1789
- const basePath = this.config.services.filesystem.path;
1790
- const projectRoot = this.config._metadata?.projectRoot;
1791
- const repStore = new FilesystemRepresentationStore5({ basePath }, projectRoot);
1792
- job.progress = {
1793
- stage: "fetching",
1794
- percentage: 20,
1795
- message: "Fetching source resource..."
1796
- };
1797
- console.log(`[GenerationWorker] \u{1F4E5} ${job.progress.message}`);
1798
- await this.updateJobProgress(job);
1799
- const { FilesystemViewStorage: FilesystemViewStorage3 } = await import("@semiont/event-sourcing");
1800
- const viewStorage = new FilesystemViewStorage3(basePath, projectRoot);
1801
- const view = await viewStorage.get(job.sourceResourceId);
1802
- if (!view) {
1803
- throw new Error(`Resource ${job.sourceResourceId} not found`);
3181
+ /**
3182
+ * Detect assessments in a resource
3183
+ *
3184
+ * @param resourceId - The resource to analyze
3185
+ * @param config - Environment configuration
3186
+ * @param client - Inference client for AI operations
3187
+ * @param instructions - Optional user instructions for assessment generation
3188
+ * @param tone - Optional tone guidance (e.g., "critical", "supportive")
3189
+ * @param density - Optional target number of assessments per 2000 words
3190
+ * @returns Array of validated assessment matches
3191
+ */
3192
+ static async detectAssessments(resourceId2, config, client, instructions, tone, density) {
3193
+ const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
3194
+ if (!resource) {
3195
+ throw new Error(`Resource ${resourceId2} not found`);
1804
3196
  }
1805
- const projection = view.annotations;
1806
- const expectedAnnotationUri = `${this.config.services.backend.publicURL}/annotations/${job.referenceId}`;
1807
- const annotation = projection.annotations.find(
1808
- (a) => a.id === expectedAnnotationUri && a.motivation === "linking"
3197
+ const content = await this.loadResourceContent(resourceId2, config);
3198
+ if (!content) {
3199
+ throw new Error(`Could not load content for resource ${resourceId2}`);
3200
+ }
3201
+ const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density);
3202
+ const response = await client.generateText(
3203
+ prompt,
3204
+ 3e3,
3205
+ // maxTokens: Higher for assessment text
3206
+ 0.3
3207
+ // temperature: Lower for analytical consistency
1809
3208
  );
1810
- if (!annotation) {
1811
- throw new Error(`Annotation ${job.referenceId} not found in resource ${job.sourceResourceId}`);
3209
+ return MotivationParsers.parseAssessments(response, content);
3210
+ }
3211
+ /**
3212
+ * Detect tags in a resource for a specific category of a tag schema: builds a tag prompt from the schema and category details, sends it to the inference client, then passes the parsed tags through MotivationParsers.validateTagOffsets
3213
+ * Throws an Error for an unknown schema, an unknown category, a missing resource, or missing/empty content. Schema and category are checked before any resource lookup.
3214
+ * @param resourceId - The resource to analyze (the parameter is named resourceId2 in this bundled build)
3215
+ * @param config - Environment configuration
3216
+ * @param client - Inference client for AI operations
3217
+ * @param schemaId - The tag schema identifier (e.g., "irac", "imrad")
3218
+ * @param category - The specific category to detect
3219
+ * @returns Array of validated tag matches, as returned by MotivationParsers.validateTagOffsets(parsedTags, content, category)
3220
+ */
3221
+ static async detectTags(resourceId2, config, client, schemaId, category) {
3222
+ const schema = getTagSchema2(schemaId);
3223
+ if (!schema) {
3224
+ throw new Error(`Invalid tag schema: ${schemaId}`);
1812
3225
  }
1813
- const sourceResource = await ResourceContext.getResourceMetadata(job.sourceResourceId, this.config);
1814
- if (!sourceResource) {
1815
- throw new Error(`Source resource ${job.sourceResourceId} not found`);
3226
+ const categoryInfo = getSchemaCategory(schemaId, category);
3227
+ if (!categoryInfo) {
3228
+ throw new Error(`Invalid category "${category}" for schema ${schemaId}`);
1816
3229
  }
1817
- const targetSelector = getTargetSelector2(annotation.target);
1818
- const resourceName = job.title || (targetSelector ? getExactText(targetSelector) : "") || "New Resource";
1819
- console.log(`[GenerationWorker] Generating resource: "${resourceName}"`);
1820
- if (!job.context) {
1821
- throw new Error("Generation context is required but was not provided in job");
3230
+ const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
3231
+ if (!resource) {
3232
+ throw new Error(`Resource ${resourceId2} not found`);
1822
3233
  }
1823
- console.log(`[GenerationWorker] Using pre-fetched context: ${job.context.sourceContext?.before?.length || 0} chars before, ${job.context.sourceContext?.selected?.length || 0} chars selected, ${job.context.sourceContext?.after?.length || 0} chars after`);
1824
- job.progress = {
1825
- stage: "generating",
1826
- percentage: 40,
1827
- message: "Creating content with AI..."
1828
- };
1829
- console.log(`[GenerationWorker] \u{1F916} ${job.progress.message}`);
1830
- await this.updateJobProgress(job);
1831
- const prompt = job.prompt || `Create a comprehensive resource about "${resourceName}"`;
1832
- const annotationEntityTypes = getEntityTypes2({ body: annotation.body });
1833
- const generatedContent = await generateResourceFromTopic(
1834
- resourceName,
1835
- job.entityTypes || annotationEntityTypes,
1836
- this.config,
3234
+ const content = await this.loadResourceContent(resourceId2, config);
3235
+ if (!content) {
3236
+ throw new Error(`Could not load content for resource ${resourceId2}`);
3237
+ }
3238
+ const prompt = MotivationPrompts.buildTagPrompt(
3239
+ content,
3240
+ category,
3241
+ schema.name,
3242
+ schema.description,
3243
+ schema.domain,
3244
+ categoryInfo.description,
3245
+ categoryInfo.examples
3246
+ );
3247
+ const response = await client.generateText(
1837
3248
  prompt,
1838
- job.language,
1839
- job.context,
1840
- // NEW - context from job (passed from modal)
1841
- job.temperature,
1842
- // NEW - from job
1843
- job.maxTokens
1844
- // NEW - from job
3249
+ 4e3,
3250
+ // maxTokens: Higher for full document analysis
3251
+ 0.2
3252
+ // temperature: Lower for structural consistency
1845
3253
  );
1846
- console.log(`[GenerationWorker] \u2705 Generated ${generatedContent.content.length} bytes of content`);
1847
- job.progress = {
1848
- stage: "generating",
1849
- percentage: 70,
1850
- message: "Content ready, creating resource..."
1851
- };
1852
- await this.updateJobProgress(job);
1853
- const rId = resourceId(generateUuid());
1854
- job.progress = {
1855
- stage: "creating",
1856
- percentage: 85,
1857
- message: "Saving resource..."
1858
- };
1859
- console.log(`[GenerationWorker] \u{1F4BE} ${job.progress.message}`);
1860
- await this.updateJobProgress(job);
1861
- const storedRep = await repStore.store(Buffer.from(generatedContent.content), {
1862
- mediaType: "text/markdown",
1863
- rel: "original"
1864
- });
1865
- console.log(`[GenerationWorker] \u2705 Saved resource representation to filesystem: ${rId}`);
1866
- await this.eventStore.appendEvent({
1867
- type: "resource.created",
1868
- resourceId: rId,
1869
- userId: job.userId,
1870
- version: 1,
1871
- payload: {
1872
- name: resourceName,
1873
- format: "text/markdown",
1874
- contentChecksum: storedRep.checksum,
1875
- creationMethod: CREATION_METHODS.GENERATED,
1876
- entityTypes: job.entityTypes || annotationEntityTypes,
1877
- language: job.language,
1878
- isDraft: true,
1879
- generatedFrom: job.referenceId,
1880
- generationPrompt: void 0
1881
- // Could be added if we track the prompt
1882
- }
1883
- });
1884
- console.log(`[GenerationWorker] Emitted resource.created event for ${rId}`);
1885
- job.progress = {
1886
- stage: "linking",
1887
- percentage: 95,
1888
- message: "Linking reference..."
1889
- };
1890
- console.log(`[GenerationWorker] \u{1F517} ${job.progress.message}`);
1891
- await this.updateJobProgress(job);
1892
- const newResourceUri = resourceUri(`${this.config.services.backend.publicURL}/resources/${rId}`);
1893
- const operations = [{
1894
- op: "add",
1895
- item: {
1896
- type: "SpecificResource",
1897
- source: newResourceUri,
1898
- purpose: "linking"
1899
- }
1900
- }];
1901
- const annotationIdSegment = job.referenceId.split("/").pop();
1902
- await this.eventStore.appendEvent({
1903
- type: "annotation.body.updated",
1904
- resourceId: job.sourceResourceId,
1905
- userId: job.userId,
1906
- version: 1,
1907
- payload: {
1908
- annotationId: annotationId(annotationIdSegment),
1909
- operations
1910
- }
1911
- });
1912
- console.log(`[GenerationWorker] \u2705 Emitted annotation.body.updated event linking ${job.referenceId} \u2192 ${rId}`);
1913
- job.result = {
1914
- resourceId: rId,
1915
- resourceName
1916
- };
1917
- job.progress = {
1918
- stage: "linking",
1919
- percentage: 100,
1920
- message: "Complete!"
1921
- };
1922
- await this.updateJobProgress(job);
1923
- console.log(`[GenerationWorker] \u2705 Generation complete: created resource ${rId}`);
3254
+ const parsedTags = MotivationParsers.parseTags(response);
3255
+ return MotivationParsers.validateTagOffsets(parsedTags, content, category);
1924
3256
  }
1925
3257
  /**
1926
- * Update job progress and emit events to Event Store
1927
- * Overrides base class to also emit job progress events
3258
+ * Load the primary representation of a resource from the filesystem representation store and decode it
3259
+ * Helper method used by the detection methods (detectAssessments and detectTags both call it)
3260
+ * Only text/plain and text/markdown are supported; media type parameters after a semicolon are ignored for this check.
3261
+ * @param resourceId - The resource ID to load (the parameter is named resourceId2 in this bundled build)
3262
+ * @param config - Environment configuration
3263
+ * @returns Resource content as string, or null if the resource or its primary representation is missing, the media type is unsupported, or the representation has no checksum
1928
3264
  */
1929
- async updateJobProgress(job) {
1930
- await super.updateJobProgress(job);
1931
- if (job.type !== "generation") {
1932
- return;
1933
- }
1934
- const genJob = job;
1935
- const baseEvent = {
1936
- resourceId: genJob.sourceResourceId,
1937
- userId: genJob.userId,
1938
- version: 1
1939
- };
1940
- if (genJob.progress?.stage === "fetching" && genJob.progress?.percentage === 20) {
1941
- await this.eventStore.appendEvent({
1942
- type: "job.started",
1943
- ...baseEvent,
1944
- payload: {
1945
- jobId: genJob.id,
1946
- jobType: genJob.type,
1947
- totalSteps: 5
1948
- // fetching, generating, creating, linking, complete
1949
- }
1950
- });
1951
- } else if (genJob.progress?.stage === "linking" && genJob.progress?.percentage === 100) {
1952
- await this.eventStore.appendEvent({
1953
- type: "job.completed",
1954
- ...baseEvent,
1955
- payload: {
1956
- jobId: genJob.id,
1957
- jobType: genJob.type,
1958
- resultResourceId: genJob.result?.resourceId,
1959
- annotationUri: annotationUri(`${this.config.services.backend.publicURL}/annotations/${genJob.referenceId}`)
1960
- }
1961
- });
1962
- } else if (genJob.progress) {
1963
- await this.eventStore.appendEvent({
1964
- type: "job.progress",
1965
- ...baseEvent,
1966
- payload: {
1967
- jobId: genJob.id,
1968
- jobType: genJob.type,
1969
- currentStep: genJob.progress.stage,
1970
- percentage: genJob.progress.percentage,
1971
- message: genJob.progress.message
1972
- }
1973
- });
3265
+ static async loadResourceContent(resourceId2, config) {
3266
+ const resource = await ResourceContext.getResourceMetadata(resourceId2, config);
3267
+ if (!resource) return null;
3268
+ const primaryRep = getPrimaryRepresentation4(resource);
3269
+ if (!primaryRep) return null;
3270
+ const baseMediaType = primaryRep.mediaType?.split(";")[0]?.trim() || "";
3271
+ if (baseMediaType !== "text/plain" && baseMediaType !== "text/markdown") {
3272
+ return null;
1974
3273
  }
3274
+ if (!primaryRep.checksum || !primaryRep.mediaType) return null;
3275
+ const basePath = config.services.filesystem.path;
3276
+ const projectRoot = config._metadata?.projectRoot;
3277
+ const repStore = new FilesystemRepresentationStore6({ basePath }, projectRoot);
3278
+ const contentBuffer = await repStore.retrieve(primaryRep.checksum, primaryRep.mediaType);
3279
+ return decodeRepresentation4(contentBuffer, primaryRep.mediaType);
1975
3280
  }
1976
3281
  };
1977
3282
 
@@ -1985,11 +3290,19 @@ export {
1985
3290
  CommentDetectionWorker,
1986
3291
  GenerationWorker,
1987
3292
  GraphContext,
3293
+ GraphDBConsumer,
1988
3294
  HighlightDetectionWorker,
3295
+ MotivationParsers,
3296
+ MotivationPrompts,
1989
3297
  PACKAGE_NAME,
1990
3298
  ReferenceDetectionWorker,
1991
3299
  ResourceContext,
1992
3300
  TagDetectionWorker,
1993
- VERSION
3301
+ VERSION,
3302
+ extractEntities,
3303
+ generateReferenceSuggestions,
3304
+ generateResourceFromTopic,
3305
+ generateResourceSummary,
3306
+ startMakeMeaning
1994
3307
  };
1995
3308
  //# sourceMappingURL=index.js.map