@semiont/inference 0.2.28-build.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,850 @@
1
+ // src/factory.ts
2
+ import Anthropic from "@anthropic-ai/sdk";
3
+ import { getLocaleEnglishName } from "@semiont/api-client";
4
/**
 * Resolve a locale code to its English language name.
 *
 * @param locale - Locale code to look up
 * @returns The name reported by `getLocaleEnglishName`, or the locale code
 *   itself when that lookup yields a falsy value
 */
function getLanguageName(locale) {
  const englishName = getLocaleEnglishName(locale);
  if (englishName) {
    return englishName;
  }
  return locale;
}
7
/** Client instance shared by every caller; created on first use. */
let inferenceClient = null;

/**
 * Return the shared inference client, constructing it on the first call.
 *
 * The client is built from `config.services.inference` and cached in
 * `inferenceClient`; once cached, later calls return it without reading
 * `config` again. An `apiKey` written as `${NAME}` is replaced with the value
 * of `process.env.NAME`.
 *
 * @param config - Environment config containing `services.inference`
 * @returns The cached or newly created Anthropic client
 * @throws Error when `services.inference` is missing, or when the referenced
 *   environment variable is unset or empty
 */
async function getInferenceClient(config) {
  if (inferenceClient) {
    return inferenceClient;
  }

  const settings = config.services.inference;
  if (!settings) {
    throw new Error("services.inference is required in environment config");
  }

  let apiKey = settings.apiKey;
  const isEnvPlaceholder = apiKey?.startsWith("${") && apiKey.endsWith("}");
  if (isEnvPlaceholder) {
    // Strip the leading "${" and the trailing "}" to get the variable name.
    const envVarName = apiKey.slice(2, -1);
    const resolved = process.env[envVarName];
    if (!resolved) {
      throw new Error(`Environment variable ${envVarName} is not set`);
    }
    apiKey = resolved;
  }

  // Only report whether a key is present; the key itself is never logged.
  const summary = {
    type: settings.type,
    model: settings.model,
    endpoint: settings.endpoint,
    hasApiKey: !!apiKey
  };
  console.log("Inference config loaded:", summary);

  const baseURL = settings.endpoint || settings.baseURL || "https://api.anthropic.com";
  inferenceClient = new Anthropic({ apiKey, baseURL });
  console.log(`Initialized ${settings.type} inference client with model ${settings.model}`);
  return inferenceClient;
}
38
/**
 * Read the configured inference model name.
 *
 * @param config - Environment config containing `services.inference`
 * @returns The value of `services.inference.model`
 * @throws Error when the inference section or its `model` is missing or empty
 */
function getInferenceModel(config) {
  const model = config.services.inference?.model;
  if (model) {
    return model;
  }
  throw new Error("Inference model not configured! Set it in your environment configuration.");
}
45
/**
 * Send a single user prompt to the inference model and return its text reply.
 *
 * @param prompt - Prompt sent as the only user message
 * @param config - Environment config passed to `getInferenceClient` and
 *   `getInferenceModel`
 * @param maxTokens - Value sent as `max_tokens` (default 500)
 * @param temperature - Sampling temperature (default 0.7)
 * @returns The `text` of the first content block whose type is "text"
 * @throws Error when the response contains no text block
 */
async function generateText(prompt, config, maxTokens = 500, temperature = 0.7) {
  console.log("generateText called with prompt length:", prompt.length, "maxTokens:", maxTokens, "temp:", temperature);
  const client = await getInferenceClient(config);
  const request = {
    model: getInferenceModel(config),
    max_tokens: maxTokens,
    temperature,
    messages: [{ role: "user", content: prompt }]
  };
  const response = await client.messages.create(request);
  console.log("Inference response received, content blocks:", response.content.length);

  // Return the first text block; other block types are skipped.
  for (const block of response.content) {
    if (block.type === "text") {
      console.log("Returning text content of length:", block.text.length);
      return block.text;
    }
  }

  console.error("No text content in response:", response.content);
  throw new Error("No text content in inference response");
}
68
/**
 * Generate a short markdown resource about a topic via the inference model.
 *
 * Builds a prompt from the topic, optional entity types, optional user prompt,
 * optional source-document context and an optional language instruction, sends
 * it through `generateText`, and strips any markdown code fence the model
 * wrapped around its answer.
 *
 * @param topic - Topic to write about; also returned unchanged as the title
 * @param entityTypes - Entity type names to focus on (may be empty)
 * @param config - Environment config containing `services.inference`
 * @param userPrompt - Optional extra guidance appended to the prompt
 * @param locale - Optional locale; any value other than "en" adds a language
 *   instruction
 * @param context - Optional object; only `context.sourceContext`
 *   (`before`, `selected`, `after`) is read
 * @param temperature - Optional sampling temperature (defaults to 0.7)
 * @param maxTokens - Optional token limit (defaults to 500)
 * @returns `{ title, content }` where `content` is the cleaned markdown
 */
async function generateResourceFromTopic(topic, entityTypes, config, userPrompt, locale, context, temperature, maxTokens) {
  console.log("generateResourceFromTopic called with:", {
    topic: topic.substring(0, 100),
    entityTypes,
    hasUserPrompt: !!userPrompt,
    locale,
    hasContext: !!context,
    temperature,
    maxTokens
  });
  const inferenceConfig = config.services.inference;
  const provider = inferenceConfig?.type || "anthropic";
  console.log("Using provider:", provider, "with model:", inferenceConfig?.model);
  const finalTemperature = temperature ?? 0.7;
  const finalMaxTokens = maxTokens ?? 500;
  const languageInstruction = locale && locale !== "en" ? `

IMPORTANT: Write the entire resource in ${getLanguageName(locale)}.` : "";
  let contextSection = "";
  if (context?.sourceContext) {
    const { before, selected, after } = context.sourceContext;
    contextSection = `

Source document context:
---
${before ? `...${before}` : ""}
**[${selected}]**
${after ? `${after}...` : ""}
---
`;
  }
  const prompt = `Generate a concise, informative resource about "${topic}".
${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
${userPrompt ? `Additional context: ${userPrompt}` : ""}${contextSection}${languageInstruction}

Requirements:
- Start with a clear heading (# Title)
- Write 2-3 paragraphs of substantive content
- Be factual and informative
- Use markdown formatting
- Return ONLY the markdown content, no JSON, no code fences, no additional wrapper`;

  // Remove a wrapping code fence. An opening fence line that holds nothing but
  // a language tag ("```", "```markdown", "```text", ...) is dropped whole, so
  // the tag never leaks into the content. If real text follows the backticks
  // on the same line, only the backticks are removed.
  const stripCodeFence = (raw) => {
    const text = raw.trim();
    if (!text.startsWith("```")) {
      return text;
    }
    const firstNewline = text.indexOf("\n");
    const infoString = firstNewline === -1 ? null : text.slice(3, firstNewline).trim();
    const isTagOnlyFenceLine = infoString !== null && /^[\w+.-]*$/.test(infoString);
    let body = isTagOnlyFenceLine ? text.slice(firstNewline + 1) : text.slice(3);
    const closingFence = body.lastIndexOf("```");
    if (closingFence !== -1) {
      body = body.slice(0, closingFence);
    }
    return body.trim();
  };

  console.log("Sending prompt to inference (length:", prompt.length, "chars)", "temp:", finalTemperature, "maxTokens:", finalMaxTokens);
  const response = await generateText(prompt, config, finalMaxTokens, finalTemperature);
  console.log("Got raw response (length:", response.length, "chars)");
  const result = {
    title: topic,
    content: stripCodeFence(response)
  };
  console.log("Parsed result:", {
    hasTitle: !!result.title,
    titleLength: result.title?.length,
    hasContent: !!result.content,
    contentLength: result.content?.length
  });
  return result;
}
142
/**
 * Ask the inference model for a 2-3 sentence summary of a resource.
 *
 * Only the first 2000 characters of `content` are sent; longer content is cut
 * there and suffixed with "...".
 *
 * @param resourceName - Title of the resource, quoted in the prompt
 * @param content - Full resource text
 * @param entityTypes - Entity type names listed in the prompt when non-empty
 * @param config - Environment config forwarded to `generateText`
 * @returns The text returned by `generateText` (150 max tokens, temperature 0.5)
 */
async function generateResourceSummary(resourceName, content, entityTypes, config) {
  let excerpt = content;
  if (content.length > 2000) {
    excerpt = `${content.substring(0, 2000)}...`;
  }

  const promptLines = [
    `Create a brief, intelligent summary of this resource titled "${resourceName}".`,
    entityTypes.length > 0 ? `Key entity types: ${entityTypes.join(", ")}` : "",
    "",
    "Resource content:",
    excerpt,
    "",
    "Write a 2-3 sentence summary that captures the key points and would help someone understand what this resource contains."
  ];

  const summary = await generateText(promptLines.join("\n"), config, 150, 0.5);
  return summary;
}
153
/**
 * Ask the inference model for up to three follow-up suggestions for a reference.
 *
 * The reply is split into lines. Each line is trimmed and has a leading list
 * marker removed: "-", "•", a single "*", or a one- or two-digit number
 * followed by "." or ")". Blank lines are dropped.
 *
 * @param referenceTitle - Title of the reference, quoted in the prompt
 * @param config - Environment config forwarded to `generateText`
 * @param entityType - Optional entity type mentioned in the prompt
 * @param currentContent - Optional existing stub text quoted in the prompt
 * @returns At most three suggestion strings, or `null` when the reply is empty
 */
async function generateReferenceSuggestions(referenceTitle, config, entityType, currentContent) {
  const prompt = `For a reference titled "${referenceTitle}"${entityType ? ` (type: ${entityType})` : ""}${currentContent ? ` with current stub: "${currentContent}"` : ""}, suggest 3 specific, actionable next steps or related topics to explore.

Format as a simple list, one suggestion per line.`;
  const response = await generateText(prompt, config, 200, 0.8);
  if (!response) {
    return null;
  }

  // "*" is only treated as a bullet when it is not the start of "**bold**";
  // numbering needs trailing whitespace so text like "3.5x faster" is kept.
  const listMarker = /^(?:[-•]\s*|\*(?!\*)\s*|\d{1,2}[.)]\s+)/;
  return response
    .split("\n")
    // Trim first so indented bullets are recognised at the start of the line.
    .map((line) => line.trim().replace(listMarker, "").trim())
    .filter((line) => line.length > 0)
    .slice(0, 3);
}
163
+
164
+ // src/entity-extractor.ts
165
/**
 * Find the occurrence of `needle` in `text` whose surrounding characters agree
 * with the model-supplied prefix and suffix.
 *
 * `needle` must be non-empty: `indexOf("")` never returns -1, so an empty
 * needle would make the scan loop forever.
 *
 * @returns The start offset of the matching occurrence, or -1 when none matches
 */
function findEntityOccurrenceByContext(text, needle, prefix, suffix) {
  for (let pos = text.indexOf(needle); pos !== -1; pos = text.indexOf(needle, pos + 1)) {
    const end = pos + needle.length;
    const candidatePrefix = text.substring(Math.max(0, pos - 32), pos);
    const candidateSuffix = text.substring(end, Math.min(text.length, end + 32));
    const prefixMatch = !prefix || candidatePrefix.endsWith(prefix);
    const suffixMatch = !suffix || candidateSuffix.startsWith(suffix);
    if (prefixMatch && suffixMatch) {
      return pos;
    }
  }
  return -1;
}

/**
 * Work out where an entity reported by the model really sits in `text`.
 *
 * The model's offsets are used when they are in-range integers whose span
 * equals `entity.exact`. Otherwise the text is searched, first for an
 * occurrence matching the prefix/suffix context, then for the first occurrence.
 *
 * @returns `{ startOffset, endOffset }`, or `null` when the entity is
 *   malformed or its text does not occur in `text`
 */
function resolveEntityOffsets(text, entity) {
  if (!entity || typeof entity.exact !== "string" || entity.exact.length === 0) {
    console.log("\u274C Filtered entity: missing or empty exact text");
    return null;
  }
  const needle = entity.exact;
  const { startOffset, endOffset } = entity;
  console.log(` Type: ${entity.entityType}`);
  console.log(` Text: "${needle}"`);
  console.log(` Offsets from AI: [${startOffset}, ${endOffset}]`);

  // Explicit range checks: substring() would silently swap or clamp bad offsets.
  const offsetsAreExact = Number.isInteger(startOffset) && Number.isInteger(endOffset) && startOffset >= 0 && startOffset <= endOffset && endOffset <= text.length && text.slice(startOffset, endOffset) === needle;
  if (offsetsAreExact) {
    console.log(` \u2705 Offsets correct`);
    return { startOffset, endOffset };
  }

  console.log(` \u26A0\uFE0F Offset mismatch!`);
  console.log(` Expected: "${needle}"`);
  console.log(` Searching for exact match in resource...`);

  const prefix = typeof entity.prefix === "string" ? entity.prefix : "";
  const suffix = typeof entity.suffix === "string" ? entity.suffix : "";
  if (prefix || suffix) {
    console.log(` Using LLM-provided context for disambiguation:`);
    if (prefix) console.log(` Prefix: "${prefix}"`);
    if (suffix) console.log(` Suffix: "${suffix}"`);
    const contextual = findEntityOccurrenceByContext(text, needle, prefix, suffix);
    if (contextual !== -1) {
      console.log(` \u2705 Found match using context at offset ${contextual} (diff: ${contextual - startOffset})`);
      return { startOffset: contextual, endOffset: contextual + needle.length };
    }
    console.log(` \u26A0\uFE0F No occurrence found with matching context`);
  }

  const first = text.indexOf(needle);
  if (first === -1) {
    console.log(` \u274C Cannot find "${needle}" anywhere in resource`);
    console.log(` Resource starts with: "${text.substring(0, 200)}..."`);
    return null;
  }
  console.log(` \u26A0\uFE0F Using first occurrence at offset ${first} (diff: ${first - startOffset})`);
  return { startOffset: first, endOffset: first + needle.length };
}

/**
 * Ask the inference model to find entity mentions in a text and return them
 * with offsets verified against that text.
 *
 * @param exact - Text to analyze; all returned offsets index into this string
 * @param entityTypes - Entity types to look for: strings, or objects with
 *   `type` and optional `examples` (up to three examples go into the prompt)
 * @param config - Environment config for `getInferenceClient` / `getInferenceModel`
 * @param includeDescriptiveReferences - When true, the prompt also asks for
 *   descriptive references such as "the CEO"; default is direct mentions only
 * @returns Entities `{ exact, entityType, startOffset, endOffset, prefix, suffix }`
 *   whose span in `exact` equals their `exact` text. Returns `[]` when the
 *   reply has no text block or is not a JSON array.
 * @throws Error when the reply was cut off by the `max_tokens` limit, because
 *   a truncated entity list would be silently incomplete
 */
async function extractEntities(exact, entityTypes, config, includeDescriptiveReferences = false) {
  console.log("extractEntities called with:", {
    textLength: exact.length,
    entityTypes: Array.isArray(entityTypes) ? entityTypes.map((et) => typeof et === "string" ? et : et.type) : []
  });
  const client = await getInferenceClient(config);
  const entityTypesDescription = entityTypes.map((et) => {
    if (typeof et === "string") {
      return et;
    }
    return et.examples && et.examples.length > 0 ? `${et.type} (examples: ${et.examples.slice(0, 3).join(", ")})` : et.type;
  }).join(", ");
  const descriptiveReferenceGuidance = includeDescriptiveReferences ? `
Include both:
- Direct mentions (names, proper nouns)
- Descriptive references (substantive phrases that refer to entities)

For descriptive references, include:
- Definite descriptions: "the Nobel laureate", "the tech giant", "the former president"
- Role-based references: "the CEO", "the physicist", "the author", "the owner", "the contractor"
- Epithets with context: "the Cupertino-based company", "the iPhone maker"
- References to entities even when identity is unknown or unspecified

Do NOT include:
- Simple pronouns alone: he, she, it, they, him, her, them
- Generic determiners alone: this, that, these, those
- Possessives without substance: his, her, their, its

Examples:
- For "Marie Curie", include "the Nobel laureate" and "the physicist" but NOT "she"
- For an unknown person, include "the owner" or "the contractor" (role-based references count even when identity is unspecified)
` : `
Find direct mentions only (names, proper nouns). Do not include pronouns or descriptive references.
`;
  const prompt = `Identify entity references in the following text. Look for mentions of: ${entityTypesDescription}.
${descriptiveReferenceGuidance}
Text to analyze:
"""
${exact}
"""

Return ONLY a JSON array of entities found. Each entity should have:
- exact: the exact text span from the input
- entityType: one of the provided entity types
- startOffset: character position where the entity starts (0-indexed)
- endOffset: character position where the entity ends
- prefix: up to 32 characters of text immediately before the entity (helps identify correct occurrence)
- suffix: up to 32 characters of text immediately after the entity (helps identify correct occurrence)

Return empty array [] if no entities found.
Do not include markdown formatting or code fences, just the raw JSON array.

Example output:
[{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
  console.log("Sending entity extraction request to model:", getInferenceModel(config));
  const response = await client.messages.create({
    model: getInferenceModel(config),
    // Large limit so that many entities fit without truncation
    max_tokens: 4e3,
    // Lower temperature for more consistent extraction
    temperature: 0.3,
    messages: [
      {
        role: "user",
        content: prompt
      }
    ]
  });
  console.log("Got entity extraction response");
  const textContent = response.content.find((c) => c.type === "text");
  if (!textContent) {
    console.warn("No text content in entity extraction response");
    return [];
  }
  console.log("Entity extraction raw response length:", textContent.text.length);

  let jsonStr = textContent.text.trim();
  if (jsonStr.startsWith("```")) {
    jsonStr = jsonStr.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
  }

  // Truncation is reported by throwing. These throws sit outside any
  // catch-all, so they reach the caller instead of being turned into [].
  const truncated = response.stop_reason === "max_tokens";
  let entities;
  try {
    entities = JSON.parse(jsonStr);
  } catch (error) {
    if (truncated) {
      const errorMsg = "AI response truncated: response hit max_tokens limit before the JSON array was complete. Increase max_tokens or reduce resource size.";
      console.error(`\u274C ${errorMsg}`);
      throw new Error(errorMsg, { cause: error });
    }
    console.error("Failed to parse entity extraction response:", error);
    return [];
  }
  if (truncated) {
    const count = Array.isArray(entities) ? entities.length : 0;
    const errorMsg = `AI response truncated: Found ${count} entities but response hit max_tokens limit. Increase max_tokens or reduce resource size.`;
    console.error(`\u274C ${errorMsg}`);
    throw new Error(errorMsg);
  }
  if (!Array.isArray(entities)) {
    console.error("Failed to parse entity extraction response:", "expected a JSON array");
    return [];
  }
  console.log("Parsed", entities.length, "entities from response");

  const accepted = [];
  entities.forEach((entity, idx) => {
    console.log(`
[Entity ${idx + 1}/${entities.length}]`);
    // A malformed entry is skipped on its own; it must not discard the rest.
    const span = resolveEntityOffsets(exact, entity);
    if (span === null) {
      return;
    }
    console.log(`\u2705 Accepted entity "${entity.exact}" at [${span.startOffset}:${span.endOffset}]`);
    accepted.push({
      exact: entity.exact,
      entityType: entity.entityType,
      startOffset: span.startOffset,
      endOffset: span.endOffset,
      prefix: entity.prefix,
      suffix: entity.suffix
    });
  });
  return accepted;
}
355
+
356
+ // src/motivation-prompts.ts
357
/**
 * Builders for the prompts that ask the model to find comment-, highlight-,
 * assessment- and tag-worthy passages. Every method is static and returns the
 * finished prompt string.
 */
class MotivationPrompts {
  /**
   * Prompt for passages that deserve a comment.
   *
   * @param content - Text to analyze; only the first 8000 characters are used
   * @param instructions - Optional user instructions; selects the instruction-driven prompt
   * @param tone - Optional tone guidance (e.g., "academic", "conversational")
   * @param density - Optional target number of comments per 2000 words
   * @returns Formatted prompt string
   */
  static buildCommentPrompt(content, instructions, tone, density) {
    const excerpt = content.substring(0, 8000);

    if (instructions) {
      const toneNote = tone ? ` Use a ${tone} tone.` : "";
      const densityNote = density ? `\n\nAim for approximately ${density} comments per 2000 words of text.` : "";
      return `Add comments to passages in this text following these instructions:

${instructions}${toneNote}${densityNote}

Text to analyze:
---
${excerpt}
---

Return a JSON array of comments. Each comment must have:
- "exact": the exact text passage being commented on (quoted verbatim from source)
- "start": character offset where the passage starts
- "end": character offset where the passage ends
- "prefix": up to 32 characters of text immediately before the passage
- "suffix": up to 32 characters of text immediately after the passage
- "comment": your comment following the instructions above

Return ONLY a valid JSON array, no additional text or explanation.

Example:
[
{"exact": "the quarterly review meeting", "start": 142, "end": 169, "prefix": "We need to schedule ", "suffix": " for next month.", "comment": "Who will lead this? Should we invite the external auditors?"}
]`;
    }

    const toneNote = tone ? `\n\nTone: Use a ${tone} style in your comments.` : "";
    const densityNote = density
      ? `\n- Aim for approximately ${density} comments per 2000 words`
      : "\n- Aim for 3-8 comments per 2000 words (not too sparse or dense)";
    return `Identify passages in this text that would benefit from explanatory comments.
For each passage, provide contextual information, clarification, or background.${toneNote}

Guidelines:
- Select passages that reference technical terms, historical figures, complex concepts, or unclear references
- Provide comments that ADD VALUE beyond restating the text
- Focus on explanation, background, or connections to other ideas
- Avoid obvious or trivial comments
- Keep comments concise (1-3 sentences typically)${densityNote}

Text to analyze:
---
${excerpt}
---

Return a JSON array of comments. Each comment should have:
- "exact": the exact text passage being commented on (quoted verbatim from source)
- "start": character offset where the passage starts
- "end": character offset where the passage ends
- "prefix": up to 32 characters of text immediately before the passage
- "suffix": up to 32 characters of text immediately after the passage
- "comment": your explanatory comment (1-3 sentences, provide context/background/clarification)

Return ONLY a valid JSON array, no additional text or explanation.

Example format:
[
{"exact": "Ouranos", "start": 52, "end": 59, "prefix": "In the beginning, ", "suffix": " ruled the universe", "comment": "Ouranos (also spelled Uranus) is the primordial Greek deity personifying the sky. In Hesiod's Theogony, he is the son and husband of Gaia (Earth) and father of the Titans."}
]`;
  }

  /**
   * Prompt for passages that deserve highlighting.
   *
   * @param content - Text to analyze; only the first 8000 characters are used
   * @param instructions - Optional user instructions; selects the instruction-driven prompt
   * @param density - Optional target number of highlights per 2000 words
   * @returns Formatted prompt string
   */
  static buildHighlightPrompt(content, instructions, density) {
    const excerpt = content.substring(0, 8000);

    if (instructions) {
      const densityNote = density ? `\n\nAim for approximately ${density} highlights per 2000 words of text.` : "";
      return `Identify passages in this text to highlight following these instructions:

${instructions}${densityNote}

Text to analyze:
---
${excerpt}
---

Return a JSON array of highlights. Each highlight must have:
- "exact": the exact text passage to highlight (quoted verbatim from source)
- "start": character offset where the passage starts
- "end": character offset where the passage ends
- "prefix": up to 32 characters of text immediately before the passage
- "suffix": up to 32 characters of text immediately after the passage

Return ONLY a valid JSON array, no additional text or explanation.

Example:
[
{"exact": "revenue grew 45% year-over-year", "start": 142, "end": 174, "prefix": "In Q3 2024, ", "suffix": ", exceeding all forecasts."}
]`;
    }

    const densityNote = density
      ? `\n- Aim for approximately ${density} highlights per 2000 words`
      : "\n- Aim for 3-8 highlights per 2000 words (be selective)";
    return `Identify passages in this text that merit highlighting for their importance or salience.
Focus on content that readers should notice and remember.

Guidelines:
- Highlight key claims, findings, or conclusions
- Highlight important definitions, terminology, or concepts
- Highlight notable quotes or particularly striking statements
- Highlight critical decisions, action items, or turning points
- Select passages that are SIGNIFICANT, not just interesting
- Avoid trivial or obvious content${densityNote}

Text to analyze:
---
${excerpt}
---

Return a JSON array of highlights. Each highlight should have:
- "exact": the exact text passage to highlight (quoted verbatim from source)
- "start": character offset where the passage starts
- "end": character offset where the passage ends
- "prefix": up to 32 characters of text immediately before the passage
- "suffix": up to 32 characters of text immediately after the passage

Return ONLY a valid JSON array, no additional text or explanation.

Example format:
[
{"exact": "we will discontinue support for legacy systems by March 2025", "start": 52, "end": 113, "prefix": "After careful consideration, ", "suffix": ". This decision affects"}
]`;
  }

  /**
   * Prompt for passages that deserve a critical assessment.
   *
   * @param content - Text to analyze; only the first 8000 characters are used
   * @param instructions - Optional user instructions; selects the instruction-driven prompt
   * @param tone - Optional tone guidance (e.g., "critical", "supportive")
   * @param density - Optional target number of assessments per 2000 words
   * @returns Formatted prompt string
   */
  static buildAssessmentPrompt(content, instructions, tone, density) {
    const excerpt = content.substring(0, 8000);

    if (instructions) {
      const toneNote = tone ? ` Use a ${tone} tone.` : "";
      const densityNote = density ? `\n\nAim for approximately ${density} assessments per 2000 words of text.` : "";
      return `Assess passages in this text following these instructions:

${instructions}${toneNote}${densityNote}

Text to analyze:
---
${excerpt}
---

Return a JSON array of assessments. Each assessment must have:
- "exact": the exact text passage being assessed (quoted verbatim from source)
- "start": character offset where the passage starts
- "end": character offset where the passage ends
- "prefix": up to 32 characters of text immediately before the passage
- "suffix": up to 32 characters of text immediately after the passage
- "assessment": your assessment following the instructions above

Return ONLY a valid JSON array, no additional text or explanation.

Example:
[
{"exact": "the quarterly revenue target", "start": 142, "end": 169, "prefix": "We established ", "suffix": " for Q4 2024.", "assessment": "This target seems ambitious given market conditions. Consider revising based on recent trends."}
]`;
    }

    const toneNote = tone ? `\n\nTone: Use a ${tone} style in your assessments.` : "";
    const densityNote = density
      ? `\n- Aim for approximately ${density} assessments per 2000 words`
      : "\n- Aim for 2-6 assessments per 2000 words (focus on key passages)";
    return `Identify passages in this text that merit critical assessment or evaluation.
For each passage, provide analysis of its validity, strength, or implications.${toneNote}

Guidelines:
- Select passages containing claims, arguments, conclusions, or assertions
- Assess evidence quality, logical soundness, or practical implications
- Provide assessments that ADD INSIGHT beyond restating the text
- Focus on passages where evaluation would help readers form judgments
- Keep assessments concise yet substantive (1-3 sentences typically)${densityNote}

Text to analyze:
---
${excerpt}
---

Return a JSON array of assessments. Each assessment should have:
- "exact": the exact text passage being assessed (quoted verbatim from source)
- "start": character offset where the passage starts
- "end": character offset where the passage ends
- "prefix": up to 32 characters of text immediately before the passage
- "suffix": up to 32 characters of text immediately after the passage
- "assessment": your analytical assessment (1-3 sentences, evaluate validity/strength/implications)

Return ONLY a valid JSON array, no additional text or explanation.

Example format:
[
{"exact": "AI will replace most jobs by 2030", "start": 52, "end": 89, "prefix": "Many experts predict that ", "suffix": ", fundamentally reshaping", "assessment": "This claim lacks nuance and supporting evidence. Employment patterns historically show job transformation rather than wholesale replacement. The timeline appears speculative without specific sector analysis."}
]`;
  }

  /**
   * Prompt for passages that play a given structural role in a document.
   *
   * @param content - Full text to analyze; unlike the other builders it is not truncated
   * @param category - The structural category to detect
   * @param schemaName - Human-readable schema name
   * @param schemaDescription - Schema description
   * @param schemaDomain - Schema domain
   * @param categoryDescription - Category description
   * @param categoryExamples - Guiding questions for the category, rendered one per bullet
   * @returns Formatted prompt string
   */
  static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples) {
    const keyQuestions = categoryExamples.map((ex) => `- ${ex}`).join("\n");
    return `You are analyzing a text using the ${schemaName} framework.

Schema: ${schemaDescription}
Domain: ${schemaDomain}

Your task: Identify passages that serve the structural role of "${category}".

Category: ${category}
Description: ${categoryDescription}
Key questions:
${keyQuestions}

Guidelines:
- Focus on STRUCTURAL FUNCTION, not semantic content
- A passage serves the "${category}" role if it performs this function in the document's structure
- Look for passages that explicitly fulfill this role
- Passages can be sentences, paragraphs, or sections
- Aim for precision - only tag passages that clearly serve this structural role
- Typical documents have 1-5 instances of each category (some may have 0)

Text to analyze:
---
${content}
---

Return a JSON array of tags. Each tag should have:
- "exact": the exact text passage (quoted verbatim from source)
- "start": character offset where the passage starts
- "end": character offset where the passage ends
- "prefix": up to 32 characters of text immediately before the passage
- "suffix": up to 32 characters of text immediately after the passage

Return ONLY a valid JSON array, no additional text or explanation.

Example format:
[
{"exact": "What duty did the defendant owe?", "start": 142, "end": 175, "prefix": "The central question is: ", "suffix": " This question must be"},
{"exact": "In tort law, a duty of care is established when...", "start": 412, "end": 520, "prefix": "Legal framework:\\n", "suffix": "\\n\\nApplying this standard"}
]`;
  }
}
641
+
642
+ // src/motivation-parsers.ts
643
+ import { validateAndCorrectOffsets } from "@semiont/api-client";
644
+ var MotivationParsers = class {
645
+ /**
646
+ * Parse and validate AI response for comment detection
647
+ *
648
+ * @param response - Raw AI response string (may include markdown code fences)
649
+ * @param content - Original content to validate offsets against
650
+ * @returns Array of validated comment matches
651
+ */
652
+ static parseComments(response, content) {
653
+ try {
654
+ let cleaned = response.trim();
655
+ if (cleaned.startsWith("```")) {
656
+ cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
657
+ }
658
+ const parsed = JSON.parse(cleaned);
659
+ if (!Array.isArray(parsed)) {
660
+ console.warn("[MotivationParsers] Comment response is not an array");
661
+ return [];
662
+ }
663
+ const valid = parsed.filter(
664
+ (c) => c && typeof c.exact === "string" && typeof c.start === "number" && typeof c.end === "number" && typeof c.comment === "string" && c.comment.trim().length > 0
665
+ );
666
+ console.log(`[MotivationParsers] Parsed ${valid.length} valid comments from ${parsed.length} total`);
667
+ const validatedComments = [];
668
+ for (const comment of valid) {
669
+ try {
670
+ const validated = validateAndCorrectOffsets(content, comment.start, comment.end, comment.exact);
671
+ validatedComments.push({
672
+ ...comment,
673
+ start: validated.start,
674
+ end: validated.end,
675
+ prefix: validated.prefix,
676
+ suffix: validated.suffix
677
+ });
678
+ } catch (error) {
679
+ console.warn(`[MotivationParsers] Skipping invalid comment "${comment.exact}":`, error);
680
+ }
681
+ }
682
+ return validatedComments;
683
+ } catch (error) {
684
+ console.error("[MotivationParsers] Failed to parse AI comment response:", error);
685
+ return [];
686
+ }
687
+ }
688
+ /**
689
+ * Parse and validate AI response for highlight detection
690
+ *
691
+ * @param response - Raw AI response string (may include markdown code fences)
692
+ * @param content - Original content to validate offsets against
693
+ * @returns Array of validated highlight matches
694
+ */
695
+ static parseHighlights(response, content) {
696
+ try {
697
+ let cleaned = response.trim();
698
+ if (cleaned.startsWith("```json") || cleaned.startsWith("```")) {
699
+ cleaned = cleaned.slice(cleaned.indexOf("\n") + 1);
700
+ const endIndex = cleaned.lastIndexOf("```");
701
+ if (endIndex !== -1) {
702
+ cleaned = cleaned.slice(0, endIndex);
703
+ }
704
+ }
705
+ const parsed = JSON.parse(cleaned);
706
+ if (!Array.isArray(parsed)) {
707
+ console.warn("[MotivationParsers] Highlight response was not an array");
708
+ return [];
709
+ }
710
+ const highlights = parsed.filter(
711
+ (h) => h && typeof h.exact === "string" && typeof h.start === "number" && typeof h.end === "number"
712
+ );
713
+ const validatedHighlights = [];
714
+ for (const highlight of highlights) {
715
+ try {
716
+ const validated = validateAndCorrectOffsets(content, highlight.start, highlight.end, highlight.exact);
717
+ validatedHighlights.push({
718
+ ...highlight,
719
+ start: validated.start,
720
+ end: validated.end,
721
+ prefix: validated.prefix,
722
+ suffix: validated.suffix
723
+ });
724
+ } catch (error) {
725
+ console.warn(`[MotivationParsers] Skipping invalid highlight "${highlight.exact}":`, error);
726
+ }
727
+ }
728
+ return validatedHighlights;
729
+ } catch (error) {
730
+ console.error("[MotivationParsers] Failed to parse AI highlight response:", error);
731
+ console.error("Raw response:", response);
732
+ return [];
733
+ }
734
+ }
735
+ /**
736
+ * Parse and validate AI response for assessment detection
737
+ *
738
+ * @param response - Raw AI response string (may include markdown code fences)
739
+ * @param content - Original content to validate offsets against
740
+ * @returns Array of validated assessment matches
741
+ */
742
+ static parseAssessments(response, content) {
743
+ try {
744
+ let cleaned = response.trim();
745
+ if (cleaned.startsWith("```json") || cleaned.startsWith("```")) {
746
+ cleaned = cleaned.slice(cleaned.indexOf("\n") + 1);
747
+ const endIndex = cleaned.lastIndexOf("```");
748
+ if (endIndex !== -1) {
749
+ cleaned = cleaned.slice(0, endIndex);
750
+ }
751
+ }
752
+ const parsed = JSON.parse(cleaned);
753
+ if (!Array.isArray(parsed)) {
754
+ console.warn("[MotivationParsers] Assessment response was not an array");
755
+ return [];
756
+ }
757
+ const assessments = parsed.filter(
758
+ (a) => a && typeof a.exact === "string" && typeof a.start === "number" && typeof a.end === "number" && typeof a.assessment === "string"
759
+ );
760
+ const validatedAssessments = [];
761
+ for (const assessment of assessments) {
762
+ try {
763
+ const validated = validateAndCorrectOffsets(content, assessment.start, assessment.end, assessment.exact);
764
+ validatedAssessments.push({
765
+ ...assessment,
766
+ start: validated.start,
767
+ end: validated.end,
768
+ prefix: validated.prefix,
769
+ suffix: validated.suffix
770
+ });
771
+ } catch (error) {
772
+ console.warn(`[MotivationParsers] Skipping invalid assessment "${assessment.exact}":`, error);
773
+ }
774
+ }
775
+ return validatedAssessments;
776
+ } catch (error) {
777
+ console.error("[MotivationParsers] Failed to parse AI assessment response:", error);
778
+ console.error("Raw response:", response);
779
+ return [];
780
+ }
781
+ }
782
+ /**
783
+ * Parse and validate AI response for tag detection
784
+ * Note: Does NOT validate offsets - caller must do that with content
785
+ *
786
+ * @param response - Raw AI response string (may include markdown code fences)
787
+ * @returns Array of tag matches (offsets not yet validated)
788
+ */
789
+ static parseTags(response) {
790
+ try {
791
+ let cleaned = response.trim();
792
+ if (cleaned.startsWith("```")) {
793
+ cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
794
+ }
795
+ const parsed = JSON.parse(cleaned);
796
+ if (!Array.isArray(parsed)) {
797
+ console.warn("[MotivationParsers] Tag response is not an array");
798
+ return [];
799
+ }
800
+ const valid = parsed.filter(
801
+ (t) => t && typeof t.exact === "string" && typeof t.start === "number" && typeof t.end === "number" && t.exact.trim().length > 0
802
+ );
803
+ console.log(`[MotivationParsers] Parsed ${valid.length} valid tags from ${parsed.length} total`);
804
+ return valid;
805
+ } catch (error) {
806
+ console.error("[MotivationParsers] Failed to parse AI tag response:", error);
807
+ return [];
808
+ }
809
+ }
810
+ /**
811
+ * Validate tag offsets against content and add category
812
+ * Helper for tag detection after initial parsing
813
+ *
814
+ * @param tags - Parsed tags without validated offsets
815
+ * @param content - Original content to validate against
816
+ * @param category - Category to assign to validated tags
817
+ * @returns Array of validated tag matches
818
+ */
819
+ static validateTagOffsets(tags, content, category) {
820
+ const validatedTags = [];
821
+ for (const tag of tags) {
822
+ try {
823
+ const validated = validateAndCorrectOffsets(content, tag.start, tag.end, tag.exact);
824
+ validatedTags.push({
825
+ ...tag,
826
+ category,
827
+ start: validated.start,
828
+ end: validated.end,
829
+ prefix: validated.prefix,
830
+ suffix: validated.suffix
831
+ });
832
+ } catch (error) {
833
+ console.warn(`[MotivationParsers] Skipping invalid tag for category "${category}":`, error);
834
+ }
835
+ }
836
+ return validatedTags;
837
+ }
838
+ };
839
+ export {
840
+ MotivationParsers,
841
+ MotivationPrompts,
842
+ extractEntities,
843
+ generateReferenceSuggestions,
844
+ generateResourceFromTopic,
845
+ generateResourceSummary,
846
+ generateText,
847
+ getInferenceClient,
848
+ getInferenceModel
849
+ };
850
+ //# sourceMappingURL=index.js.map