npm - @semiont/inference - Versions diffs - 0.2.28-build.40 - Mend

@semiont/inference 0.2.28-build.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js ADDED Viewed

@@ -0,0 +1,850 @@
+// src/factory.ts
+import Anthropic from "@anthropic-ai/sdk";
+import { getLocaleEnglishName } from "@semiont/api-client";
+/**
+ * Resolve a locale code to an English language name for use in prompts.
+ *
+ * @param locale - Locale identifier, handed unchanged to getLocaleEnglishName
+ * @returns The looked-up name, or `locale` itself when the lookup yields a falsy value
+ */
+function getLanguageName(locale) {
+  const englishName = getLocaleEnglishName(locale);
+  return englishName ? englishName : locale;
+}
+// Module-level cache holding the client built by the first successful call below.
+var inferenceClient = null;
+/**
+ * Return the shared Anthropic client, constructing it on first use.
+ *
+ * Once a client exists it is returned as-is; the `config` of later calls is
+ * not consulted. An `apiKey` written as `${NAME}` is replaced by the value of
+ * the environment variable NAME.
+ *
+ * @param config - Environment config; `config.services.inference` must be present
+ * @returns The cached or newly created Anthropic client
+ * @throws Error when `services.inference` is missing, or when a `${NAME}`
+ *   placeholder names an environment variable that is unset or empty
+ */
+async function getInferenceClient(config) {
+  if (inferenceClient) {
+    return inferenceClient;
+  }
+  const settings = config.services.inference;
+  if (!settings) {
+    throw new Error("services.inference is required in environment config");
+  }
+  const configuredKey = settings.apiKey;
+  const isEnvPlaceholder = configuredKey?.startsWith("${") && configuredKey.endsWith("}");
+  let apiKey = configuredKey;
+  if (isEnvPlaceholder) {
+    // Strip the leading "${" and trailing "}" to get the variable name.
+    const envVarName = configuredKey.slice(2, -1);
+    const resolved = process.env[envVarName];
+    if (!resolved) {
+      throw new Error(`Environment variable ${envVarName} is not set`);
+    }
+    apiKey = resolved;
+  }
+  // Log only whether a key is present, never the key itself.
+  console.log("Inference config loaded:", {
+    type: settings.type,
+    model: settings.model,
+    endpoint: settings.endpoint,
+    hasApiKey: !!apiKey
+  });
+  const baseURL = settings.endpoint || settings.baseURL || "https://api.anthropic.com";
+  inferenceClient = new Anthropic({ apiKey, baseURL });
+  console.log(`Initialized ${settings.type} inference client with model ${settings.model}`);
+  return inferenceClient;
+}
+/**
+ * Read the configured inference model name.
+ *
+ * @param config - Environment config; reads `config.services.inference.model`
+ * @returns The configured model identifier
+ * @throws Error when the inference section or its `model` value is missing or falsy
+ */
+function getInferenceModel(config) {
+  const model = config.services.inference?.model;
+  if (model) {
+    return model;
+  }
+  throw new Error("Inference model not configured! Set it in your environment configuration.");
+}
+/**
+ * Send a single user prompt to the inference client and return the first text block.
+ *
+ * @param prompt - Prompt text sent as the only user message
+ * @param config - Environment config used to obtain the client and model name
+ * @param maxTokens - Value passed as `max_tokens` (default 500)
+ * @param temperature - Sampling temperature (default 0.7)
+ * @returns The `text` of the first content block whose type is "text"
+ * @throws Error when the response contains no text block
+ */
+async function generateText(prompt, config, maxTokens = 500, temperature = 0.7) {
+  console.log("generateText called with prompt length:", prompt.length, "maxTokens:", maxTokens, "temp:", temperature);
+  const client = await getInferenceClient(config);
+  const request = {
+    model: getInferenceModel(config),
+    max_tokens: maxTokens,
+    temperature,
+    messages: [{ role: "user", content: prompt }]
+  };
+  const response = await client.messages.create(request);
+  const blocks = response.content;
+  console.log("Inference response received, content blocks:", blocks.length);
+  const textBlock = blocks.find((block) => block.type === "text");
+  if (textBlock && textBlock.type === "text") {
+    console.log("Returning text content of length:", textBlock.text.length);
+    return textBlock.text;
+  }
+  console.error("No text content in response:", response.content);
+  throw new Error("No text content in inference response");
+}
+/**
+ * Generate a short markdown resource about a topic.
+ *
+ * Builds a prompt from the topic, entity types, optional user prompt, optional
+ * source context and optional locale, sends it through generateText, and
+ * strips any surrounding code fence from the reply.
+ *
+ * @param topic - Subject of the resource; also returned unchanged as `title`
+ * @param entityTypes - Entity type names to focus on (may be empty)
+ * @param config - Environment config forwarded to generateText
+ * @param userPrompt - Optional extra instructions appended to the prompt
+ * @param locale - Optional locale; any value other than "en" adds a language instruction
+ * @param context - Optional object; `context.sourceContext` may carry `before`, `selected`, `after`
+ * @param temperature - Optional sampling temperature (defaults to 0.7)
+ * @param maxTokens - Optional token limit (defaults to 500)
+ * @returns An object `{ title, content }` where `content` is the cleaned markdown
+ */
+async function generateResourceFromTopic(topic, entityTypes, config, userPrompt, locale, context, temperature, maxTokens) {
+  console.log("generateResourceFromTopic called with:", {
+    topic: topic.substring(0, 100),
+    entityTypes,
+    hasUserPrompt: !!userPrompt,
+    locale,
+    hasContext: !!context,
+    temperature,
+    maxTokens
+  });
+  const inferenceConfig = config.services.inference;
+  const provider = inferenceConfig?.type || "anthropic";
+  console.log("Using provider:", provider, "with model:", inferenceConfig?.model);
+  const finalTemperature = temperature ?? 0.7;
+  const finalMaxTokens = maxTokens ?? 500;
+  const languageInstruction = locale && locale !== "en" ? `
+IMPORTANT: Write the entire resource in ${getLanguageName(locale)}.` : "";
+  let contextSection = "";
+  if (context?.sourceContext) {
+    const { before, selected, after } = context.sourceContext;
+    contextSection = `
+Source document context:
+---
+${before ? `...${before}` : ""}
+**[${selected}]**
+${after ? `${after}...` : ""}
+---
+`;
+  }
+  const prompt = `Generate a concise, informative resource about "${topic}".
+${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
+${userPrompt ? `Additional context: ${userPrompt}` : ""}${contextSection}${languageInstruction}
+Requirements:
+- Start with a clear heading (# Title)
+- Write 2-3 paragraphs of substantive content
+- Be factual and informative
+- Use markdown formatting
+- Return ONLY the markdown content, no JSON, no code fences, no additional wrapper`;
+  // Remove a wrapping code fence if the model added one despite the instructions.
+  const parseResponse = (response2) => {
+    let content = response2.trim();
+    if (content.startsWith("```markdown") || content.startsWith("```md")) {
+      // Drop the whole opening-fence line.
+      content = content.slice(content.indexOf("\n") + 1);
+      const endIndex = content.lastIndexOf("```");
+      if (endIndex !== -1) {
+        content = content.slice(0, endIndex);
+      }
+    } else if (content.startsWith("```")) {
+      // Drop the backticks, then a language tag if it is alone on the first
+      // line (e.g. "```text"); otherwise the tag would leak into the content.
+      content = content.slice(3).replace(/^[A-Za-z0-9_+-]+[ \t]*\r?\n/, "");
+      const endIndex = content.lastIndexOf("```");
+      if (endIndex !== -1) {
+        content = content.slice(0, endIndex);
+      }
+    }
+    content = content.trim();
+    return {
+      title: topic,
+      content
+    };
+  };
+  console.log("Sending prompt to inference (length:", prompt.length, "chars)", "temp:", finalTemperature, "maxTokens:", finalMaxTokens);
+  const response = await generateText(prompt, config, finalMaxTokens, finalTemperature);
+  console.log("Got raw response (length:", response.length, "chars)");
+  const result = parseResponse(response);
+  console.log("Parsed result:", {
+    hasTitle: !!result.title,
+    titleLength: result.title?.length,
+    hasContent: !!result.content,
+    contentLength: result.content?.length
+  });
+  return result;
+}
+/**
+ * Ask the model for a 2-3 sentence summary of a resource.
+ *
+ * Only the first 2000 characters of `content` are sent; longer content is cut
+ * and suffixed with "...". The request uses 150 max tokens and temperature 0.5.
+ *
+ * @param resourceName - Title quoted in the prompt
+ * @param content - Resource body to summarise
+ * @param entityTypes - Entity type names listed in the prompt when non-empty
+ * @param config - Environment config forwarded to generateText
+ * @returns The text returned by generateText
+ */
+async function generateResourceSummary(resourceName, content, entityTypes, config) {
+  const limit = 2000;
+  const excerpt = content.length > limit ? `${content.substring(0, limit)}...` : content;
+  const promptLines = [
+    `Create a brief, intelligent summary of this resource titled "${resourceName}".`,
+    entityTypes.length > 0 ? `Key entity types: ${entityTypes.join(", ")}` : "",
+    "Resource content:",
+    excerpt,
+    "Write a 2-3 sentence summary that captures the key points and would help someone understand what this resource contains."
+  ];
+  const summary = await generateText(promptLines.join("\n"), config, 150, 0.5);
+  return summary;
+}
+/**
+ * Ask the model for up to three follow-up suggestions for a reference.
+ *
+ * The reply is split into lines; each line is trimmed and stripped of a
+ * leading list marker, empty lines are dropped, and the first three remain.
+ *
+ * @param referenceTitle - Title quoted in the prompt
+ * @param config - Environment config forwarded to generateText
+ * @param entityType - Optional type mentioned in the prompt
+ * @param currentContent - Optional stub text quoted in the prompt
+ * @returns An array of at most three suggestion strings, or null when the reply is empty
+ */
+async function generateReferenceSuggestions(referenceTitle, config, entityType, currentContent) {
+  const prompt = `For a reference titled "${referenceTitle}"${entityType ? ` (type: ${entityType})` : ""}${currentContent ? ` with current stub: "${currentContent}"` : ""}, suggest 3 specific, actionable next steps or related topics to explore.
+Format as a simple list, one suggestion per line.`;
+  const response = await generateText(prompt, config, 200, 0.8);
+  if (!response) {
+    return null;
+  }
+  // Leading list markers: "-", "\u2022", a single "*" (not the start of
+  // "**bold**"), or numbering such as "1." / "2)" followed by whitespace.
+  const listMarker = /^(?:[-\u2022]\s*|\*(?!\*)\s*|\d+[.)]\s+)/;
+  // Trim BEFORE stripping so indented bullets ("  - item") lose their marker too.
+  return response.split("\n").map((line) => line.trim().replace(listMarker, "").trim()).filter((line) => line.length > 0).slice(0, 3);
+}
+// src/entity-extractor.ts
+/**
+ * Ask the inference model for entity references in a text and return them with
+ * validated character offsets.
+ *
+ * The model is asked for a JSON array of `{ exact, entityType, startOffset,
+ * endOffset, prefix, suffix }`. For each entity whose reported offsets do not
+ * select `exact` in the text, the function searches for `exact`, preferring an
+ * occurrence whose surrounding text matches the reported prefix/suffix and
+ * falling back to the first occurrence. Entities that cannot be located, or
+ * that are malformed, are dropped.
+ *
+ * @param exact - The text to analyse
+ * @param entityTypes - Array of type names, or objects with `type` and optional `examples`
+ * @param config - Environment config used to obtain the client and model name
+ * @param includeDescriptiveReferences - When true, the prompt also asks for descriptive references
+ * @returns Array of entities with offsets that select `exact` in the text;
+ *   an empty array when the reply has no text block or is not a JSON array
+ * @throws Error when the response stopped because of the max_tokens limit
+ */
+async function extractEntities(exact, entityTypes, config, includeDescriptiveReferences = false) {
+  console.log("extractEntities called with:", {
+    textLength: exact.length,
+    entityTypes: Array.isArray(entityTypes) ? entityTypes.map((et) => typeof et === "string" ? et : et.type) : []
+  });
+  const client = await getInferenceClient(config);
+  const entityTypesDescription = entityTypes.map((et) => {
+    if (typeof et === "string") {
+      return et;
+    }
+    return et.examples && et.examples.length > 0 ? `${et.type} (examples: ${et.examples.slice(0, 3).join(", ")})` : et.type;
+  }).join(", ");
+  const descriptiveReferenceGuidance = includeDescriptiveReferences ? `
+Include both:
+- Direct mentions (names, proper nouns)
+- Descriptive references (substantive phrases that refer to entities)
+For descriptive references, include:
+- Definite descriptions: "the Nobel laureate", "the tech giant", "the former president"
+- Role-based references: "the CEO", "the physicist", "the author", "the owner", "the contractor"
+- Epithets with context: "the Cupertino-based company", "the iPhone maker"
+- References to entities even when identity is unknown or unspecified
+Do NOT include:
+- Simple pronouns alone: he, she, it, they, him, her, them
+- Generic determiners alone: this, that, these, those
+- Possessives without substance: his, her, their, its
+Examples:
+- For "Marie Curie", include "the Nobel laureate" and "the physicist" but NOT "she"
+- For an unknown person, include "the owner" or "the contractor" (role-based references count even when identity is unspecified)
+` : `
+Find direct mentions only (names, proper nouns). Do not include pronouns or descriptive references.
+`;
+  const prompt = `Identify entity references in the following text. Look for mentions of: ${entityTypesDescription}.
+${descriptiveReferenceGuidance}
+Text to analyze:
+"""
+${exact}
+"""
+Return ONLY a JSON array of entities found. Each entity should have:
+- exact: the exact text span from the input
+- entityType: one of the provided entity types
+- startOffset: character position where the entity starts (0-indexed)
+- endOffset: character position where the entity ends
+- prefix: up to 32 characters of text immediately before the entity (helps identify correct occurrence)
+- suffix: up to 32 characters of text immediately after the entity (helps identify correct occurrence)
+Return empty array [] if no entities found.
+Do not include markdown formatting or code fences, just the raw JSON array.
+Example output:
+[{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
+  console.log("Sending entity extraction request to model:", getInferenceModel(config));
+  const response = await client.messages.create({
+    model: getInferenceModel(config),
+    max_tokens: 4e3,
+    // Increased to handle many entities without truncation
+    temperature: 0.3,
+    // Lower temperature for more consistent extraction
+    messages: [
+      {
+        role: "user",
+        content: prompt
+      }
+    ]
+  });
+  console.log("Got entity extraction response");
+  const textContent = response.content.find((c) => c.type === "text");
+  if (!textContent || textContent.type !== "text") {
+    console.warn("No text content in entity extraction response");
+    return [];
+  }
+  console.log("Entity extraction raw response length:", textContent.text.length);
+  const truncated = response.stop_reason === "max_tokens";
+  let entities;
+  // Only the JSON parsing is best-effort. The truncation error below must
+  // reach the caller, so it is raised outside of any swallowing catch.
+  try {
+    let jsonStr = textContent.text.trim();
+    if (jsonStr.startsWith("```")) {
+      jsonStr = jsonStr.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
+    }
+    entities = JSON.parse(jsonStr);
+  } catch (error) {
+    if (truncated) {
+      // A cut-off reply is usually invalid JSON; report the real cause.
+      const errorMsg = "AI response truncated: response hit max_tokens limit before the JSON array was complete. Increase max_tokens or reduce resource size.";
+      console.error(`\u274C ${errorMsg}`);
+      throw new Error(errorMsg, { cause: error });
+    }
+    console.error("Failed to parse entity extraction response:", error);
+    return [];
+  }
+  if (!Array.isArray(entities)) {
+    console.error("Failed to parse entity extraction response:", new TypeError("Expected a JSON array of entities"));
+    return [];
+  }
+  console.log("Parsed", entities.length, "entities from response");
+  if (truncated) {
+    const errorMsg = `AI response truncated: Found ${entities.length} entities but response hit max_tokens limit. Increase max_tokens or reduce resource size.`;
+    console.error(`\u274C ${errorMsg}`);
+    throw new Error(errorMsg);
+  }
+  return entities.map((entity, idx) => {
+    console.log(`\n[Entity ${idx + 1}/${entities.length}]`);
+    // Skip malformed items individually instead of failing the whole batch.
+    // An empty `exact` would also make the indexOf search below loop forever,
+    // because indexOf("") never returns -1.
+    if (entity === null || typeof entity !== "object" || typeof entity.exact !== "string" || entity.exact.length === 0) {
+      console.log(`  \u274C Skipping malformed entity (missing or empty "exact")`);
+      return null;
+    }
+    let startOffset = entity.startOffset;
+    let endOffset = entity.endOffset;
+    console.log(`  Type: ${entity.entityType}`);
+    console.log(`  Text: "${entity.exact}"`);
+    console.log(`  Offsets from AI: [${startOffset}, ${endOffset}]`);
+    const extractedText = exact.substring(startOffset, endOffset);
+    if (extractedText !== entity.exact) {
+      console.log(`  \u26A0\uFE0F  Offset mismatch!`);
+      console.log(`  Expected: "${entity.exact}"`);
+      console.log(`  Found at AI offsets [${startOffset}:${endOffset}]: "${extractedText}"`);
+      const contextStart = Math.max(0, startOffset - 50);
+      const contextEnd = Math.min(exact.length, endOffset + 50);
+      const contextBefore = exact.substring(contextStart, startOffset);
+      const contextAfter = exact.substring(endOffset, contextEnd);
+      console.log(`  Context: "...${contextBefore}[${extractedText}]${contextAfter}..."`);
+      console.log(`  Searching for exact match in resource...`);
+      let found = false;
+      if (entity.prefix || entity.suffix) {
+        console.log(`  Using LLM-provided context for disambiguation:`);
+        if (entity.prefix) console.log(`    Prefix: "${entity.prefix}"`);
+        if (entity.suffix) console.log(`    Suffix: "${entity.suffix}"`);
+        // Walk every occurrence and accept the first whose neighbouring text
+        // (up to 32 characters each side) agrees with the reported context.
+        let searchPos = 0;
+        while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
+          const candidatePrefix = exact.substring(Math.max(0, searchPos - 32), searchPos);
+          const candidateSuffix = exact.substring(
+            searchPos + entity.exact.length,
+            Math.min(exact.length, searchPos + entity.exact.length + 32)
+          );
+          const prefixMatch = !entity.prefix || candidatePrefix.endsWith(entity.prefix);
+          const suffixMatch = !entity.suffix || candidateSuffix.startsWith(entity.suffix);
+          if (prefixMatch && suffixMatch) {
+            console.log(`  \u2705 Found match using context at offset ${searchPos} (diff: ${searchPos - startOffset})`);
+            console.log(`    Candidate prefix: "${candidatePrefix}"`);
+            console.log(`    Candidate suffix: "${candidateSuffix}"`);
+            startOffset = searchPos;
+            endOffset = searchPos + entity.exact.length;
+            found = true;
+            break;
+          }
+          searchPos++;
+        }
+        if (!found) {
+          console.log(`  \u26A0\uFE0F  No occurrence found with matching context`);
+        }
+      }
+      if (!found) {
+        // No usable context: fall back to the first occurrence in the text.
+        const index = exact.indexOf(entity.exact);
+        if (index !== -1) {
+          console.log(`  \u26A0\uFE0F  Using first occurrence at offset ${index} (diff: ${index - startOffset})`);
+          startOffset = index;
+          endOffset = index + entity.exact.length;
+        } else {
+          console.log(`  \u274C Cannot find "${entity.exact}" anywhere in resource`);
+          console.log(`  Resource starts with: "${exact.substring(0, 200)}..."`);
+          return null;
+        }
+      }
+    } else {
+      console.log(`  \u2705 Offsets correct`);
+    }
+    return {
+      exact: entity.exact,
+      entityType: entity.entityType,
+      startOffset,
+      endOffset,
+      prefix: entity.prefix,
+      suffix: entity.suffix
+    };
+  }).filter((entity) => {
+    // Final gate: keep only entities whose offsets select `exact` in the text.
+    if (entity === null) {
+      console.log("\u274C Filtered entity: null");
+      return false;
+    }
+    if (entity.startOffset === void 0 || entity.endOffset === void 0) {
+      console.log(`\u274C Filtered entity "${entity.exact}": missing offsets`);
+      return false;
+    }
+    if (entity.startOffset < 0) {
+      console.log(`\u274C Filtered entity "${entity.exact}": negative startOffset (${entity.startOffset})`);
+      return false;
+    }
+    if (entity.endOffset > exact.length) {
+      console.log(`\u274C Filtered entity "${entity.exact}": endOffset (${entity.endOffset}) > text length (${exact.length})`);
+      return false;
+    }
+    const extractedText = exact.substring(entity.startOffset, entity.endOffset);
+    if (extractedText !== entity.exact) {
+      console.log(`\u274C Filtered entity "${entity.exact}": offset mismatch`);
+      console.log(`   Expected: "${entity.exact}"`);
+      console.log(`   Got at [${entity.startOffset}:${entity.endOffset}]: "${extractedText}"`);
+      return false;
+    }
+    console.log(`\u2705 Accepted entity "${entity.exact}" at [${entity.startOffset}:${entity.endOffset}]`);
+    return true;
+  });
+}
+// src/motivation-prompts.ts
+// Static prompt builders, one per annotation motivation (comment, highlight,
+// assessment, structural tag). Each returns the full prompt string.
+var MotivationPrompts = class {
+  /**
+   * Compose the prompt asking the model to comment on passages.
+   *
+   * Only the first 8000 characters of `content` are embedded. When
+   * `instructions` is truthy the instruction-driven wording is used;
+   * otherwise a default set of guidelines is used.
+   *
+   * @param content - Text to analyze
+   * @param instructions - Optional user-provided instructions
+   * @param tone - Optional tone word inserted into the prompt
+   * @param density - Optional target number of comments per 2000 words
+   * @returns The prompt string
+   */
+  static buildCommentPrompt(content, instructions, tone, density) {
+    const excerpt = content.substring(0, 8000);
+    if (instructions) {
+      const toneSuffix = tone ? ` Use a ${tone} tone.` : "";
+      const densitySuffix = density ? `\nAim for approximately ${density} comments per 2000 words of text.` : "";
+      return `Add comments to passages in this text following these instructions:
+${instructions}${toneSuffix}${densitySuffix}
+Text to analyze:
+---
+${excerpt}
+---
+Return a JSON array of comments. Each comment must have:
+- "exact": the exact text passage being commented on (quoted verbatim from source)
+- "start": character offset where the passage starts
+- "end": character offset where the passage ends
+- "prefix": up to 32 characters of text immediately before the passage
+- "suffix": up to 32 characters of text immediately after the passage
+- "comment": your comment following the instructions above
+Return ONLY a valid JSON array, no additional text or explanation.
+Example:
+[
+  {"exact": "the quarterly review meeting", "start": 142, "end": 169, "prefix": "We need to schedule ", "suffix": " for next month.", "comment": "Who will lead this? Should we invite the external auditors?"}
+]`;
+    }
+    const toneLine = tone ? `\nTone: Use a ${tone} style in your comments.` : "";
+    const densityBullet = density ? `\n- Aim for approximately ${density} comments per 2000 words` : `\n- Aim for 3-8 comments per 2000 words (not too sparse or dense)`;
+    return `Identify passages in this text that would benefit from explanatory comments.
+For each passage, provide contextual information, clarification, or background.${toneLine}
+Guidelines:
+- Select passages that reference technical terms, historical figures, complex concepts, or unclear references
+- Provide comments that ADD VALUE beyond restating the text
+- Focus on explanation, background, or connections to other ideas
+- Avoid obvious or trivial comments
+- Keep comments concise (1-3 sentences typically)${densityBullet}
+Text to analyze:
+---
+${excerpt}
+---
+Return a JSON array of comments. Each comment should have:
+- "exact": the exact text passage being commented on (quoted verbatim from source)
+- "start": character offset where the passage starts
+- "end": character offset where the passage ends
+- "prefix": up to 32 characters of text immediately before the passage
+- "suffix": up to 32 characters of text immediately after the passage
+- "comment": your explanatory comment (1-3 sentences, provide context/background/clarification)
+Return ONLY a valid JSON array, no additional text or explanation.
+Example format:
+[
+  {"exact": "Ouranos", "start": 52, "end": 59, "prefix": "In the beginning, ", "suffix": " ruled the universe", "comment": "Ouranos (also spelled Uranus) is the primordial Greek deity personifying the sky. In Hesiod's Theogony, he is the son and husband of Gaia (Earth) and father of the Titans."}
+]`;
+  }
+  /**
+   * Compose the prompt asking the model to pick passages worth highlighting.
+   *
+   * Only the first 8000 characters of `content` are embedded.
+   *
+   * @param content - Text to analyze
+   * @param instructions - Optional user-provided instructions
+   * @param density - Optional target number of highlights per 2000 words
+   * @returns The prompt string
+   */
+  static buildHighlightPrompt(content, instructions, density) {
+    const excerpt = content.substring(0, 8000);
+    if (instructions) {
+      const densitySuffix = density ? `\nAim for approximately ${density} highlights per 2000 words of text.` : "";
+      return `Identify passages in this text to highlight following these instructions:
+${instructions}${densitySuffix}
+Text to analyze:
+---
+${excerpt}
+---
+Return a JSON array of highlights. Each highlight must have:
+- "exact": the exact text passage to highlight (quoted verbatim from source)
+- "start": character offset where the passage starts
+- "end": character offset where the passage ends
+- "prefix": up to 32 characters of text immediately before the passage
+- "suffix": up to 32 characters of text immediately after the passage
+Return ONLY a valid JSON array, no additional text or explanation.
+Example:
+[
+  {"exact": "revenue grew 45% year-over-year", "start": 142, "end": 174, "prefix": "In Q3 2024, ", "suffix": ", exceeding all forecasts."}
+]`;
+    }
+    const densityBullet = density ? `\n- Aim for approximately ${density} highlights per 2000 words` : `\n- Aim for 3-8 highlights per 2000 words (be selective)`;
+    return `Identify passages in this text that merit highlighting for their importance or salience.
+Focus on content that readers should notice and remember.
+Guidelines:
+- Highlight key claims, findings, or conclusions
+- Highlight important definitions, terminology, or concepts
+- Highlight notable quotes or particularly striking statements
+- Highlight critical decisions, action items, or turning points
+- Select passages that are SIGNIFICANT, not just interesting
+- Avoid trivial or obvious content${densityBullet}
+Text to analyze:
+---
+${excerpt}
+---
+Return a JSON array of highlights. Each highlight should have:
+- "exact": the exact text passage to highlight (quoted verbatim from source)
+- "start": character offset where the passage starts
+- "end": character offset where the passage ends
+- "prefix": up to 32 characters of text immediately before the passage
+- "suffix": up to 32 characters of text immediately after the passage
+Return ONLY a valid JSON array, no additional text or explanation.
+Example format:
+[
+  {"exact": "we will discontinue support for legacy systems by March 2025", "start": 52, "end": 113, "prefix": "After careful consideration, ", "suffix": ". This decision affects"}
+]`;
+  }
+  /**
+   * Compose the prompt asking the model to assess passages.
+   *
+   * Only the first 8000 characters of `content` are embedded.
+   *
+   * @param content - Text to analyze
+   * @param instructions - Optional user-provided instructions
+   * @param tone - Optional tone word inserted into the prompt
+   * @param density - Optional target number of assessments per 2000 words
+   * @returns The prompt string
+   */
+  static buildAssessmentPrompt(content, instructions, tone, density) {
+    const excerpt = content.substring(0, 8000);
+    if (instructions) {
+      const toneSuffix = tone ? ` Use a ${tone} tone.` : "";
+      const densitySuffix = density ? `\nAim for approximately ${density} assessments per 2000 words of text.` : "";
+      return `Assess passages in this text following these instructions:
+${instructions}${toneSuffix}${densitySuffix}
+Text to analyze:
+---
+${excerpt}
+---
+Return a JSON array of assessments. Each assessment must have:
+- "exact": the exact text passage being assessed (quoted verbatim from source)
+- "start": character offset where the passage starts
+- "end": character offset where the passage ends
+- "prefix": up to 32 characters of text immediately before the passage
+- "suffix": up to 32 characters of text immediately after the passage
+- "assessment": your assessment following the instructions above
+Return ONLY a valid JSON array, no additional text or explanation.
+Example:
+[
+  {"exact": "the quarterly revenue target", "start": 142, "end": 169, "prefix": "We established ", "suffix": " for Q4 2024.", "assessment": "This target seems ambitious given market conditions. Consider revising based on recent trends."}
+]`;
+    }
+    const toneLine = tone ? `\nTone: Use a ${tone} style in your assessments.` : "";
+    const densityBullet = density ? `\n- Aim for approximately ${density} assessments per 2000 words` : `\n- Aim for 2-6 assessments per 2000 words (focus on key passages)`;
+    return `Identify passages in this text that merit critical assessment or evaluation.
+For each passage, provide analysis of its validity, strength, or implications.${toneLine}
+Guidelines:
+- Select passages containing claims, arguments, conclusions, or assertions
+- Assess evidence quality, logical soundness, or practical implications
+- Provide assessments that ADD INSIGHT beyond restating the text
+- Focus on passages where evaluation would help readers form judgments
+- Keep assessments concise yet substantive (1-3 sentences typically)${densityBullet}
+Text to analyze:
+---
+${excerpt}
+---
+Return a JSON array of assessments. Each assessment should have:
+- "exact": the exact text passage being assessed (quoted verbatim from source)
+- "start": character offset where the passage starts
+- "end": character offset where the passage ends
+- "prefix": up to 32 characters of text immediately before the passage
+- "suffix": up to 32 characters of text immediately after the passage
+- "assessment": your analytical assessment (1-3 sentences, evaluate validity/strength/implications)
+Return ONLY a valid JSON array, no additional text or explanation.
+Example format:
+[
+  {"exact": "AI will replace most jobs by 2030", "start": 52, "end": 89, "prefix": "Many experts predict that ", "suffix": ", fundamentally reshaping", "assessment": "This claim lacks nuance and supporting evidence. Employment patterns historically show job transformation rather than wholesale replacement. The timeline appears speculative without specific sector analysis."}
+]`;
+  }
+  /**
+   * Compose the prompt asking the model to tag passages that play a given
+   * structural role. Unlike the other builders, `content` is embedded in full.
+   *
+   * @param content - Text to analyze (not truncated)
+   * @param category - Category name to detect
+   * @param schemaName - Schema name shown in the prompt
+   * @param schemaDescription - Schema description shown in the prompt
+   * @param schemaDomain - Schema domain shown in the prompt
+   * @param categoryDescription - Category description shown in the prompt
+   * @param categoryExamples - Array of strings rendered as a "- " bullet list
+   * @returns The prompt string
+   */
+  static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples) {
+    return `You are analyzing a text using the ${schemaName} framework.
+Schema: ${schemaDescription}
+Domain: ${schemaDomain}
+Your task: Identify passages that serve the structural role of "${category}".
+Category: ${category}
+Description: ${categoryDescription}
+Key questions:
+${categoryExamples.map((ex) => `- ${ex}`).join("\n")}
+Guidelines:
+- Focus on STRUCTURAL FUNCTION, not semantic content
+- A passage serves the "${category}" role if it performs this function in the document's structure
+- Look for passages that explicitly fulfill this role
+- Passages can be sentences, paragraphs, or sections
+- Aim for precision - only tag passages that clearly serve this structural role
+- Typical documents have 1-5 instances of each category (some may have 0)
+Text to analyze:
+---
+${content}
+---
+Return a JSON array of tags. Each tag should have:
+- "exact": the exact text passage (quoted verbatim from source)
+- "start": character offset where the passage starts
+- "end": character offset where the passage ends
+- "prefix": up to 32 characters of text immediately before the passage
+- "suffix": up to 32 characters of text immediately after the passage
+Return ONLY a valid JSON array, no additional text or explanation.
+Example format:
+[
+  {"exact": "What duty did the defendant owe?", "start": 142, "end": 175, "prefix": "The central question is: ", "suffix": " This question must be"},
+  {"exact": "In tort law, a duty of care is established when...", "start": 412, "end": 520, "prefix": "Legal framework:\\n", "suffix": "\\n\\nApplying this standard"}
+]`;
+  }
+};
+// src/motivation-parsers.ts
+import { validateAndCorrectOffsets } from "@semiont/api-client";
+var MotivationParsers = class {
+  /**
+   * Parse and validate AI response for comment detection
+   *
+   * @param response - Raw AI response string (may include markdown code fences)
+   * @param content - Original content to validate offsets against
+   * @returns Array of validated comment matches
+   */
+  static parseComments(response, content) {
+    try {
+      let cleaned = response.trim();
+      if (cleaned.startsWith("```")) {
+        cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
+      }
+      const parsed = JSON.parse(cleaned);
+      if (!Array.isArray(parsed)) {
+        console.warn("[MotivationParsers] Comment response is not an array");
+        return [];
+      }
+      const valid = parsed.filter(
+        (c) => c && typeof c.exact === "string" && typeof c.start === "number" && typeof c.end === "number" && typeof c.comment === "string" && c.comment.trim().length > 0
+      );
+      console.log(`[MotivationParsers] Parsed ${valid.length} valid comments from ${parsed.length} total`);
+      const validatedComments = [];
+      for (const comment of valid) {
+        try {
+          const validated = validateAndCorrectOffsets(content, comment.start, comment.end, comment.exact);
+          validatedComments.push({
+            ...comment,
+            start: validated.start,
+            end: validated.end,
+            prefix: validated.prefix,
+            suffix: validated.suffix
+          });
+        } catch (error) {
+          console.warn(`[MotivationParsers] Skipping invalid comment "${comment.exact}":`, error);
+        }
+      }
+      return validatedComments;
+    } catch (error) {
+      console.error("[MotivationParsers] Failed to parse AI comment response:", error);
+      return [];
+    }
+  }
+  /**
+   * Parse and validate AI response for highlight detection
+   *
+   * @param response - Raw AI response string (may include markdown code fences)
+   * @param content - Original content to validate offsets against
+   * @returns Array of validated highlight matches
+   */
+  static parseHighlights(response, content) {
+    try {
+      let cleaned = response.trim();
+      if (cleaned.startsWith("```json") || cleaned.startsWith("```")) {
+        cleaned = cleaned.slice(cleaned.indexOf("\n") + 1);
+        const endIndex = cleaned.lastIndexOf("```");
+        if (endIndex !== -1) {
+          cleaned = cleaned.slice(0, endIndex);
+        }
+      }
+      const parsed = JSON.parse(cleaned);
+      if (!Array.isArray(parsed)) {
+        console.warn("[MotivationParsers] Highlight response was not an array");
+        return [];
+      }
+      const highlights = parsed.filter(
+        (h) => h && typeof h.exact === "string" && typeof h.start === "number" && typeof h.end === "number"
+      );
+      const validatedHighlights = [];
+      for (const highlight of highlights) {
+        try {
+          const validated = validateAndCorrectOffsets(content, highlight.start, highlight.end, highlight.exact);
+          validatedHighlights.push({
+            ...highlight,
+            start: validated.start,
+            end: validated.end,
+            prefix: validated.prefix,
+            suffix: validated.suffix
+          });
+        } catch (error) {
+          console.warn(`[MotivationParsers] Skipping invalid highlight "${highlight.exact}":`, error);
+        }
+      }
+      return validatedHighlights;
+    } catch (error) {
+      console.error("[MotivationParsers] Failed to parse AI highlight response:", error);
+      console.error("Raw response:", response);
+      return [];
+    }
+  }
+  /**
+   * Parse and validate AI response for assessment detection
+   *
+   * @param response - Raw AI response string (may include markdown code fences)
+   * @param content - Original content to validate offsets against
+   * @returns Array of validated assessment matches
+   */
+  static parseAssessments(response, content) {
+    try {
+      let cleaned = response.trim();
+      if (cleaned.startsWith("```json") || cleaned.startsWith("```")) {
+        cleaned = cleaned.slice(cleaned.indexOf("\n") + 1);
+        const endIndex = cleaned.lastIndexOf("```");
+        if (endIndex !== -1) {
+          cleaned = cleaned.slice(0, endIndex);
+        }
+      }
+      const parsed = JSON.parse(cleaned);
+      if (!Array.isArray(parsed)) {
+        console.warn("[MotivationParsers] Assessment response was not an array");
+        return [];
+      }
+      const assessments = parsed.filter(
+        (a) => a && typeof a.exact === "string" && typeof a.start === "number" && typeof a.end === "number" && typeof a.assessment === "string"
+      );
+      const validatedAssessments = [];
+      for (const assessment of assessments) {
+        try {
+          const validated = validateAndCorrectOffsets(content, assessment.start, assessment.end, assessment.exact);
+          validatedAssessments.push({
+            ...assessment,
+            start: validated.start,
+            end: validated.end,
+            prefix: validated.prefix,
+            suffix: validated.suffix
+          });
+        } catch (error) {
+          console.warn(`[MotivationParsers] Skipping invalid assessment "${assessment.exact}":`, error);
+        }
+      }
+      return validatedAssessments;
+    } catch (error) {
+      console.error("[MotivationParsers] Failed to parse AI assessment response:", error);
+      console.error("Raw response:", response);
+      return [];
+    }
+  }
+  /**
+   * Parse and validate AI response for tag detection
+   * Note: Does NOT validate offsets - caller must do that with content
+   *
+   * @param response - Raw AI response string (may include markdown code fences)
+   * @returns Array of tag matches (offsets not yet validated)
+   */
+  static parseTags(response) {
+    try {
+      let cleaned = response.trim();
+      if (cleaned.startsWith("```")) {
+        cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
+      }
+      const parsed = JSON.parse(cleaned);
+      if (!Array.isArray(parsed)) {
+        console.warn("[MotivationParsers] Tag response is not an array");
+        return [];
+      }
+      const valid = parsed.filter(
+        (t) => t && typeof t.exact === "string" && typeof t.start === "number" && typeof t.end === "number" && t.exact.trim().length > 0
+      );
+      console.log(`[MotivationParsers] Parsed ${valid.length} valid tags from ${parsed.length} total`);
+      return valid;
+    } catch (error) {
+      console.error("[MotivationParsers] Failed to parse AI tag response:", error);
+      return [];
+    }
+  }
+  /**
+   * Validate tag offsets against content and add category
+   * Helper for tag detection after initial parsing
+   *
+   * @param tags - Parsed tags without validated offsets
+   * @param content - Original content to validate against
+   * @param category - Category to assign to validated tags
+   * @returns Array of validated tag matches
+   */
+  static validateTagOffsets(tags, content, category) {
+    const validatedTags = [];
+    for (const tag of tags) {
+      try {
+        const validated = validateAndCorrectOffsets(content, tag.start, tag.end, tag.exact);
+        validatedTags.push({
+          ...tag,
+          category,
+          start: validated.start,
+          end: validated.end,
+          prefix: validated.prefix,
+          suffix: validated.suffix
+        });
+      } catch (error) {
+        console.warn(`[MotivationParsers] Skipping invalid tag for category "${category}":`, error);
+      }
+    }
+    return validatedTags;
+  }
+};
+export {
+  MotivationParsers,
+  MotivationPrompts,
+  extractEntities,
+  generateReferenceSuggestions,
+  generateResourceFromTopic,
+  generateResourceSummary,
+  generateText,
+  getInferenceClient,
+  getInferenceModel
+};
+//# sourceMappingURL=index.js.map