@semiont/inference 0.2.28-build.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,243 @@
+ # @semiont/inference
+
+ [![npm version](https://img.shields.io/npm/v/@semiont/inference)](https://www.npmjs.com/package/@semiont/inference)
+ [![Tests](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml/badge.svg)](https://github.com/The-AI-Alliance/semiont/actions/workflows/package-tests.yml?query=branch%3Amain+is%3Asuccess+job%3A%22Test+inference%22)
+
+ AI inference for entity extraction, text generation, and resource creation.
+
+ ## Philosophy
+
+ This package is named `inference` rather than `ai-inference` to align with Semiont's core tenet: humans and AI agents have equal opportunity to work behind similar interfaces. The abstraction remains open for future human-agent parity.
+
+ ## Installation
+
+ ```bash
+ npm install @semiont/inference
+ ```
+
+ ## Quick Start
+
+ ```typescript
+ import { extractEntities, generateText } from '@semiont/inference';
+ import type { EnvironmentConfig } from '@semiont/core';
+
+ const config: EnvironmentConfig = {
+   services: {
+     inference: {
+       type: 'anthropic',
+       model: 'claude-3-5-sonnet-20241022',
+       apiKey: '${ANTHROPIC_API_KEY}' // Supports environment variable expansion
+     }
+   }
+ };
+
+ // Extract entities with character offsets
+ const entities = await extractEntities(
+   'Paris is the capital of France.',
+   ['Location'],
+   config
+ );
+
+ // Generate text
+ const text = await generateText(
+   'Explain quantum computing in simple terms',
+   config
+ );
+ ```
+
+ ## API Reference
+
+ From [src/index.ts](src/index.ts):
+
+ ### Entity Extraction
+
+ **`extractEntities(text, entityTypes, config, includeDescriptiveReferences?)`**
+
+ Extract entity references from text with precise character offsets.
+
+ **Parameters:**
+ - `text: string` - Text to analyze
+ - `entityTypes: string[] | { type: string; examples?: string[] }[]` - Entity types to detect
+ - `config: EnvironmentConfig` - Configuration
+ - `includeDescriptiveReferences?: boolean` - Include anaphoric/cataphoric references (default: false)
+
+ **Returns:** `Promise<ExtractedEntity[]>`
+
+ ```typescript
+ interface ExtractedEntity {
+   exact: string;        // Actual text span from input
+   entityType: string;   // Detected entity type
+   startOffset: number;  // Character position where entity starts (0-indexed)
+   endOffset: number;    // Character position where entity ends
+   prefix?: string;      // Up to 32 chars before entity (for disambiguation)
+   suffix?: string;      // Up to 32 chars after entity (for disambiguation)
+ }
+ ```
+
+ **Implementation Details:**
+
+ From [src/entity-extractor.ts:101-102](src/entity-extractor.ts):
+ - Uses 4000 max_tokens to handle many entities without truncation
+ - Uses temperature 0.3 for consistent extraction
+
+ From [src/entity-extractor.ts:131-135](src/entity-extractor.ts):
+ - Throws error if response is truncated (stop_reason === 'max_tokens')
+ - Validates all character offsets after AI response
+
+ From [src/entity-extractor.ts:147-199](src/entity-extractor.ts):
+ - Corrects misaligned offsets using prefix/suffix context matching
+ - Filters invalid entities (negative offsets, out-of-bounds, mismatches)
+
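For illustration, the validation and prefix/suffix correction described above can be sketched roughly as follows. This is not the package's implementation, only the general idea: keep offsets that already line up with the source text, re-locate misaligned spans by their surrounding context, and drop spans that cannot be found at all.

```typescript
import type { ExtractedEntity } from '@semiont/inference';

// Hypothetical helper: validate a model-reported span and, if the offsets are
// misaligned, try to re-locate it using the prefix/suffix context.
function realignEntity(entity: ExtractedEntity, text: string): ExtractedEntity | null {
  const { exact, startOffset, endOffset, prefix = '', suffix = '' } = entity;
  if (!exact) return null;

  // Offsets that already point at the exact span are kept as-is.
  const inBounds = startOffset >= 0 && endOffset <= text.length && startOffset < endOffset;
  if (inBounds && text.slice(startOffset, endOffset) === exact) {
    return entity;
  }

  // Otherwise, score every occurrence of the span by how well its surrounding
  // text matches the reported prefix/suffix context.
  let best: { start: number; score: number } | null = null;
  for (let i = text.indexOf(exact); i !== -1; i = text.indexOf(exact, i + 1)) {
    const before = text.slice(Math.max(0, i - prefix.length), i);
    const after = text.slice(i + exact.length, i + exact.length + suffix.length);
    const score = (before === prefix ? 1 : 0) + (after === suffix ? 1 : 0);
    if (!best || score > best.score) best = { start: i, score };
  }

  // Spans that cannot be located anywhere in the text are dropped.
  if (!best) return null;
  return { ...entity, startOffset: best.start, endOffset: best.start + exact.length };
}
```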
+ **Anaphoric/Cataphoric Reference Support:**
+
+ From [src/entity-extractor.ts:48-75](src/entity-extractor.ts):
+
+ When `includeDescriptiveReferences` is true, includes:
+ - Direct mentions (names, proper nouns)
+ - Definite descriptions: "the Nobel laureate", "the tech giant"
+ - Role-based references: "the CEO", "the physicist"
+ - Epithets with context: "the Cupertino-based company"
+
+ Excludes:
+ - Simple pronouns: he, she, it, they
+ - Generic determiners: this, that, these, those
+ - Possessives without substance: his, her, their
+
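For example, with the flag enabled a role-based reference is returned alongside the direct mention (reusing the `config` from the Quick Start; the output shown in comments is illustrative and depends on the model):

```typescript
const entities = await extractEntities(
  'Tim Cook spoke on Monday. The CEO announced new hardware.',
  ['Person'],
  config,
  true  // includeDescriptiveReferences
);

// Illustrative result: both the direct mention and the definite description
// come back with character offsets, e.g.
// [
//   { exact: 'Tim Cook', entityType: 'Person', startOffset: 0,  endOffset: 8,  ... },
//   { exact: 'The CEO',  entityType: 'Person', startOffset: 26, endOffset: 33, ... }
// ]
```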
+ ### Text Generation
+
+ **`generateText(prompt, config, maxTokens?, temperature?)`**
+
+ Simple text generation with configurable parameters.
+
+ **Parameters:**
+ - `prompt: string` - The prompt
+ - `config: EnvironmentConfig` - Configuration
+ - `maxTokens?: number` - Maximum tokens (default: 500)
+ - `temperature?: number` - Sampling temperature (default: 0.7)
+
+ **Returns:** `Promise<string>`
+
+ From [src/factory.ts:78-100](src/factory.ts):
+ - Uses Anthropic Messages API
+ - Extracts text content from first text block in response
+ - Throws error if no text content in response
+
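Both optional parameters are positional, so a call with an explicit token budget and temperature looks like this:

```typescript
// Larger budget, lower temperature for more deterministic output
const answer = await generateText(
  'Summarize the theory of relativity in one paragraph',
  config,
  1000,  // maxTokens (default 500)
  0.2    // temperature (default 0.7)
);
```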
+ **`generateResourceFromTopic(topic, entityTypes, config, userPrompt?, locale?, context?, temperature?, maxTokens?)`**
+
+ Generate markdown resource content about a topic.
+
+ **Parameters:**
+ - `topic: string` - Topic to write about
+ - `entityTypes: string[]` - Entity types to focus on
+ - `config: EnvironmentConfig` - Configuration
+ - `userPrompt?: string` - Additional context
+ - `locale?: string` - Language locale (e.g., 'es', 'fr')
+ - `context?: GenerationContext` - Source document context
+ - `temperature?: number` - Sampling temperature (default: 0.7)
+ - `maxTokens?: number` - Maximum tokens (default: 500)
+
+ **Returns:** `Promise<{ title: string; content: string }>`
+
+ From [src/factory.ts:186-189](src/factory.ts):
+ - Returns topic as title (not extracted from generated content)
+ - Returns generated markdown as content
+
+ From [src/factory.ts:136-138](src/factory.ts):
+ - Supports non-English languages using locale parameter
+ - Converts locale to language name (e.g., 'es' → 'Spanish')
+
+ From [src/factory.ts:166-182](src/factory.ts):
+ - Automatically strips markdown code fences from response if present
+ - Handles ```markdown, ```md, and bare ``` fences
+
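A typical call, reusing the Quick Start `config` (the comments restate the documented behaviour rather than verbatim output):

```typescript
const { title, content } = await generateResourceFromTopic(
  'Marie Curie',
  ['Person', 'Location', 'Organization'],
  config,
  'Focus on her scientific collaborations',  // userPrompt
  'fr'                                       // locale: content is generated in French
);

// title === 'Marie Curie'  (the topic is echoed back as the title)
// content is the generated markdown, with any surrounding code fences already stripped
```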
+ **`generateResourceSummary(resourceName, content, entityTypes, config)`**
+
+ Generate a 2-3 sentence summary of a resource.
+
+ **Parameters:**
+ - `resourceName: string` - Name of the resource
+ - `content: string` - Content to summarize (truncated to 2000 chars)
+ - `entityTypes: string[]` - Entity types mentioned
+ - `config: EnvironmentConfig` - Configuration
+
+ **Returns:** `Promise<string>`
+
+ From [src/factory.ts:216-219](src/factory.ts):
+ - Truncates content to first 2000 characters to stay within limits
+ - Uses temperature 0.7, max_tokens 150
+
+ **`generateReferenceSuggestions(referenceTitle, config, entityType?, currentContent?)`**
+
+ Generate 3 actionable next steps or related topics.
+
+ **Parameters:**
+ - `referenceTitle: string` - Title of the reference
+ - `config: EnvironmentConfig` - Configuration
+ - `entityType?: string` - Optional entity type
+ - `currentContent?: string` - Optional current content for context
+
+ **Returns:** `Promise<string[] | null>`
+
+ From [src/factory.ts:246-249](src/factory.ts):
+ - Returns array of 3 suggestions or null on parse error
+ - Uses temperature 0.8 for creative suggestions
+
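Used together with the `content` generated in the previous example, the two helpers might be wired up as follows (illustrative only; note the null check on the suggestions):

```typescript
const summary = await generateResourceSummary(
  'Marie Curie',
  content,  // long markdown; only the first 2000 characters are sent to the model
  ['Person', 'Location', 'Organization'],
  config
);

const suggestions = await generateReferenceSuggestions('Marie Curie', config, 'Person', content);
if (suggestions) {
  suggestions.forEach((s) => console.log('Next step:', s)); // 3 suggestions on success
} else {
  console.warn('Suggestion response could not be parsed'); // null on parse error
}
```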
+ ### Client Factory
+
+ **`getInferenceClient(config)`**
+
+ Get the singleton Anthropic client instance.
+
+ **Returns:** `Promise<Anthropic>`
+
+ From [src/factory.ts:10-51](src/factory.ts):
+ - Singleton pattern - creates client once, caches for reuse
+ - Supports environment variable expansion in API keys (e.g., '${ANTHROPIC_API_KEY}')
+ - Configurable baseURL with fallback to https://api.anthropic.com
+
+ **`getInferenceModel(config)`**
+
+ Get the configured model name.
+
+ **Returns:** `string`
+
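When the higher-level helpers are not enough, the factory functions can be combined with the Anthropic SDK directly. A sketch (the prompt and token budget here are arbitrary):

```typescript
import { getInferenceClient, getInferenceModel } from '@semiont/inference';

const client = await getInferenceClient(config); // cached after the first call
const model = getInferenceModel(config);

// Raw Messages API call using the configured model
const response = await client.messages.create({
  model,
  max_tokens: 256,
  messages: [{ role: 'user', content: 'List three uses of graphite.' }],
});
// response.content holds the returned content blocks
```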
+ ## Configuration
+
+ From [src/factory.ts:22-48](src/factory.ts):
+
+ ```typescript
+ config.services.inference = {
+   type: 'anthropic',   // Provider type
+   model: string,       // Model name (e.g., 'claude-3-5-sonnet-20241022')
+   apiKey: string,      // API key or ${ENV_VAR} pattern
+   endpoint?: string,   // Custom endpoint (optional)
+   baseURL?: string     // Fallback endpoint (optional)
+ }
+ ```
+
+ ### Environment Variable Expansion
+
+ From [src/factory.ts:27-36](src/factory.ts):
+
+ API keys support `${VAR_NAME}` syntax:
+
+ ```typescript
+ config.services.inference = {
+   apiKey: '${ANTHROPIC_API_KEY}' // Expands to process.env.ANTHROPIC_API_KEY
+ }
+ ```
+
+ The pattern must start with `${` and end with `}`. An error is thrown if the environment variable is not set.
+
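A rough sketch of that rule, shown here only to make the behaviour concrete (not the package's actual code):

```typescript
// Hypothetical equivalent of the expansion rule described above.
function expandApiKey(apiKey: string): string {
  if (apiKey.startsWith('${') && apiKey.endsWith('}')) {
    const name = apiKey.slice(2, -1);   // e.g. 'ANTHROPIC_API_KEY'
    const value = process.env[name];
    if (!value) {
      throw new Error(`Environment variable ${name} is not set`);
    }
    return value;
  }
  return apiKey; // Literal keys are passed through unchanged.
}
```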
+ ## Dependencies
+
+ From [package.json](package.json):
+
+ - `@anthropic-ai/sdk` ^0.63.0 - Anthropic API client
+ - `@semiont/api-client` * - Types and utilities
+ - `@semiont/core` * - Environment configuration
+
+ ## License
+
+ Apache-2.0
@@ -0,0 +1,205 @@
+ import Anthropic from '@anthropic-ai/sdk';
+ import { GenerationContext } from '@semiont/api-client';
+ import { EnvironmentConfig } from '@semiont/core';
+
+ /**
+  * Get or create the inference client
+  * Following the singleton pattern from graph factory
+  */
+ declare function getInferenceClient(config: EnvironmentConfig): Promise<Anthropic>;
+ /**
+  * Get the configured model name
+  */
+ declare function getInferenceModel(config: EnvironmentConfig): string;
+ /**
+  * Helper function to make a simple inference call
+  */
+ declare function generateText(prompt: string, config: EnvironmentConfig, maxTokens?: number, temperature?: number): Promise<string>;
+ /**
+  * Generate resource content using inference
+  */
+ declare function generateResourceFromTopic(topic: string, entityTypes: string[], config: EnvironmentConfig, userPrompt?: string, locale?: string, context?: GenerationContext, temperature?: number, maxTokens?: number): Promise<{
+     title: string;
+     content: string;
+ }>;
+ /**
+  * Generate an intelligent summary for a resource
+  */
+ declare function generateResourceSummary(resourceName: string, content: string, entityTypes: string[], config: EnvironmentConfig): Promise<string>;
+ /**
+  * Generate smart suggestions for a reference
+  */
+ declare function generateReferenceSuggestions(referenceTitle: string, config: EnvironmentConfig, entityType?: string, currentContent?: string): Promise<string[] | null>;
+
+ /**
+  * Entity reference extracted from text
+  */
+ interface ExtractedEntity {
+     exact: string;
+     entityType: string;
+     startOffset: number;
+     endOffset: number;
+     prefix?: string;
+     suffix?: string;
+ }
+ /**
+  * Extract entity references from text using AI
+  *
+  * @param text - The text to analyze
+  * @param entityTypes - Array of entity types to detect (optionally with examples)
+  * @param config - Application configuration
+  * @param includeDescriptiveReferences - Include anaphoric/cataphoric references (default: false)
+  * @returns Array of extracted entities with their character offsets
+  */
+ declare function extractEntities(exact: string, entityTypes: string[] | {
+     type: string;
+     examples?: string[];
+ }[], config: EnvironmentConfig, includeDescriptiveReferences?: boolean): Promise<ExtractedEntity[]>;
+
+ /**
+  * Prompt builders for annotation detection motivations
+  *
+  * Provides static methods to build AI prompts for each Web Annotation motivation type.
+  * Extracted from worker implementations to centralize prompt logic.
+  */
+ declare class MotivationPrompts {
+     /**
+      * Build a prompt for detecting comment-worthy passages
+      *
+      * @param content - The text content to analyze (will be truncated to 8000 chars)
+      * @param instructions - Optional user-provided instructions
+      * @param tone - Optional tone guidance (e.g., "academic", "conversational")
+      * @param density - Optional target number of comments per 2000 words
+      * @returns Formatted prompt string
+      */
+     static buildCommentPrompt(content: string, instructions?: string, tone?: string, density?: number): string;
+     /**
+      * Build a prompt for detecting highlight-worthy passages
+      *
+      * @param content - The text content to analyze (will be truncated to 8000 chars)
+      * @param instructions - Optional user-provided instructions
+      * @param density - Optional target number of highlights per 2000 words
+      * @returns Formatted prompt string
+      */
+     static buildHighlightPrompt(content: string, instructions?: string, density?: number): string;
+     /**
+      * Build a prompt for detecting assessment-worthy passages
+      *
+      * @param content - The text content to analyze (will be truncated to 8000 chars)
+      * @param instructions - Optional user-provided instructions
+      * @param tone - Optional tone guidance (e.g., "critical", "supportive")
+      * @param density - Optional target number of assessments per 2000 words
+      * @returns Formatted prompt string
+      */
+     static buildAssessmentPrompt(content: string, instructions?: string, tone?: string, density?: number): string;
+     /**
+      * Build a prompt for detecting structural tags
+      *
+      * @param content - The full text content to analyze (NOT truncated for structural analysis)
+      * @param category - The specific category to detect
+      * @param schemaName - Human-readable schema name
+      * @param schemaDescription - Schema description
+      * @param schemaDomain - Schema domain
+      * @param categoryDescription - Category description
+      * @param categoryExamples - Example questions/guidance for this category
+      * @returns Formatted prompt string
+      */
+     static buildTagPrompt(content: string, category: string, schemaName: string, schemaDescription: string, schemaDomain: string, categoryDescription: string, categoryExamples: string[]): string;
+ }
+
+ /**
+  * Response parsers for annotation detection motivations
+  *
+  * Provides static methods to parse and validate AI responses for each motivation type.
+  * Includes offset validation and correction logic.
+  * Extracted from worker implementations to centralize parsing logic.
+  */
+ /**
+  * Represents a detected comment with validated position
+  */
+ interface CommentMatch {
+     exact: string;
+     start: number;
+     end: number;
+     prefix?: string;
+     suffix?: string;
+     comment: string;
+ }
+ /**
+  * Represents a detected highlight with validated position
+  */
+ interface HighlightMatch {
+     exact: string;
+     start: number;
+     end: number;
+     prefix?: string;
+     suffix?: string;
+ }
+ /**
+  * Represents a detected assessment with validated position
+  */
+ interface AssessmentMatch {
+     exact: string;
+     start: number;
+     end: number;
+     prefix?: string;
+     suffix?: string;
+     assessment: string;
+ }
+ /**
+  * Represents a detected tag with validated position
+  */
+ interface TagMatch {
+     exact: string;
+     start: number;
+     end: number;
+     prefix?: string;
+     suffix?: string;
+     category: string;
+ }
+ declare class MotivationParsers {
+     /**
+      * Parse and validate AI response for comment detection
+      *
+      * @param response - Raw AI response string (may include markdown code fences)
+      * @param content - Original content to validate offsets against
+      * @returns Array of validated comment matches
+      */
+     static parseComments(response: string, content: string): CommentMatch[];
+     /**
+      * Parse and validate AI response for highlight detection
+      *
+      * @param response - Raw AI response string (may include markdown code fences)
+      * @param content - Original content to validate offsets against
+      * @returns Array of validated highlight matches
+      */
+     static parseHighlights(response: string, content: string): HighlightMatch[];
+     /**
+      * Parse and validate AI response for assessment detection
+      *
+      * @param response - Raw AI response string (may include markdown code fences)
+      * @param content - Original content to validate offsets against
+      * @returns Array of validated assessment matches
+      */
+     static parseAssessments(response: string, content: string): AssessmentMatch[];
+     /**
+      * Parse and validate AI response for tag detection
+      * Note: Does NOT validate offsets - caller must do that with content
+      *
+      * @param response - Raw AI response string (may include markdown code fences)
+      * @returns Array of tag matches (offsets not yet validated)
+      */
+     static parseTags(response: string): Omit<TagMatch, 'category'>[];
+     /**
+      * Validate tag offsets against content and add category
+      * Helper for tag detection after initial parsing
+      *
+      * @param tags - Parsed tags without validated offsets
+      * @param content - Original content to validate against
+      * @param category - Category to assign to validated tags
+      * @returns Array of validated tag matches
+      */
+     static validateTagOffsets(tags: Omit<TagMatch, 'category'>[], content: string, category: string): TagMatch[];
+ }
+
+ export { type AssessmentMatch, type CommentMatch, type ExtractedEntity, type HighlightMatch, MotivationParsers, MotivationPrompts, type TagMatch, extractEntities, generateReferenceSuggestions, generateResourceFromTopic, generateResourceSummary, generateText, getInferenceClient, getInferenceModel };
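Taken together, the declarations above suggest a build-prompt → run-inference → parse-response flow for annotation detection. The following sketch shows one plausible wiring, reusing the README's `config`; only the `MotivationPrompts` and `MotivationParsers` signatures come from the package, while the composition with `generateText` is an assumption:

```typescript
import {
  MotivationPrompts,
  MotivationParsers,
  generateText,
  type HighlightMatch,
} from '@semiont/inference';

async function detectHighlights(content: string): Promise<HighlightMatch[]> {
  // Build the highlight-detection prompt (content is truncated to 8000 chars internally).
  const prompt = MotivationPrompts.buildHighlightPrompt(
    content,
    'Prefer passages that state a claim',  // instructions (optional)
    5                                      // density: target highlights per 2000 words (optional)
  );

  // Send the prompt through the configured inference service.
  const response = await generateText(prompt, config, 2000);

  // Parse the raw response and validate offsets against the original content.
  return MotivationParsers.parseHighlights(response, content);
}
```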