@semiont/inference 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -177
- package/package.json +5 -7
package/README.md
CHANGED
|
@@ -6,21 +6,22 @@
|
|
|
6
6
|
[npm package](https://www.npmjs.com/package/@semiont/inference)
|
|
7
7
|
[License](https://github.com/The-AI-Alliance/semiont/blob/main/LICENSE)
|
|
8
8
|
|
|
9
|
-
**AI primitives for text generation
|
|
9
|
+
**AI primitives for text generation: a provider-agnostic inference client.**
|
|
10
10
|
|
|
11
11
|
This package provides the **core AI primitives** for the Semiont platform:
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
12
|
+
- The `InferenceClient` interface (provider abstraction)
|
|
13
|
+
- Client implementations for Anthropic and Ollama, plus a scripted mock for tests
|
|
14
|
+
- A `createInferenceClient()` factory that selects the implementation from config
|
|
15
|
+
- Cross-provider JSON output mode (`format: 'json'`)
|
|
16
|
+
- Usage metrics via `@semiont/observability`
|
|
16
17
|
|
|
17
|
-
For **application-specific AI logic** (
|
|
18
|
+
For **application-specific AI logic** (semantic processing, prompt engineering, response parsing), see [@semiont/make-meaning](../make-meaning/).
|
|
18
19
|
|
|
19
20
|
## Architecture Context
|
|
20
21
|
|
|
21
|
-
**Infrastructure Ownership**: In production
|
|
22
|
+
**Infrastructure Ownership**: In production, inference clients are **created by [@semiont/make-meaning](../make-meaning/)'s `startMakeMeaning()`** (one client per knowledge-system actor — Gatherer, Matcher) and by [@semiont/jobs](../jobs/)' worker process (one client per job group). Both build an `InferenceClientConfig` from their own configuration and call `createInferenceClient()`.
|
|
22
23
|
|
|
23
|
-
The API
|
|
24
|
+
The API below can also be used directly for **testing, CLI tools, or standalone scripts**.
|
|
24
25
|
|
|
25
26
|
## Philosophy
|
|
26
27
|
|
|
@@ -37,217 +38,115 @@ npm install @semiont/inference
|
|
|
37
38
|
## Quick Start
|
|
38
39
|
|
|
39
40
|
```typescript
|
|
40
|
-
import {
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
const
|
|
56
|
-
services: {
|
|
57
|
-
inference: {
|
|
58
|
-
type: 'ollama',
|
|
59
|
-
model: 'gemma2:9b',
|
|
60
|
-
endpoint: 'http://localhost:11434'
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
};
|
|
64
|
-
|
|
65
|
-
// Generate text using the primitive
|
|
66
|
-
const text = await generateText(
|
|
41
|
+
import { createInferenceClient } from '@semiont/inference';
|
|
42
|
+
|
|
43
|
+
// Anthropic (apiKey required)
|
|
44
|
+
const claude = createInferenceClient({
|
|
45
|
+
type: 'anthropic',
|
|
46
|
+
model: 'claude-sonnet-4-6',
|
|
47
|
+
apiKey: process.env['ANTHROPIC_API_KEY']!,
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// Ollama (no API key; endpoint defaults to http://localhost:11434)
|
|
51
|
+
const local = createInferenceClient({
|
|
52
|
+
type: 'ollama',
|
|
53
|
+
model: 'gemma2:9b',
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
const text = await claude.generateText(
|
|
67
57
|
'Explain quantum computing in simple terms',
|
|
68
|
-
config,
|
|
69
58
|
500, // maxTokens
|
|
70
59
|
0.7 // temperature
|
|
71
60
|
);
|
|
72
|
-
|
|
73
61
|
console.log(text);
|
|
74
62
|
```
|
|
75
63
|
|
|
76
64
|
## API Reference
|
|
77
65
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
**`generateText(prompt, config, maxTokens?, temperature?): Promise<string>`**
|
|
81
|
-
|
|
82
|
-
Simple text generation primitive.
|
|
83
|
-
|
|
84
|
-
**Parameters:**
|
|
85
|
-
- `prompt: string` - The prompt
|
|
86
|
-
- `config: EnvironmentConfig` - Configuration
|
|
87
|
-
- `maxTokens?: number` - Maximum tokens (default: 500)
|
|
88
|
-
- `temperature?: number` - Sampling temperature (default: 0.7)
|
|
89
|
-
|
|
90
|
-
**Returns:** `Promise<string>` - Generated text
|
|
91
|
-
|
|
92
|
-
**Implementation** ([src/factory.ts](src/factory.ts)):
|
|
93
|
-
- Routes to provider-specific client (Anthropic Messages API or Ollama `/api/generate`)
|
|
94
|
-
- Extracts text content from response
|
|
95
|
-
- Throws error if no text content in response
|
|
96
|
-
|
|
97
|
-
**Example:**
|
|
98
|
-
```typescript
|
|
99
|
-
const result = await generateText(
|
|
100
|
-
'Write a haiku about programming',
|
|
101
|
-
config,
|
|
102
|
-
100,
|
|
103
|
-
0.8
|
|
104
|
-
);
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
**`getInferenceClient(config): Promise<InferenceClient>`**
|
|
108
|
-
|
|
109
|
-
Get an inference client instance based on configuration.
|
|
66
|
+
See [docs/API.md](docs/API.md) for the full reference.
|
|
110
67
|
|
|
111
|
-
|
|
112
|
-
- `config: EnvironmentConfig` - Configuration
|
|
68
|
+
### `createInferenceClient(config, logger?): InferenceClient`
|
|
113
69
|
|
|
114
|
-
|
|
70
|
+
Factory ([src/factory.ts](src/factory.ts)). Selects the implementation from `config.type`:
|
|
115
71
|
|
|
116
|
-
**Implementation** ([src/factory.ts](src/factory.ts)):
|
|
117
|
-
- Creates `AnthropicInferenceClient` or `OllamaInferenceClient` based on `config.services.inference.type`
|
|
118
|
-
- Supports environment variable expansion in API keys (e.g., `'${ANTHROPIC_API_KEY}'`)
|
|
119
|
-
- Ollama defaults to `http://localhost:11434`, no API key required
|
|
120
|
-
|
|
121
|
-
**Example:**
|
|
122
72
|
```typescript
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
'
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
73
|
+
interface InferenceClientConfig {
|
|
74
|
+
type: 'anthropic' | 'ollama';
|
|
75
|
+
model: string; // e.g. 'claude-sonnet-4-6', 'gemma2:9b'
|
|
76
|
+
apiKey?: string; // required for 'anthropic' (throws if missing/empty)
|
|
77
|
+
endpoint?: string; // provider URL; Ollama default: http://localhost:11434
|
|
78
|
+
baseURL?: string; // fallback used when endpoint is not set
|
|
79
|
+
}
|
|
130
80
|
```
|
|
131
81
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
Get the configured model name.
|
|
82
|
+
The optional second argument is a `Logger` from `@semiont/core`.
|
|
135
83
|
|
|
136
|
-
|
|
137
|
-
- `config: EnvironmentConfig` - Configuration
|
|
84
|
+
### `InferenceClient`
|
|
138
85
|
|
|
139
|
-
|
|
86
|
+
The contract every implementation satisfies ([src/interface.ts](src/interface.ts)):
|
|
140
87
|
|
|
141
|
-
**Example:**
|
|
142
88
|
```typescript
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
## Configuration
|
|
89
|
+
interface InferenceClient {
|
|
90
|
+
readonly type: string; // 'anthropic' | 'ollama' | 'mock'
|
|
91
|
+
readonly modelId: string; // configured model name
|
|
148
92
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
```typescript
|
|
152
|
-
// Anthropic
|
|
153
|
-
config.services.inference = {
|
|
154
|
-
type: 'anthropic', // Provider type
|
|
155
|
-
model: string, // Model name (e.g., 'claude-3-5-sonnet-20241022')
|
|
156
|
-
apiKey: string, // API key or ${ENV_VAR} pattern
|
|
157
|
-
endpoint?: string, // Custom endpoint (optional)
|
|
158
|
-
baseURL?: string // Fallback endpoint (optional)
|
|
93
|
+
generateText(prompt, maxTokens, temperature, options?): Promise<string>;
|
|
94
|
+
generateTextWithMetadata(prompt, maxTokens, temperature, options?): Promise<InferenceResponse>;
|
|
159
95
|
}
|
|
160
96
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
model: string, // Model name (e.g., 'gemma2:9b', 'llama3.1:8b', 'mistral')
|
|
165
|
-
endpoint?: string, // Ollama server URL (default: http://localhost:11434)
|
|
97
|
+
interface InferenceResponse {
|
|
98
|
+
text: string;
|
|
99
|
+
stopReason: 'end_turn' | 'max_tokens' | 'stop_sequence' | string;
|
|
166
100
|
}
|
|
167
101
|
```
|
|
168
102
|
|
|
169
|
-
###
|
|
170
|
-
|
|
171
|
-
From [src/factory.ts:27-36](src/factory.ts#L27-L36):
|
|
103
|
+
### JSON output mode
|
|
172
104
|
|
|
173
|
-
|
|
105
|
+
Pass `{ format: 'json' }` as `options` to constrain output to a **parseable top-level JSON array**, regardless of provider:
|
|
174
106
|
|
|
175
107
|
```typescript
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
}
|
|
108
|
+
const json = await client.generateText(prompt, 1000, 0, { format: 'json' });
|
|
109
|
+
const items = JSON.parse(json); // guaranteed to be an array
|
|
179
110
|
```
|
|
180
111
|
|
|
181
|
-
|
|
182
|
-
**
|
|
183
|
-
|
|
184
|
-
## Application-Specific AI Logic
|
|
112
|
+
Each implementation honors the contract with its provider's mechanism:
|
|
113
|
+
- **Ollama**: grammar-constrained sampling — the request's `format` field carries a minimal array schema.
|
|
114
|
+
- **Anthropic**: assistant-turn prefill (`[`); the prefix is re-attached to the returned text so callers always see a complete JSON document.
|
|
185
115
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
**Entity Extraction:**
|
|
189
|
-
```typescript
|
|
190
|
-
import { extractEntities } from '@semiont/make-meaning';
|
|
191
|
-
|
|
192
|
-
const entities = await extractEntities(
|
|
193
|
-
'Marie Curie worked at the University of Paris.',
|
|
194
|
-
['Person', 'Organization'],
|
|
195
|
-
config
|
|
196
|
-
);
|
|
197
|
-
```
|
|
116
|
+
Current callers all expect arrays (entity extraction, motivation detection). If a caller that needs a top-level object appears, the plan is to extend the option with a `root: 'array' | 'object'` field — see the notes in [src/interface.ts](src/interface.ts).
|
|
198
117
|
|
|
199
|
-
|
|
200
|
-
```typescript
|
|
201
|
-
import { generateResourceFromTopic } from '@semiont/make-meaning';
|
|
118
|
+
### `MockInferenceClient`
|
|
202
119
|
|
|
203
|
-
|
|
204
|
-
'Quantum Computing',
|
|
205
|
-
['Technology', 'Physics'],
|
|
206
|
-
config
|
|
207
|
-
);
|
|
208
|
-
```
|
|
120
|
+
A scripted test double ([src/implementations/mock.ts](src/implementations/mock.ts)): construct it with a list of canned responses, then inspect `calls` (the prompt, maxTokens, temperature, and options recorded for each invocation). `reset()` and `setResponses()` helpers are also provided.
|
|
209
121
|
|
|
210
|
-
**Motivation Prompts & Parsers:**
|
|
211
122
|
```typescript
|
|
212
|
-
import {
|
|
213
|
-
|
|
214
|
-
// Build prompt for comment detection
|
|
215
|
-
const prompt = MotivationPrompts.buildCommentPrompt(content, instructions);
|
|
123
|
+
import { MockInferenceClient } from '@semiont/inference';
|
|
216
124
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
// Parse response
|
|
221
|
-
const comments = MotivationParsers.parseComments(response, content);
|
|
125
|
+
const mock = new MockInferenceClient(['first reply', 'second reply']);
|
|
126
|
+
await mock.generateText('hi', 100, 0);
|
|
127
|
+
expect(mock.calls[0].prompt).toBe('hi');
|
|
222
128
|
```
|
|
223
129
|
|
|
224
|
-
|
|
225
|
-
```typescript
|
|
226
|
-
import { AnnotationDetection } from '@semiont/make-meaning';
|
|
130
|
+
## Observability
|
|
227
131
|
|
|
228
|
-
|
|
229
|
-
const highlights = await AnnotationDetection.detectHighlights(resourceId, config);
|
|
230
|
-
```
|
|
132
|
+
Every generation records a usage metric through `@semiont/observability`'s `recordInferenceUsage`: provider, model, duration, outcome (`success`/`error`), and token counts when the provider reports them.
|
|
231
133
|
|
|
232
134
|
## Architecture
|
|
233
135
|
|
|
234
136
|
```
|
|
235
137
|
┌─────────────────────────────────────────────┐
|
|
236
|
-
│
|
|
237
|
-
│ (
|
|
238
|
-
│ -
|
|
239
|
-
│ -
|
|
240
|
-
│ - Motivation prompts (comment/highlight) │
|
|
241
|
-
│ - Response parsers with offset correction │
|
|
242
|
-
│ - Orchestrated detection pipelines │
|
|
138
|
+
│ @semiont/make-meaning @semiont/jobs │
|
|
139
|
+
│ (application logic) (job workers) │
|
|
140
|
+
│ - builds InferenceClientConfig │
|
|
141
|
+
│ - calls createInferenceClient() │
|
|
243
142
|
└──────────────────┬──────────────────────────┘
|
|
244
143
|
│ uses
|
|
245
144
|
┌──────────────────▼──────────────────────────┐
|
|
246
145
|
│ @semiont/inference │
|
|
247
146
|
│ (AI primitives only) │
|
|
248
147
|
│ - InferenceClient interface │
|
|
249
|
-
│ -
|
|
250
|
-
│ -
|
|
148
|
+
│ - createInferenceClient() factory │
|
|
149
|
+
│ - cross-provider JSON output mode │
|
|
251
150
|
└──────────┬───────────────────┬──────────────┘
|
|
252
151
|
│ │
|
|
253
152
|
┌──────────▼──────────┐ ┌─────▼──────────────┐
|
|
@@ -258,15 +157,15 @@ const highlights = await AnnotationDetection.detectHighlights(resourceId, config
|
|
|
258
157
|
```
|
|
259
158
|
|
|
260
159
|
**Key Principles:**
|
|
261
|
-
- **@semiont/inference**:
|
|
262
|
-
- **@semiont/make-meaning**:
|
|
263
|
-
- **Clean separation**:
|
|
160
|
+
- **@semiont/inference**: provider abstraction, text generation, output discipline
|
|
161
|
+
- **@semiont/make-meaning**: semantic processing, prompt engineering, response parsing
|
|
162
|
+
- **Clean separation**: adding a new provider only affects @semiont/inference
|
|
264
163
|
|
|
265
164
|
## Supported Providers
|
|
266
165
|
|
|
267
166
|
| Provider | Type | API Key | Models |
|
|
268
167
|
|----------|------|---------|--------|
|
|
269
|
-
| Anthropic | `anthropic` | Required
|
|
168
|
+
| Anthropic | `anthropic` | Required | Claude family |
|
|
270
169
|
| Ollama | `ollama` | Not required | gemma2:9b, llama3.1:8b, mistral, etc. |
|
|
271
170
|
|
|
272
171
|
### Adding a New Provider
|
|
@@ -280,13 +179,12 @@ const highlights = await AnnotationDetection.detectHighlights(resourceId, config
|
|
|
280
179
|
|
|
281
180
|
From [package.json](package.json):
|
|
282
181
|
|
|
283
|
-
- `@anthropic-ai/sdk`
|
|
284
|
-
- `@semiont/core`
|
|
182
|
+
- `@anthropic-ai/sdk` - Anthropic API client
|
|
183
|
+
- `@semiont/core` - `Logger` type
|
|
184
|
+
- `@semiont/observability` - usage metrics
|
|
285
185
|
|
|
286
186
|
Ollama uses native HTTP (`fetch`) with no SDK dependency.
|
|
287
187
|
|
|
288
|
-
**Note:** No dependency on `@semiont/api-client` - primitives have minimal dependencies
|
|
289
|
-
|
|
290
188
|
## Testing
|
|
291
189
|
|
|
292
190
|
```bash
|
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@semiont/inference",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.8",
|
|
4
4
|
"engines": {
|
|
5
5
|
"node": ">=24.0.0"
|
|
6
6
|
},
|
|
7
7
|
"type": "module",
|
|
8
|
-
"description": "AI inference
|
|
8
|
+
"description": "AI inference primitives: provider-agnostic text generation clients for Anthropic and Ollama",
|
|
9
9
|
"main": "./dist/index.js",
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
11
|
"exports": {
|
|
@@ -27,10 +27,9 @@
|
|
|
27
27
|
"test:coverage": "vitest run --coverage"
|
|
28
28
|
},
|
|
29
29
|
"dependencies": {
|
|
30
|
-
"@anthropic-ai/sdk": "^0.
|
|
31
|
-
"@semiont/
|
|
32
|
-
"@semiont/
|
|
33
|
-
"@semiont/observability": "*"
|
|
30
|
+
"@anthropic-ai/sdk": "^0.104.1",
|
|
31
|
+
"@semiont/core": "0.5.8",
|
|
32
|
+
"@semiont/observability": "0.5.8"
|
|
34
33
|
},
|
|
35
34
|
"devDependencies": {
|
|
36
35
|
"@vitest/coverage-v8": "^4.1.8",
|
|
@@ -46,7 +45,6 @@
|
|
|
46
45
|
"keywords": [
|
|
47
46
|
"ai",
|
|
48
47
|
"inference",
|
|
49
|
-
"entity-extraction",
|
|
50
48
|
"llm",
|
|
51
49
|
"anthropic",
|
|
52
50
|
"ollama",
|