@semiont/inference 0.5.5 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -181
- package/package.json +10 -8
package/README.md
CHANGED
|
@@ -6,21 +6,22 @@
|
|
|
6
6
|
[](https://www.npmjs.com/package/@semiont/inference)
|
|
7
7
|
[](https://github.com/The-AI-Alliance/semiont/blob/main/LICENSE)
|
|
8
8
|
|
|
9
|
-
**AI primitives for text generation
|
|
9
|
+
**AI primitives for text generation: a provider-agnostic inference client.**
|
|
10
10
|
|
|
11
11
|
This package provides the **core AI primitives** for the Semiont platform:
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
12
|
+
- The `InferenceClient` interface (provider abstraction)
|
|
13
|
+
- Client implementations for Anthropic and Ollama, plus a scripted mock for tests
|
|
14
|
+
- A `createInferenceClient()` factory that selects the implementation from config
|
|
15
|
+
- Cross-provider JSON output mode (`format: 'json'`)
|
|
16
|
+
- Usage metrics via `@semiont/observability`
|
|
16
17
|
|
|
17
|
-
For **application-specific AI logic** (
|
|
18
|
+
For **application-specific AI logic** (semantic processing, prompt engineering, response parsing), see [@semiont/make-meaning](../make-meaning/).
|
|
18
19
|
|
|
19
20
|
## Architecture Context
|
|
20
21
|
|
|
21
|
-
**Infrastructure Ownership**: In production
|
|
22
|
+
**Infrastructure Ownership**: In production, inference clients are **created by [@semiont/make-meaning](../make-meaning/)'s `startMakeMeaning()`** (one client per knowledge-system actor — Gatherer, Matcher) and by [@semiont/jobs](../jobs/)' worker process (one client per job group). Both build an `InferenceClientConfig` from their own configuration and call `createInferenceClient()`.
|
|
22
23
|
|
|
23
|
-
The API
|
|
24
|
+
The API below can also be used directly for **testing, CLI tools, or standalone scripts**.
|
|
24
25
|
|
|
25
26
|
## Philosophy
|
|
26
27
|
|
|
@@ -37,217 +38,115 @@ npm install @semiont/inference
|
|
|
37
38
|
## Quick Start
|
|
38
39
|
|
|
39
40
|
```typescript
|
|
40
|
-
import {
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
const
|
|
56
|
-
services: {
|
|
57
|
-
inference: {
|
|
58
|
-
type: 'ollama',
|
|
59
|
-
model: 'gemma2:9b',
|
|
60
|
-
endpoint: 'http://localhost:11434'
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
};
|
|
64
|
-
|
|
65
|
-
// Generate text using the primitive
|
|
66
|
-
const text = await generateText(
|
|
41
|
+
import { createInferenceClient } from '@semiont/inference';
|
|
42
|
+
|
|
43
|
+
// Anthropic (apiKey required)
|
|
44
|
+
const claude = createInferenceClient({
|
|
45
|
+
type: 'anthropic',
|
|
46
|
+
model: 'claude-sonnet-4-6',
|
|
47
|
+
apiKey: process.env['ANTHROPIC_API_KEY']!,
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// Ollama (no API key; endpoint defaults to http://localhost:11434)
|
|
51
|
+
const local = createInferenceClient({
|
|
52
|
+
type: 'ollama',
|
|
53
|
+
model: 'gemma2:9b',
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
const text = await claude.generateText(
|
|
67
57
|
'Explain quantum computing in simple terms',
|
|
68
|
-
config,
|
|
69
58
|
500, // maxTokens
|
|
70
59
|
0.7 // temperature
|
|
71
60
|
);
|
|
72
|
-
|
|
73
61
|
console.log(text);
|
|
74
62
|
```
|
|
75
63
|
|
|
76
64
|
## API Reference
|
|
77
65
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
**`generateText(prompt, config, maxTokens?, temperature?): Promise<string>`**
|
|
81
|
-
|
|
82
|
-
Simple text generation primitive.
|
|
66
|
+
See [docs/API.md](docs/API.md) for the full reference.
|
|
83
67
|
|
|
84
|
-
|
|
85
|
-
- `prompt: string` - The prompt
|
|
86
|
-
- `config: EnvironmentConfig` - Configuration
|
|
87
|
-
- `maxTokens?: number` - Maximum tokens (default: 500)
|
|
88
|
-
- `temperature?: number` - Sampling temperature (default: 0.7)
|
|
68
|
+
### `createInferenceClient(config, logger?): InferenceClient`
|
|
89
69
|
|
|
90
|
-
|
|
70
|
+
Factory ([src/factory.ts](src/factory.ts)). Selects the implementation from `config.type`:
|
|
91
71
|
|
|
92
|
-
**Implementation** ([src/factory.ts](src/factory.ts)):
|
|
93
|
-
- Routes to provider-specific client (Anthropic Messages API or Ollama `/api/generate`)
|
|
94
|
-
- Extracts text content from response
|
|
95
|
-
- Throws error if no text content in response
|
|
96
|
-
|
|
97
|
-
**Example:**
|
|
98
72
|
```typescript
|
|
99
|
-
|
|
100
|
-
'
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
**`getInferenceClient(config): Promise<InferenceClient>`**
|
|
108
|
-
|
|
109
|
-
Get an inference client instance based on configuration.
|
|
110
|
-
|
|
111
|
-
**Parameters:**
|
|
112
|
-
- `config: EnvironmentConfig` - Configuration
|
|
113
|
-
|
|
114
|
-
**Returns:** `Promise<InferenceClient>` - Provider-specific client implementing the `InferenceClient` interface
|
|
115
|
-
|
|
116
|
-
**Implementation** ([src/factory.ts](src/factory.ts)):
|
|
117
|
-
- Creates `AnthropicInferenceClient` or `OllamaInferenceClient` based on `config.services.inference.type`
|
|
118
|
-
- Supports environment variable expansion in API keys (e.g., `'${ANTHROPIC_API_KEY}'`)
|
|
119
|
-
- Ollama defaults to `http://localhost:11434`, no API key required
|
|
120
|
-
|
|
121
|
-
**Example:**
|
|
122
|
-
```typescript
|
|
123
|
-
const client = await getInferenceClient(config);
|
|
124
|
-
const response = await client.generateTextWithMetadata(
|
|
125
|
-
'Hello',
|
|
126
|
-
100,
|
|
127
|
-
0.7
|
|
128
|
-
);
|
|
129
|
-
console.log(response.text);
|
|
73
|
+
interface InferenceClientConfig {
|
|
74
|
+
type: 'anthropic' | 'ollama';
|
|
75
|
+
model: string; // e.g. 'claude-sonnet-4-6', 'gemma2:9b'
|
|
76
|
+
apiKey?: string; // required for 'anthropic' (throws if missing/empty)
|
|
77
|
+
endpoint?: string; // provider URL; Ollama default: http://localhost:11434
|
|
78
|
+
baseURL?: string; // fallback used when endpoint is not set
|
|
79
|
+
}
|
|
130
80
|
```
|
|
131
81
|
|
|
132
|
-
|
|
82
|
+
The optional second argument is a `Logger` from `@semiont/core`.
|
|
133
83
|
|
|
134
|
-
|
|
84
|
+
### `InferenceClient`
|
|
135
85
|
|
|
136
|
-
|
|
137
|
-
- `config: EnvironmentConfig` - Configuration
|
|
86
|
+
The contract every implementation satisfies ([src/interface.ts](src/interface.ts)):
|
|
138
87
|
|
|
139
|
-
**Returns:** `string` - Model name (e.g., `'claude-3-5-sonnet-20241022'` or `'gemma2:9b'`)
|
|
140
|
-
|
|
141
|
-
**Example:**
|
|
142
88
|
```typescript
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
## Configuration
|
|
148
|
-
|
|
149
|
-
From [src/factory.ts](src/factory.ts):
|
|
89
|
+
interface InferenceClient {
|
|
90
|
+
readonly type: string; // 'anthropic' | 'ollama' | 'mock'
|
|
91
|
+
readonly modelId: string; // configured model name
|
|
150
92
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
config.services.inference = {
|
|
154
|
-
type: 'anthropic', // Provider type
|
|
155
|
-
model: string, // Model name (e.g., 'claude-3-5-sonnet-20241022')
|
|
156
|
-
apiKey: string, // API key or ${ENV_VAR} pattern
|
|
157
|
-
endpoint?: string, // Custom endpoint (optional)
|
|
158
|
-
baseURL?: string // Fallback endpoint (optional)
|
|
93
|
+
generateText(prompt, maxTokens, temperature, options?): Promise<string>;
|
|
94
|
+
generateTextWithMetadata(prompt, maxTokens, temperature, options?): Promise<InferenceResponse>;
|
|
159
95
|
}
|
|
160
96
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
model: string, // Model name (e.g., 'gemma2:9b', 'llama3.1:8b', 'mistral')
|
|
165
|
-
endpoint?: string, // Ollama server URL (default: http://localhost:11434)
|
|
97
|
+
interface InferenceResponse {
|
|
98
|
+
text: string;
|
|
99
|
+
stopReason: 'end_turn' | 'max_tokens' | 'stop_sequence' | string;
|
|
166
100
|
}
|
|
167
101
|
```
|
|
168
102
|
|
|
169
|
-
###
|
|
170
|
-
|
|
171
|
-
From [src/factory.ts:27-36](src/factory.ts#L27-L36):
|
|
103
|
+
### JSON output mode
|
|
172
104
|
|
|
173
|
-
|
|
105
|
+
Pass `{ format: 'json' }` as `options` to constrain output to a **parseable top-level JSON array**, regardless of provider:
|
|
174
106
|
|
|
175
107
|
```typescript
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
}
|
|
108
|
+
const json = await client.generateText(prompt, 1000, 0, { format: 'json' });
|
|
109
|
+
const items = JSON.parse(json); // guaranteed to be an array
|
|
179
110
|
```
|
|
180
111
|
|
|
181
|
-
|
|
182
|
-
**
|
|
112
|
+
Each implementation honors the contract with its provider's mechanism:
|
|
113
|
+
- **Ollama**: grammar-constrained sampling — the request's `format` field carries a minimal array schema.
|
|
114
|
+
- **Anthropic**: assistant-turn prefill (`[`); the prefix is re-attached to the returned text so callers always see a complete JSON document.
|
|
183
115
|
|
|
184
|
-
|
|
116
|
+
Current callers all expect arrays (entity extraction, motivation detection). If an object-emitting caller appears, the option will grow a `root: 'array' | 'object'` field — see the notes in [src/interface.ts](src/interface.ts).
|
|
185
117
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
**Entity Extraction:**
|
|
189
|
-
```typescript
|
|
190
|
-
import { extractEntities } from '@semiont/make-meaning';
|
|
118
|
+
### `MockInferenceClient`
|
|
191
119
|
|
|
192
|
-
|
|
193
|
-
'Marie Curie worked at the University of Paris.',
|
|
194
|
-
['Person', 'Organization'],
|
|
195
|
-
config
|
|
196
|
-
);
|
|
197
|
-
```
|
|
120
|
+
A scripted test double ([src/implementations/mock.ts](src/implementations/mock.ts)): construct it with a list of canned responses, then inspect `calls` (recorded prompt/maxTokens/temperature/options per invocation). `reset()` and `setResponses()` helpers are also included.
|
|
198
121
|
|
|
199
|
-
**Resource Generation:**
|
|
200
122
|
```typescript
|
|
201
|
-
import {
|
|
123
|
+
import { MockInferenceClient } from '@semiont/inference';
|
|
202
124
|
|
|
203
|
-
const
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
config
|
|
207
|
-
);
|
|
125
|
+
const mock = new MockInferenceClient(['first reply', 'second reply']);
|
|
126
|
+
await mock.generateText('hi', 100, 0);
|
|
127
|
+
expect(mock.calls[0].prompt).toBe('hi');
|
|
208
128
|
```
|
|
209
129
|
|
|
210
|
-
|
|
211
|
-
```typescript
|
|
212
|
-
import { MotivationPrompts, MotivationParsers } from '@semiont/make-meaning';
|
|
213
|
-
|
|
214
|
-
// Build prompt for comment detection
|
|
215
|
-
const prompt = MotivationPrompts.buildCommentPrompt(content, instructions);
|
|
130
|
+
## Observability
|
|
216
131
|
|
|
217
|
-
|
|
218
|
-
const response = await generateText(prompt, config);
|
|
219
|
-
|
|
220
|
-
// Parse response
|
|
221
|
-
const comments = MotivationParsers.parseComments(response, content);
|
|
222
|
-
```
|
|
223
|
-
|
|
224
|
-
**Orchestrated Detection:**
|
|
225
|
-
```typescript
|
|
226
|
-
import { AnnotationDetection } from '@semiont/make-meaning';
|
|
227
|
-
|
|
228
|
-
const comments = await AnnotationDetection.detectComments(resourceId, config);
|
|
229
|
-
const highlights = await AnnotationDetection.detectHighlights(resourceId, config);
|
|
230
|
-
```
|
|
132
|
+
Every generation records a usage metric through `@semiont/observability`'s `recordInferenceUsage`: provider, model, duration, outcome (`success`/`error`), and token counts when the provider reports them.
|
|
231
133
|
|
|
232
134
|
## Architecture
|
|
233
135
|
|
|
234
136
|
```
|
|
235
137
|
┌─────────────────────────────────────────────┐
|
|
236
|
-
│
|
|
237
|
-
│ (
|
|
238
|
-
│ -
|
|
239
|
-
│ -
|
|
240
|
-
│ - Motivation prompts (comment/highlight) │
|
|
241
|
-
│ - Response parsers with offset correction │
|
|
242
|
-
│ - Orchestrated detection pipelines │
|
|
138
|
+
│ @semiont/make-meaning @semiont/jobs │
|
|
139
|
+
│ (application logic) (job workers) │
|
|
140
|
+
│ - builds InferenceClientConfig │
|
|
141
|
+
│ - calls createInferenceClient() │
|
|
243
142
|
└──────────────────┬──────────────────────────┘
|
|
244
143
|
│ uses
|
|
245
144
|
┌──────────────────▼──────────────────────────┐
|
|
246
145
|
│ @semiont/inference │
|
|
247
146
|
│ (AI primitives only) │
|
|
248
147
|
│ - InferenceClient interface │
|
|
249
|
-
│ -
|
|
250
|
-
│ -
|
|
148
|
+
│ - createInferenceClient() factory │
|
|
149
|
+
│ - cross-provider JSON output mode │
|
|
251
150
|
└──────────┬───────────────────┬──────────────┘
|
|
252
151
|
│ │
|
|
253
152
|
┌──────────▼──────────┐ ┌─────▼──────────────┐
|
|
@@ -258,15 +157,15 @@ const highlights = await AnnotationDetection.detectHighlights(resourceId, config
|
|
|
258
157
|
```
|
|
259
158
|
|
|
260
159
|
**Key Principles:**
|
|
261
|
-
- **@semiont/inference**:
|
|
262
|
-
- **@semiont/make-meaning**:
|
|
263
|
-
- **Clean separation**:
|
|
160
|
+
- **@semiont/inference**: provider abstraction, text generation, output discipline
|
|
161
|
+
- **@semiont/make-meaning**: semantic processing, prompt engineering, response parsing
|
|
162
|
+
- **Clean separation**: adding a new provider only affects @semiont/inference
|
|
264
163
|
|
|
265
164
|
## Supported Providers
|
|
266
165
|
|
|
267
166
|
| Provider | Type | API Key | Models |
|
|
268
167
|
|----------|------|---------|--------|
|
|
269
|
-
| Anthropic | `anthropic` | Required
|
|
168
|
+
| Anthropic | `anthropic` | Required | Claude family |
|
|
270
169
|
| Ollama | `ollama` | Not required | gemma2:9b, llama3.1:8b, mistral, etc. |
|
|
271
170
|
|
|
272
171
|
### Adding a New Provider
|
|
@@ -280,13 +179,12 @@ const highlights = await AnnotationDetection.detectHighlights(resourceId, config
|
|
|
280
179
|
|
|
281
180
|
From [package.json](package.json):
|
|
282
181
|
|
|
283
|
-
- `@anthropic-ai/sdk`
|
|
284
|
-
- `@semiont/core`
|
|
182
|
+
- `@anthropic-ai/sdk` - Anthropic API client
|
|
183
|
+
- `@semiont/core` - `Logger` type
|
|
184
|
+
- `@semiont/observability` - usage metrics
|
|
285
185
|
|
|
286
186
|
Ollama uses native HTTP (`fetch`) with no SDK dependency.
|
|
287
187
|
|
|
288
|
-
**Note:** No dependency on `@semiont/api-client` - primitives have minimal dependencies
|
|
289
|
-
|
|
290
188
|
## Testing
|
|
291
189
|
|
|
292
190
|
```bash
|
|
@@ -295,10 +193,6 @@ npm run test:watch # Watch mode
|
|
|
295
193
|
npm run test:coverage # Coverage report
|
|
296
194
|
```
|
|
297
195
|
|
|
298
|
-
## Examples
|
|
299
|
-
|
|
300
|
-
See [examples/basic.ts](examples/basic.ts) for usage examples.
|
|
301
|
-
|
|
302
196
|
## License
|
|
303
197
|
|
|
304
198
|
Apache-2.0
|
package/package.json
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@semiont/inference",
|
|
3
|
-
"version": "0.5.5",
|
|
3
|
+
"version": "0.5.7",
|
|
4
|
+
"engines": {
|
|
5
|
+
"node": ">=24.0.0"
|
|
6
|
+
},
|
|
4
7
|
"type": "module",
|
|
5
|
-
"description": "AI inference
|
|
8
|
+
"description": "AI inference primitives: provider-agnostic text generation clients for Anthropic and Ollama",
|
|
6
9
|
"main": "./dist/index.js",
|
|
7
10
|
"types": "./dist/index.d.ts",
|
|
8
11
|
"exports": {
|
|
@@ -24,17 +27,17 @@
|
|
|
24
27
|
"test:coverage": "vitest run --coverage"
|
|
25
28
|
},
|
|
26
29
|
"dependencies": {
|
|
27
|
-
"@anthropic-ai/sdk": "^0.
|
|
28
|
-
"@semiont/api-client": "*",
|
|
30
|
+
"@anthropic-ai/sdk": "^0.100.1",
|
|
29
31
|
"@semiont/core": "*",
|
|
30
32
|
"@semiont/observability": "*"
|
|
31
33
|
},
|
|
32
34
|
"devDependencies": {
|
|
33
|
-
"@vitest/coverage-v8": "^4.1.
|
|
34
|
-
"rollup": "^4.
|
|
35
|
+
"@vitest/coverage-v8": "^4.1.8",
|
|
36
|
+
"rollup": "^4.61.0",
|
|
35
37
|
"rollup-plugin-dts": "^6.4.1",
|
|
36
38
|
"tsup": "^8.0.1",
|
|
37
|
-
"typescript": "^6.0.2"
|
|
39
|
+
"typescript": "^6.0.2",
|
|
40
|
+
"vitest": "^4.1.8"
|
|
38
41
|
},
|
|
39
42
|
"publishConfig": {
|
|
40
43
|
"access": "public"
|
|
@@ -42,7 +45,6 @@
|
|
|
42
45
|
"keywords": [
|
|
43
46
|
"ai",
|
|
44
47
|
"inference",
|
|
45
|
-
"entity-extraction",
|
|
46
48
|
"llm",
|
|
47
49
|
"anthropic",
|
|
48
50
|
"ollama",
|