@semiont/inference 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ import type { Logger } from '@semiont/core';
2
+ import { InferenceClient } from './interface.js';
3
+ export type InferenceClientType = 'anthropic' | 'ollama';
4
+ export interface InferenceClientConfig {
5
+ type: InferenceClientType;
6
+ apiKey?: string;
7
+ model: string;
8
+ endpoint?: string;
9
+ baseURL?: string;
10
+ }
11
+ export declare function createInferenceClient(config: InferenceClientConfig, logger?: Logger): InferenceClient;
12
+ //# sourceMappingURL=factory.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"factory.d.ts","sourceRoot":"","sources":["../src/factory.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAIjD,MAAM,MAAM,mBAAmB,GAAG,WAAW,GAAG,QAAQ,CAAC;AAEzD,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,qBAAqB,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,eAAe,CAyBrG"}
@@ -0,0 +1,12 @@
1
+ import type { Logger } from '@semiont/core';
2
+ import { InferenceClient, InferenceOptions, InferenceResponse } from '../interface.js';
3
+ export declare class AnthropicInferenceClient implements InferenceClient {
4
+ readonly type: "anthropic";
5
+ readonly modelId: string;
6
+ private client;
7
+ private logger?;
8
+ constructor(apiKey: string, model: string, baseURL?: string, logger?: Logger);
9
+ generateText(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<string>;
10
+ generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<InferenceResponse>;
11
+ }
12
+ //# sourceMappingURL=anthropic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../src/implementations/anthropic.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE5C,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEvF,qBAAa,wBAAyB,YAAW,eAAe;IAC9D,QAAQ,CAAC,IAAI,EAAG,WAAW,CAAU;IACrC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,MAAM,CAAC,CAAS;gBAEZ,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM;IAStE,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC;IAKjH,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC;CAqG/I"}
@@ -0,0 +1,20 @@
1
+ import { InferenceClient, InferenceOptions, InferenceResponse } from '../interface.js';
2
+ export declare class MockInferenceClient implements InferenceClient {
3
+ readonly type: "mock";
4
+ readonly modelId: "mock-model";
5
+ private responses;
6
+ private responseIndex;
7
+ private stopReasons;
8
+ calls: Array<{
9
+ prompt: string;
10
+ maxTokens: number;
11
+ temperature: number;
12
+ options?: InferenceOptions;
13
+ }>;
14
+ constructor(responses?: string[], stopReasons?: string[]);
15
+ generateText(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<string>;
16
+ generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<InferenceResponse>;
17
+ reset(): void;
18
+ setResponses(responses: string[], stopReasons?: string[]): void;
19
+ }
20
+ //# sourceMappingURL=mock.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mock.d.ts","sourceRoot":"","sources":["../../src/implementations/mock.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEvF,qBAAa,mBAAoB,YAAW,eAAe;IACzD,QAAQ,CAAC,IAAI,EAAG,MAAM,CAAU;IAChC,QAAQ,CAAC,OAAO,EAAG,YAAY,CAAU;IACzC,OAAO,CAAC,SAAS,CAAgB;IACjC,OAAO,CAAC,aAAa,CAAa;IAClC,OAAO,CAAC,WAAW,CAAgB;IAC5B,KAAK,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,gBAAgB,CAAA;KAAE,CAAC,CAAM;gBAErG,SAAS,GAAE,MAAM,EAAsB,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE;IAKrE,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC;IAKjH,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAc9I,KAAK,IAAI,IAAI;IAKb,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI;CAKhE"}
@@ -0,0 +1,12 @@
1
+ import type { Logger } from '@semiont/core';
2
+ import { InferenceClient, InferenceOptions, InferenceResponse } from '../interface.js';
3
+ export declare class OllamaInferenceClient implements InferenceClient {
4
+ readonly type: "ollama";
5
+ readonly modelId: string;
6
+ private baseURL;
7
+ private logger?;
8
+ constructor(model: string, baseURL?: string, logger?: Logger);
9
+ generateText(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<string>;
10
+ generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<InferenceResponse>;
11
+ }
12
+ //# sourceMappingURL=ollama.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ollama.d.ts","sourceRoot":"","sources":["../../src/implementations/ollama.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE5C,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAYvF,qBAAa,qBAAsB,YAAW,eAAe;IAC3D,QAAQ,CAAC,IAAI,EAAG,QAAQ,CAAU;IAClC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,MAAM,CAAC,CAAS;gBAEZ,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM;IAMtD,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC;IAKjH,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC;CA0G/I"}
package/dist/index.d.ts CHANGED
@@ -1,78 +1,6 @@
1
- import { Logger } from '@semiont/core';
2
-
3
- interface InferenceResponse {
4
- text: string;
5
- stopReason: 'end_turn' | 'max_tokens' | 'stop_sequence' | string;
6
- }
7
- interface InferenceClient {
8
- /** Provider type identifier (e.g. 'anthropic', 'ollama') */
9
- readonly type: string;
10
- /** Model identifier used for generation (e.g. 'claude-opus-4-6', 'llama3') */
11
- readonly modelId: string;
12
- /**
13
- * Generate text from a prompt (simple interface)
14
- * @param prompt - The input prompt
15
- * @param maxTokens - Maximum tokens to generate
16
- * @param temperature - Sampling temperature (0-1)
17
- * @returns Generated text
18
- */
19
- generateText(prompt: string, maxTokens: number, temperature: number): Promise<string>;
20
- /**
21
- * Generate text with detailed response information
22
- * @param prompt - The input prompt
23
- * @param maxTokens - Maximum tokens to generate
24
- * @param temperature - Sampling temperature (0-1)
25
- * @returns Response with text and metadata
26
- */
27
- generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number): Promise<InferenceResponse>;
28
- }
29
-
30
- type InferenceClientType = 'anthropic' | 'ollama';
31
- interface InferenceClientConfig {
32
- type: InferenceClientType;
33
- apiKey?: string;
34
- model: string;
35
- endpoint?: string;
36
- baseURL?: string;
37
- }
38
- declare function createInferenceClient(config: InferenceClientConfig, logger?: Logger): InferenceClient;
39
-
40
- declare class AnthropicInferenceClient implements InferenceClient {
41
- readonly type: "anthropic";
42
- readonly modelId: string;
43
- private client;
44
- private logger?;
45
- constructor(apiKey: string, model: string, baseURL?: string, logger?: Logger);
46
- generateText(prompt: string, maxTokens: number, temperature: number): Promise<string>;
47
- generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number): Promise<InferenceResponse>;
48
- }
49
-
50
- declare class OllamaInferenceClient implements InferenceClient {
51
- readonly type: "ollama";
52
- readonly modelId: string;
53
- private baseURL;
54
- private logger?;
55
- constructor(model: string, baseURL?: string, logger?: Logger);
56
- generateText(prompt: string, maxTokens: number, temperature: number): Promise<string>;
57
- generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number): Promise<InferenceResponse>;
58
- }
59
-
60
- declare class MockInferenceClient implements InferenceClient {
61
- readonly type: "mock";
62
- readonly modelId: "mock-model";
63
- private responses;
64
- private responseIndex;
65
- private stopReasons;
66
- calls: Array<{
67
- prompt: string;
68
- maxTokens: number;
69
- temperature: number;
70
- }>;
71
- constructor(responses?: string[], stopReasons?: string[]);
72
- generateText(prompt: string, maxTokens: number, temperature: number): Promise<string>;
73
- generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number): Promise<InferenceResponse>;
74
- reset(): void;
75
- setResponses(responses: string[], stopReasons?: string[]): void;
76
- }
77
-
78
- export { AnthropicInferenceClient, type InferenceClient, type InferenceClientConfig, type InferenceClientType, type InferenceResponse, MockInferenceClient, OllamaInferenceClient, createInferenceClient };
1
+ export { createInferenceClient, type InferenceClientConfig, type InferenceClientType, } from './factory';
2
+ export { type InferenceClient, type InferenceOptions, type InferenceResponse } from './interface';
3
+ export { AnthropicInferenceClient } from './implementations/anthropic';
4
+ export { OllamaInferenceClient } from './implementations/ollama';
5
+ export { MockInferenceClient } from './implementations/mock';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,qBAAqB,EACrB,KAAK,qBAAqB,EAC1B,KAAK,mBAAmB,GACzB,MAAM,WAAW,CAAC;AAEnB,OAAO,EAAE,KAAK,eAAe,EAAE,KAAK,gBAAgB,EAAE,KAAK,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAClG,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AACvE,OAAO,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC"}
package/dist/index.js CHANGED
@@ -14,17 +14,26 @@ var AnthropicInferenceClient = class {
14
14
  this.modelId = model;
15
15
  this.logger = logger;
16
16
  }
17
- async generateText(prompt, maxTokens, temperature) {
18
- const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature);
17
+ async generateText(prompt, maxTokens, temperature, options) {
18
+ const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature, options);
19
19
  return response.text;
20
20
  }
21
- async generateTextWithMetadata(prompt, maxTokens, temperature) {
21
+ async generateTextWithMetadata(prompt, maxTokens, temperature, options) {
22
+ const jsonMode = options?.format === "json";
23
+ const prefill = jsonMode ? "[" : void 0;
22
24
  this.logger?.debug("Generating text with inference client", {
23
25
  model: this.modelId,
24
26
  promptLength: prompt.length,
25
27
  maxTokens,
26
- temperature
28
+ temperature,
29
+ format: options?.format
27
30
  });
31
+ const messages = [
32
+ { role: "user", content: prompt }
33
+ ];
34
+ if (prefill) {
35
+ messages.push({ role: "assistant", content: prefill });
36
+ }
28
37
  const start = performance.now();
29
38
  let response;
30
39
  try {
@@ -32,12 +41,7 @@ var AnthropicInferenceClient = class {
32
41
  model: this.modelId,
33
42
  max_tokens: maxTokens,
34
43
  temperature,
35
- messages: [
36
- {
37
- role: "user",
38
- content: prompt
39
- }
40
- ]
44
+ messages
41
45
  });
42
46
  } catch (err) {
43
47
  recordInferenceUsage({
@@ -77,13 +81,14 @@ var AnthropicInferenceClient = class {
77
81
  inputTokens: response.usage?.input_tokens,
78
82
  outputTokens: response.usage?.output_tokens
79
83
  });
84
+ const text = prefill ? prefill + textContent.text : textContent.text;
80
85
  this.logger?.info("Text generation completed", {
81
86
  model: this.modelId,
82
- textLength: textContent.text.length,
87
+ textLength: text.length,
83
88
  stopReason: response.stop_reason
84
89
  });
85
90
  return {
86
- text: textContent.text,
91
+ text,
87
92
  stopReason: response.stop_reason || "unknown"
88
93
  };
89
94
  }
@@ -101,34 +106,39 @@ var OllamaInferenceClient = class {
101
106
  this.modelId = model;
102
107
  this.logger = logger;
103
108
  }
104
- async generateText(prompt, maxTokens, temperature) {
105
- const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature);
109
+ async generateText(prompt, maxTokens, temperature, options) {
110
+ const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature, options);
106
111
  return response.text;
107
112
  }
108
- async generateTextWithMetadata(prompt, maxTokens, temperature) {
113
+ async generateTextWithMetadata(prompt, maxTokens, temperature, options) {
109
114
  this.logger?.debug("Generating text with Ollama", {
110
115
  model: this.modelId,
111
116
  promptLength: prompt.length,
112
117
  maxTokens,
113
- temperature
118
+ temperature,
119
+ format: options?.format
114
120
  });
115
121
  const url = `${this.baseURL}/api/generate`;
116
122
  const start = performance.now();
123
+ const body = {
124
+ model: this.modelId,
125
+ prompt,
126
+ stream: false,
127
+ think: false,
128
+ options: {
129
+ num_predict: maxTokens,
130
+ temperature
131
+ }
132
+ };
133
+ if (options?.format === "json") {
134
+ body["format"] = { type: "array", items: {} };
135
+ }
117
136
  let res;
118
137
  try {
119
138
  res = await fetch(url, {
120
139
  method: "POST",
121
140
  headers: { "Content-Type": "application/json" },
122
- body: JSON.stringify({
123
- model: this.modelId,
124
- prompt,
125
- stream: false,
126
- think: false,
127
- options: {
128
- num_predict: maxTokens,
129
- temperature
130
- }
131
- })
141
+ body: JSON.stringify(body)
132
142
  });
133
143
  } catch (err) {
134
144
  recordInferenceUsage2({
@@ -146,13 +156,13 @@ var OllamaInferenceClient = class {
146
156
  durationMs: performance.now() - start,
147
157
  outcome: "error"
148
158
  });
149
- const body = await res.text();
159
+ const body2 = await res.text();
150
160
  this.logger?.error("Ollama API error", {
151
161
  model: this.modelId,
152
162
  status: res.status,
153
- body
163
+ body: body2
154
164
  });
155
- throw new Error(`Ollama API error (${res.status}): ${body}`);
165
+ throw new Error(`Ollama API error (${res.status}): ${body2}`);
156
166
  }
157
167
  const data = await res.json();
158
168
  if (!data.response) {
@@ -236,12 +246,12 @@ var MockInferenceClient = class {
236
246
  this.responses = responses;
237
247
  this.stopReasons = stopReasons || responses.map(() => "end_turn");
238
248
  }
239
- async generateText(prompt, maxTokens, temperature) {
240
- const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature);
249
+ async generateText(prompt, maxTokens, temperature, options) {
250
+ const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature, options);
241
251
  return response.text;
242
252
  }
243
- async generateTextWithMetadata(prompt, maxTokens, temperature) {
244
- this.calls.push({ prompt, maxTokens, temperature });
253
+ async generateTextWithMetadata(prompt, maxTokens, temperature, options) {
254
+ this.calls.push({ prompt, maxTokens, temperature, ...options ? { options } : {} });
245
255
  const text = this.responses[this.responseIndex];
246
256
  const stopReason = this.stopReasons[this.responseIndex] || "end_turn";
247
257
  if (this.responseIndex < this.responses.length - 1) {
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/implementations/anthropic.ts","../src/implementations/ollama.ts","../src/factory.ts","../src/implementations/mock.ts"],"sourcesContent":["// Anthropic Claude implementation of InferenceClient interface\n\nimport Anthropic from '@anthropic-ai/sdk';\nimport type { Logger } from '@semiont/core';\nimport { recordInferenceUsage } from '@semiont/observability';\nimport { InferenceClient, InferenceResponse } from '../interface.js';\n\nexport class AnthropicInferenceClient implements InferenceClient {\n readonly type = 'anthropic' as const;\n readonly modelId: string;\n private client: Anthropic;\n private logger?: Logger;\n\n constructor(apiKey: string, model: string, baseURL?: string, logger?: Logger) {\n this.client = new Anthropic({\n apiKey,\n baseURL: baseURL || 'https://api.anthropic.com',\n });\n this.modelId = model;\n this.logger = logger;\n }\n\n async generateText(prompt: string, maxTokens: number, temperature: number): Promise<string> {\n const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature);\n return response.text;\n }\n\n async generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number): Promise<InferenceResponse> {\n this.logger?.debug('Generating text with inference client', {\n model: this.modelId,\n promptLength: prompt.length,\n maxTokens,\n temperature\n });\n\n const start = performance.now();\n let response: Awaited<ReturnType<typeof this.client.messages.create>>;\n try {\n response = await this.client.messages.create({\n model: this.modelId,\n max_tokens: maxTokens,\n temperature,\n messages: [\n {\n role: 'user',\n content: prompt\n }\n ]\n });\n } catch (err) {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n });\n throw err;\n }\n\n this.logger?.debug('Inference response received', {\n model: this.modelId,\n contentBlocks: response.content.length,\n stopReason: response.stop_reason\n });\n\n const textContent = response.content.find(c => c.type === 'text');\n\n if (!textContent || textContent.type !== 'text') {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n inputTokens: response.usage?.input_tokens,\n outputTokens: response.usage?.output_tokens,\n });\n this.logger?.error('No text content in inference response', {\n model: this.modelId,\n contentTypes: response.content.map(c => c.type)\n });\n throw new Error('No text content in inference response');\n }\n\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'success',\n inputTokens: response.usage?.input_tokens,\n outputTokens: response.usage?.output_tokens,\n });\n\n this.logger?.info('Text generation completed', {\n model: this.modelId,\n textLength: textContent.text.length,\n stopReason: response.stop_reason\n });\n\n return {\n text: textContent.text,\n stopReason: response.stop_reason || 'unknown'\n };\n }\n}\n","// Ollama implementation of InferenceClient interface\n// Uses native Ollama HTTP API (no SDK dependency)\n\nimport type { Logger } from '@semiont/core';\nimport { recordInferenceUsage } from '@semiont/observability';\nimport { InferenceClient, InferenceResponse } from '../interface.js';\n\ninterface OllamaGenerateResponse {\n response: string;\n done: boolean;\n done_reason?: string;\n /** Number of prompt tokens evaluated. Available on most Ollama versions. */\n prompt_eval_count?: number;\n /** Number of tokens generated. */\n eval_count?: number;\n}\n\nexport class OllamaInferenceClient implements InferenceClient {\n readonly type = 'ollama' as const;\n readonly modelId: string;\n private baseURL: string;\n private logger?: Logger;\n\n constructor(model: string, baseURL?: string, logger?: Logger) {\n this.baseURL = (baseURL || 'http://localhost:11434').replace(/\\/+$/, '');\n this.modelId = model;\n this.logger = logger;\n }\n\n async generateText(prompt: string, maxTokens: number, temperature: number): Promise<string> {\n const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature);\n return response.text;\n }\n\n async generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number): Promise<InferenceResponse> {\n this.logger?.debug('Generating text with Ollama', {\n model: this.modelId,\n promptLength: prompt.length,\n maxTokens,\n temperature\n });\n\n const url = `${this.baseURL}/api/generate`;\n const start = performance.now();\n\n let res: Response;\n try {\n res = await fetch(url, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({\n model: this.modelId,\n prompt,\n stream: false,\n think: false,\n options: {\n num_predict: maxTokens,\n temperature,\n },\n }),\n });\n } catch (err) {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n });\n throw err;\n }\n\n if (!res.ok) {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n });\n const body = await res.text();\n this.logger?.error('Ollama API error', {\n model: this.modelId,\n status: res.status,\n body,\n });\n throw new Error(`Ollama API error (${res.status}): ${body}`);\n }\n\n const data = await res.json() as OllamaGenerateResponse;\n\n if (!data.response) {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n inputTokens: data.prompt_eval_count,\n outputTokens: data.eval_count,\n });\n this.logger?.error('Empty response from Ollama', { model: this.modelId });\n throw new Error('Empty response from Ollama');\n }\n\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'success',\n inputTokens: data.prompt_eval_count,\n outputTokens: data.eval_count,\n });\n\n const stopReason = mapStopReason(data.done_reason);\n\n this.logger?.info('Text generation completed', {\n model: this.modelId,\n textLength: data.response.length,\n stopReason,\n });\n\n return {\n text: data.response,\n stopReason,\n };\n }\n}\n\nfunction mapStopReason(doneReason: string | undefined): string {\n switch (doneReason) {\n case 'stop': return 'end_turn';\n case 'length': return 'max_tokens';\n default: return doneReason || 'unknown';\n }\n}\n","// Factory for creating inference client instances based on configuration\n\nimport type { Logger } from '@semiont/core';\nimport { InferenceClient } from './interface.js';\nimport { AnthropicInferenceClient } from './implementations/anthropic.js';\nimport { OllamaInferenceClient } from './implementations/ollama.js';\n\nexport type InferenceClientType = 'anthropic' | 'ollama';\n\nexport interface InferenceClientConfig {\n type: InferenceClientType;\n apiKey?: string;\n model: string;\n endpoint?: string;\n baseURL?: string;\n}\n\nexport function createInferenceClient(config: InferenceClientConfig, logger?: Logger): InferenceClient {\n switch (config.type) {\n case 'anthropic': {\n if (!config.apiKey || config.apiKey.trim() === '') {\n throw new Error('apiKey is required for Anthropic inference client');\n }\n return new AnthropicInferenceClient(\n config.apiKey,\n config.model,\n config.endpoint || config.baseURL,\n logger\n );\n }\n\n case 'ollama': {\n return new OllamaInferenceClient(\n config.model,\n config.endpoint || config.baseURL,\n logger\n );\n }\n\n default:\n throw new Error(`Unsupported inference client type: ${config.type}`);\n }\n}\n","// Mock implementation of InferenceClient for testing\n\nimport { InferenceClient, InferenceResponse } from '../interface.js';\n\nexport class MockInferenceClient implements InferenceClient {\n readonly type = 'mock' as const;\n readonly modelId = 'mock-model' as const;\n private responses: string[] = [];\n private responseIndex: number = 0;\n private stopReasons: string[] = [];\n public calls: Array<{ prompt: string; maxTokens: number; temperature: number }> = [];\n\n constructor(responses: string[] = ['Mock response'], stopReasons?: string[]) {\n this.responses = responses;\n this.stopReasons = stopReasons || responses.map(() => 'end_turn');\n }\n\n async generateText(prompt: string, maxTokens: number, temperature: number): Promise<string> {\n const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature);\n return response.text;\n }\n\n async generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number): Promise<InferenceResponse> {\n this.calls.push({ prompt, maxTokens, temperature });\n\n const text = this.responses[this.responseIndex];\n const stopReason = this.stopReasons[this.responseIndex] || 'end_turn';\n\n if (this.responseIndex < this.responses.length - 1) {\n this.responseIndex++;\n }\n\n return { text, stopReason };\n }\n\n // Test helper methods\n reset(): void {\n this.calls = [];\n this.responseIndex = 0;\n }\n\n setResponses(responses: string[], stopReasons?: string[]): void {\n this.responses = responses;\n this.stopReasons = stopReasons || responses.map(() => 'end_turn');\n this.responseIndex = 0;\n }\n}\n"],"mappings":";AAEA,OAAO,eAAe;AAEtB,SAAS,4BAA4B;AAG9B,IAAM,2BAAN,MAA0D;AAAA,EACtD,OAAO;AAAA,EACP;AAAA,EACD;AAAA,EACA;AAAA,EAER,YAAY,QAAgB,OAAe,SAAkB,QAAiB;AAC5E,SAAK,SAAS,IAAI,UAAU;AAAA,MAC1B;AAAA,MACA,SAAS,WAAW;AAAA,IACtB,CAAC;AACD,SAAK,UAAU;AACf,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,aAAa,QAAgB,WAAmB,aAAsC;AAC1F,UAAM,WAAW,MAAM,KAAK,yBAAyB,QAAQ,WAAW,WAAW;AACnF,WAAO,SAAS;AAAA,EAClB;AAAA,EAEA,MAAM,yBAAyB,QAAgB,WAAmB,aAAiD;AACjH,SAAK,QAAQ,MAAM,yCAAyC;AAAA,MAC1D,OAAO,KAAK;AAAA,MACZ,cAAc,OAAO;AAAA,MACrB;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,QAAQ,YAAY,IAAI;AAC9B,QAAI;AACJ,QAAI;AACF,iBAAW,MAAM,KAAK,OAAO,SAAS,OAAO;AAAA,QAC3C,OAAO,KAAK;AAAA,QACZ,YAAY;AAAA,QACZ;AAAA,QACA,UAAU;AAAA,UACR;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,UACX;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,2BAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAEA,SAAK,QAAQ,MAAM,+BAA+B;AAAA,MAChD,OAAO,KAAK;AAAA,MACZ,eAAe,SAAS,QAAQ;AAAA,MAChC,YAAY,SAAS;AAAA,IACvB,CAAC;AAED,UAAM,cAAc,SAAS,QAAQ,KAAK,OAAK,EAAE,SAAS,MAAM;AAEhE,QAAI,CAAC,eAAe,YAAY,SAAS,QAAQ;AAC/C,2BAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,QACT,aAAa,SAAS,OAAO;AAAA,QAC7B,cAAc,SAAS,OAAO;AAAA,MAChC,CAAC;AACD,WAAK,QAAQ,MAAM,yCAAyC;AAAA,QAC1D,OAAO,KAAK;AAAA,QACZ,cAAc,SAAS,QAAQ,IAAI,OAAK,EAAE,IAAI;AAAA,MAChD,CAAC;AACD,YAAM,IAAI,MAAM,uCAAuC;AAAA,IACzD;AAEA,yBAAqB;AAAA,MACnB,UAAU,KAAK;AAAA,MACf,OAAO,KAAK;AAAA,MACZ,YAAY,YAAY,IAAI,IAAI;AAAA,MAChC,SAAS;AAAA,MACT,aAAa,SAAS,OAAO;AAAA,MAC7B,cAAc,SAAS,OAAO;AAAA,IAChC,CAAC;AAED,SAAK,QAAQ,KAAK,6BAA6B;AAAA,MAC7C,OAAO,KAAK;AAAA,MACZ,YAAY,YAAY,KAAK;AAAA,MAC7B,YAAY,SAAS;AAAA,IACvB,CAAC;AAED,WAAO;AAAA,MACL,MAAM,YAAY;AAAA,MAClB,YAAY,SAAS,eAAe;AAAA,IACtC;AAAA,EACF;AACF;;;ACnGA,SAAS,wBAAAA,6BAA4B;AAa9B,IAAM,wBAAN,MAAuD;AAAA,EACnD,OAAO;AAAA,EACP;AAAA,EACD;AAAA,EACA;AAAA,EAER,YAAY,OAAe,SAAkB,QAAiB;AAC5D,SAAK,WAAW,WAAW,0BAA0B,QAAQ,QAAQ,EAAE;AACvE,SAAK,UAAU;AACf,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,aAAa,QAAgB,WAAmB,aAAsC;AAC1F,UAAM,WAAW,MAAM,KAAK,yBAAyB,QAAQ,WAAW,WAAW;AACnF,WAAO,SAAS;AAAA,EAClB;AAAA,EAEA,MAAM,yBAAyB,QAAgB,WAAmB,aAAiD;AACjH,SAAK,QAAQ,MAAM,+BAA+B;AAAA,MAChD,OAAO,KAAK;AAAA,MACZ,cAAc,OAAO;AAAA,MACrB;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,MAAM,GAAG,KAAK,OAAO;AAC3B,UAAM,QAAQ,YAAY,IAAI;AAE9B,QAAI;AACJ,QAAI;AACF,YAAM,MAAM,MAAM,KAAK;AAAA,QACrB,QAAQ;AAAA,QACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,QAC9C,MAAM,KAAK,UAAU;AAAA,UACnB,OAAO,KAAK;AAAA,UACZ;AAAA,UACA,QAAQ;AAAA,UACR,OAAO;AAAA,UACP,SAAS;AAAA,YACP,aAAa;AAAA,YACb;AAAA,UACF;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,MAAAA,sBAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAEA,QAAI,CAAC,IAAI,IAAI;AACX,MAAAA,sBAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,MACX,CAAC;AACD,YAAM,OAAO,MAAM,IAAI,KAAK;AAC5B,WAAK,QAAQ,MAAM,oBAAoB;AAAA,QACrC,OAAO,KAAK;AAAA,QACZ,QAAQ,IAAI;AAAA,QACZ;AAAA,MACF,CAAC;AACD,YAAM,IAAI,MAAM,qBAAqB,IAAI,MAAM,MAAM,IAAI,EAAE;AAAA,IAC7D;AAEA,UAAM,OAAO,MAAM,IAAI,KAAK;AAE5B,QAAI,CAAC,KAAK,UAAU;AAClB,MAAAA,sBAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,QACT,aAAa,KAAK;AAAA,QAClB,cAAc,KAAK;AAAA,MACrB,CAAC;AACD,WAAK,QAAQ,MAAM,8BAA8B,EAAE,OAAO,KAAK,QAAQ,CAAC;AACxE,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAEA,IAAAA,sBAAqB;AAAA,MACnB,UAAU,KAAK;AAAA,MACf,OAAO,KAAK;AAAA,MACZ,YAAY,YAAY,IAAI,IAAI;AAAA,MAChC,SAAS;AAAA,MACT,aAAa,KAAK;AAAA,MAClB,cAAc,KAAK;AAAA,IACrB,CAAC;AAED,UAAM,aAAa,cAAc,KAAK,WAAW;AAEjD,SAAK,QAAQ,KAAK,6BAA6B;AAAA,MAC7C,OAAO,KAAK;AAAA,MACZ,YAAY,KAAK,SAAS;AAAA,MAC1B;AAAA,IACF,CAAC;AAED,WAAO;AAAA,MACL,MAAM,KAAK;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,cAAc,YAAwC;AAC7D,UAAQ,YAAY;AAAA,IAClB,KAAK;AAAQ,aAAO;AAAA,IACpB,KAAK;AAAU,aAAO;AAAA,IACtB;AAAS,aAAO,cAAc;AAAA,EAChC;AACF;;;ACnHO,SAAS,sBAAsB,QAA+B,QAAkC;AACrG,UAAQ,OAAO,MAAM;AAAA,IACnB,KAAK,aAAa;AAChB,UAAI,CAAC,OAAO,UAAU,OAAO,OAAO,KAAK,MAAM,IAAI;AACjD,cAAM,IAAI,MAAM,mDAAmD;AAAA,MACrE;AACA,aAAO,IAAI;AAAA,QACT,OAAO;AAAA,QACP,OAAO;AAAA,QACP,OAAO,YAAY,OAAO;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,IAEA,KAAK,UAAU;AACb,aAAO,IAAI;AAAA,QACT,OAAO;AAAA,QACP,OAAO,YAAY,OAAO;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,IAEA;AACE,YAAM,IAAI,MAAM,sCAAsC,OAAO,IAAI,EAAE;AAAA,EACvE;AACF;;;ACtCO,IAAM,sBAAN,MAAqD;AAAA,EACjD,OAAO;AAAA,EACP,UAAU;AAAA,EACX,YAAsB,CAAC;AAAA,EACvB,gBAAwB;AAAA,EACxB,cAAwB,CAAC;AAAA,EAC1B,QAA2E,CAAC;AAAA,EAEnF,YAAY,YAAsB,CAAC,eAAe,GAAG,aAAwB;AAC3E,SAAK,YAAY;AACjB,SAAK,cAAc,eAAe,UAAU,IAAI,MAAM,UAAU;AAAA,EAClE;AAAA,EAEA,MAAM,aAAa,QAAgB,WAAmB,aAAsC;AAC1F,UAAM,WAAW,MAAM,KAAK,yBAAyB,QAAQ,WAAW,WAAW;AACnF,WAAO,SAAS;AAAA,EAClB;AAAA,EAEA,MAAM,yBAAyB,QAAgB,WAAmB,aAAiD;AACjH,SAAK,MAAM,KAAK,EAAE,QAAQ,WAAW,YAAY,CAAC;AAElD,UAAM,OAAO,KAAK,UAAU,KAAK,aAAa;AAC9C,UAAM,aAAa,KAAK,YAAY,KAAK,aAAa,KAAK;AAE3D,QAAI,KAAK,gBAAgB,KAAK,UAAU,SAAS,GAAG;AAClD,WAAK;AAAA,IACP;AAEA,WAAO,EAAE,MAAM,WAAW;AAAA,EAC5B;AAAA;AAAA,EAGA,QAAc;AACZ,SAAK,QAAQ,CAAC;AACd,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,aAAa,WAAqB,aAA8B;AAC9D,SAAK,YAAY;AACjB,SAAK,cAAc,eAAe,UAAU,IAAI,MAAM,UAAU;AAChE,SAAK,gBAAgB;AAAA,EACvB;AACF;","names":["recordInferenceUsage"]}
1
+ {"version":3,"sources":["../src/implementations/anthropic.ts","../src/implementations/ollama.ts","../src/factory.ts","../src/implementations/mock.ts"],"sourcesContent":["// Anthropic Claude implementation of InferenceClient interface\n\nimport Anthropic from '@anthropic-ai/sdk';\nimport type { Logger } from '@semiont/core';\nimport { recordInferenceUsage } from '@semiont/observability';\nimport { InferenceClient, InferenceOptions, InferenceResponse } from '../interface.js';\n\nexport class AnthropicInferenceClient implements InferenceClient {\n readonly type = 'anthropic' as const;\n readonly modelId: string;\n private client: Anthropic;\n private logger?: Logger;\n\n constructor(apiKey: string, model: string, baseURL?: string, logger?: Logger) {\n this.client = new Anthropic({\n apiKey,\n baseURL: baseURL || 'https://api.anthropic.com',\n });\n this.modelId = model;\n this.logger = logger;\n }\n\n async generateText(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<string> {\n const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature, options);\n return response.text;\n }\n\n async generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<InferenceResponse> {\n // Anthropic has no grammar-constrained sampling layer like Ollama's\n // `format: \"json\"`. The closest equivalent is *prefill*: add an\n // assistant turn whose content is the opening bracket of the\n // expected structure. Claude continues from there, syntactically\n // committed to producing valid JSON. The prefill characters don't\n // appear in the response, so we prepend them ourselves on return.\n //\n // We assume `[` (array) — every current caller of the inference\n // layer that asks for JSON expects an array (entity extraction,\n // motivation detection). If an object-emitting caller appears, the\n // option needs to grow a `root: 'array' | 'object'` field; for now\n // a single shape keeps the contract small.\n const jsonMode = options?.format === 'json';\n const prefill = jsonMode ? '[' : undefined;\n\n this.logger?.debug('Generating text with inference client', {\n model: this.modelId,\n promptLength: prompt.length,\n maxTokens,\n temperature,\n format: options?.format,\n });\n\n const messages: Array<{ role: 'user' | 'assistant'; content: string }> = [\n { role: 'user', content: prompt },\n ];\n if (prefill) {\n messages.push({ role: 'assistant', content: prefill });\n }\n\n const start = performance.now();\n let response: Awaited<ReturnType<typeof this.client.messages.create>>;\n try {\n response = await this.client.messages.create({\n model: this.modelId,\n max_tokens: maxTokens,\n temperature,\n messages,\n });\n } catch (err) {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n });\n throw err;\n }\n\n this.logger?.debug('Inference response received', {\n model: this.modelId,\n contentBlocks: response.content.length,\n stopReason: response.stop_reason\n });\n\n const textContent = response.content.find(c => c.type === 'text');\n\n if (!textContent || textContent.type !== 'text') {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n inputTokens: response.usage?.input_tokens,\n outputTokens: response.usage?.output_tokens,\n });\n this.logger?.error('No text content in inference response', {\n model: this.modelId,\n contentTypes: response.content.map(c => c.type)\n });\n throw new Error('No text content in inference response');\n }\n\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'success',\n inputTokens: response.usage?.input_tokens,\n outputTokens: response.usage?.output_tokens,\n });\n\n // Re-attach the prefill prefix so the returned text is what the caller\n // *would have seen* if Anthropic had a native JSON mode — a complete,\n // parseable JSON document. Without this, the consumer would get the\n // tail (`{...}, {...}]`) and parse-fail trying to read it as a top-\n // level array.\n const text = prefill ? prefill + textContent.text : textContent.text;\n\n this.logger?.info('Text generation completed', {\n model: this.modelId,\n textLength: text.length,\n stopReason: response.stop_reason\n });\n\n return {\n text,\n stopReason: response.stop_reason || 'unknown'\n };\n }\n}\n","// Ollama implementation of InferenceClient interface\n// Uses native Ollama HTTP API (no SDK dependency)\n\nimport type { Logger } from '@semiont/core';\nimport { recordInferenceUsage } from '@semiont/observability';\nimport { InferenceClient, InferenceOptions, InferenceResponse } from '../interface.js';\n\ninterface OllamaGenerateResponse {\n response: string;\n done: boolean;\n done_reason?: string;\n /** Number of prompt tokens evaluated. Available on most Ollama versions. */\n prompt_eval_count?: number;\n /** Number of tokens generated. */\n eval_count?: number;\n}\n\nexport class OllamaInferenceClient implements InferenceClient {\n readonly type = 'ollama' as const;\n readonly modelId: string;\n private baseURL: string;\n private logger?: Logger;\n\n constructor(model: string, baseURL?: string, logger?: Logger) {\n this.baseURL = (baseURL || 'http://localhost:11434').replace(/\\/+$/, '');\n this.modelId = model;\n this.logger = logger;\n }\n\n async generateText(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<string> {\n const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature, options);\n return response.text;\n }\n\n async generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<InferenceResponse> {\n this.logger?.debug('Generating text with Ollama', {\n model: this.modelId,\n promptLength: prompt.length,\n maxTokens,\n temperature,\n format: options?.format,\n });\n\n const url = `${this.baseURL}/api/generate`;\n const start = performance.now();\n\n // Ollama's `format` parameter accepts either the literal string\n // `\"json\"` (any valid JSON, including objects, numbers, etc.) or a\n // JSON schema (constrains the top-level shape). The contract on the\n // inference side is \"parseable JSON array,\" so we pass a minimal\n // array schema rather than the bare `\"json\"` string — without it,\n // the model can satisfy \"valid JSON\" by emitting `{\"entities\": [...]}`\n // and break every consumer that expects to call `.map` on the\n // top-level value. The schema's `items: {}` keeps element shape\n // unconstrained — the prompt still carries the per-element schema;\n // we only enforce the outer array.\n const body: Record<string, unknown> = {\n model: this.modelId,\n prompt,\n stream: false,\n think: false,\n options: {\n num_predict: maxTokens,\n temperature,\n },\n };\n if (options?.format === 'json') {\n body['format'] = { type: 'array', items: {} };\n }\n\n let res: Response;\n try {\n res = await fetch(url, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(body),\n });\n } catch (err) {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n });\n throw err;\n }\n\n if (!res.ok) {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n });\n const body = await res.text();\n this.logger?.error('Ollama API error', {\n model: this.modelId,\n status: res.status,\n body,\n });\n throw new Error(`Ollama API error (${res.status}): ${body}`);\n }\n\n const data = await res.json() as OllamaGenerateResponse;\n\n if (!data.response) {\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'error',\n inputTokens: data.prompt_eval_count,\n outputTokens: data.eval_count,\n });\n this.logger?.error('Empty response from Ollama', { model: this.modelId });\n throw new Error('Empty response from Ollama');\n }\n\n recordInferenceUsage({\n provider: this.type,\n model: this.modelId,\n durationMs: performance.now() - start,\n outcome: 'success',\n inputTokens: data.prompt_eval_count,\n outputTokens: data.eval_count,\n });\n\n const stopReason = mapStopReason(data.done_reason);\n\n this.logger?.info('Text generation completed', {\n model: this.modelId,\n textLength: data.response.length,\n stopReason,\n });\n\n return {\n text: data.response,\n stopReason,\n };\n }\n}\n\nfunction mapStopReason(doneReason: string | undefined): string {\n switch (doneReason) {\n case 'stop': return 'end_turn';\n case 'length': return 'max_tokens';\n default: return doneReason || 'unknown';\n }\n}\n","// Factory for creating inference client instances based on configuration\n\nimport type { Logger } from '@semiont/core';\nimport { InferenceClient } from './interface.js';\nimport { AnthropicInferenceClient } from './implementations/anthropic.js';\nimport { OllamaInferenceClient } from './implementations/ollama.js';\n\nexport type InferenceClientType = 'anthropic' | 'ollama';\n\nexport interface InferenceClientConfig {\n type: InferenceClientType;\n apiKey?: string;\n model: string;\n endpoint?: string;\n baseURL?: string;\n}\n\nexport function createInferenceClient(config: InferenceClientConfig, logger?: Logger): InferenceClient {\n switch (config.type) {\n case 'anthropic': {\n if (!config.apiKey || config.apiKey.trim() === '') {\n throw new Error('apiKey is required for Anthropic inference client');\n }\n return new AnthropicInferenceClient(\n config.apiKey,\n config.model,\n config.endpoint || config.baseURL,\n logger\n );\n }\n\n case 'ollama': {\n return new OllamaInferenceClient(\n config.model,\n config.endpoint || config.baseURL,\n logger\n );\n }\n\n default:\n throw new Error(`Unsupported inference client type: ${config.type}`);\n }\n}\n","// Mock implementation of InferenceClient for testing\n\nimport { InferenceClient, InferenceOptions, InferenceResponse } from '../interface.js';\n\nexport class MockInferenceClient implements InferenceClient {\n readonly type = 'mock' as const;\n readonly modelId = 'mock-model' as const;\n private responses: string[] = [];\n private responseIndex: number = 0;\n private stopReasons: string[] = [];\n public calls: Array<{ prompt: string; maxTokens: number; temperature: number; options?: InferenceOptions }> = [];\n\n constructor(responses: string[] = ['Mock response'], stopReasons?: string[]) {\n this.responses = responses;\n this.stopReasons = stopReasons || responses.map(() => 'end_turn');\n }\n\n async generateText(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<string> {\n const response = await this.generateTextWithMetadata(prompt, maxTokens, temperature, options);\n return response.text;\n }\n\n async generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<InferenceResponse> {\n this.calls.push({ prompt, maxTokens, temperature, ...(options ? { options } : {}) });\n\n const text = this.responses[this.responseIndex];\n const stopReason = this.stopReasons[this.responseIndex] || 'end_turn';\n\n if (this.responseIndex < this.responses.length - 1) {\n this.responseIndex++;\n }\n\n return { text, stopReason };\n }\n\n // Test helper methods\n reset(): void {\n this.calls = [];\n this.responseIndex = 0;\n }\n\n setResponses(responses: string[], stopReasons?: string[]): void {\n this.responses = responses;\n this.stopReasons = stopReasons || responses.map(() => 'end_turn');\n this.responseIndex = 0;\n }\n}\n"],"mappings":";AAEA,OAAO,eAAe;AAEtB,SAAS,4BAA4B;AAG9B,IAAM,2BAAN,MAA0D;AAAA,EACtD,OAAO;AAAA,EACP;AAAA,EACD;AAAA,EACA;AAAA,EAER,YAAY,QAAgB,OAAe,SAAkB,QAAiB;AAC5E,SAAK,SAAS,IAAI,UAAU;AAAA,MAC1B;AAAA,MACA,SAAS,WAAW;AAAA,IACtB,CAAC;AACD,SAAK,UAAU;AACf,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,aAAa,QAAgB,WAAmB,aAAqB,SAA6C;AACtH,UAAM,WAAW,MAAM,KAAK,yBAAyB,QAAQ,WAAW,aAAa,OAAO;AAC5F,WAAO,SAAS;AAAA,EAClB;AAAA,EAEA,MAAM,yBAAyB,QAAgB,WAAmB,aAAqB,SAAwD;AAa7I,UAAM,WAAW,SAAS,WAAW;AACrC,UAAM,UAAU,WAAW,MAAM;AAEjC,SAAK,QAAQ,MAAM,yCAAyC;AAAA,MAC1D,OAAO,KAAK;AAAA,MACZ,cAAc,OAAO;AAAA,MACrB;AAAA,MACA;AAAA,MACA,QAAQ,SAAS;AAAA,IACnB,CAAC;AAED,UAAM,WAAmE;AAAA,MACvE,EAAE,MAAM,QAAQ,SAAS,OAAO;AAAA,IAClC;AACA,QAAI,SAAS;AACX,eAAS,KAAK,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAAA,IACvD;AAEA,UAAM,QAAQ,YAAY,IAAI;AAC9B,QAAI;AACJ,QAAI;AACF,iBAAW,MAAM,KAAK,OAAO,SAAS,OAAO;AAAA,QAC3C,OAAO,KAAK;AAAA,QACZ,YAAY;AAAA,QACZ;AAAA,QACA;AAAA,MACF,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,2BAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAEA,SAAK,QAAQ,MAAM,+BAA+B;AAAA,MAChD,OAAO,KAAK;AAAA,MACZ,eAAe,SAAS,QAAQ;AAAA,MAChC,YAAY,SAAS;AAAA,IACvB,CAAC;AAED,UAAM,cAAc,SAAS,QAAQ,KAAK,OAAK,EAAE,SAAS,MAAM;AAEhE,QAAI,CAAC,eAAe,YAAY,SAAS,QAAQ;AAC/C,2BAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,QACT,aAAa,SAAS,OAAO;AAAA,QAC7B,cAAc,SAAS,OAAO;AAAA,MAChC,CAAC;AACD,WAAK,QAAQ,MAAM,yCAAyC;AAAA,QAC1D,OAAO,KAAK;AAAA,QACZ,cAAc,SAAS,QAAQ,IAAI,OAAK,EAAE,IAAI;AAAA,MAChD,CAAC;AACD,YAAM,IAAI,MAAM,uCAAuC;AAAA,IACzD;AAEA,yBAAqB;AAAA,MACnB,UAAU,KAAK;AAAA,MACf,OAAO,KAAK;AAAA,MACZ,YAAY,YAAY,IAAI,IAAI;AAAA,MAChC,SAAS;AAAA,MACT,aAAa,SAAS,OAAO;AAAA,MAC7B,cAAc,SAAS,OAAO;AAAA,IAChC,CAAC;AAOD,UAAM,OAAO,UAAU,UAAU,YAAY,OAAO,YAAY;AAEhE,SAAK,QAAQ,KAAK,6BAA6B;AAAA,MAC7C,OAAO,KAAK;AAAA,MACZ,YAAY,KAAK;AAAA,MACjB,YAAY,SAAS;AAAA,IACvB,CAAC;AAED,WAAO;AAAA,MACL;AAAA,MACA,YAAY,SAAS,eAAe;AAAA,IACtC;AAAA,EACF;AACF;;;AC5HA,SAAS,wBAAAA,6BAA4B;AAa9B,IAAM,wBAAN,MAAuD;AAAA,EACnD,OAAO;AAAA,EACP;AAAA,EACD;AAAA,EACA;AAAA,EAER,YAAY,OAAe,SAAkB,QAAiB;AAC5D,SAAK,WAAW,WAAW,0BAA0B,QAAQ,QAAQ,EAAE;AACvE,SAAK,UAAU;AACf,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,aAAa,QAAgB,WAAmB,aAAqB,SAA6C;AACtH,UAAM,WAAW,MAAM,KAAK,yBAAyB,QAAQ,WAAW,aAAa,OAAO;AAC5F,WAAO,SAAS;AAAA,EAClB;AAAA,EAEA,MAAM,yBAAyB,QAAgB,WAAmB,aAAqB,SAAwD;AAC7I,SAAK,QAAQ,MAAM,+BAA+B;AAAA,MAChD,OAAO,KAAK;AAAA,MACZ,cAAc,OAAO;AAAA,MACrB;AAAA,MACA;AAAA,MACA,QAAQ,SAAS;AAAA,IACnB,CAAC;AAED,UAAM,MAAM,GAAG,KAAK,OAAO;AAC3B,UAAM,QAAQ,YAAY,IAAI;AAY9B,UAAM,OAAgC;AAAA,MACpC,OAAO,KAAK;AAAA,MACZ;AAAA,MACA,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,SAAS;AAAA,QACP,aAAa;AAAA,QACb;AAAA,MACF;AAAA,IACF;AACA,QAAI,SAAS,WAAW,QAAQ;AAC9B,WAAK,QAAQ,IAAI,EAAE,MAAM,SAAS,OAAO,CAAC,EAAE;AAAA,IAC9C;AAEA,QAAI;AACJ,QAAI;AACF,YAAM,MAAM,MAAM,KAAK;AAAA,QACrB,QAAQ;AAAA,QACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,QAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MAC3B,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,MAAAA,sBAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,MACX,CAAC;AACD,YAAM;AAAA,IACR;AAEA,QAAI,CAAC,IAAI,IAAI;AACX,MAAAA,sBAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,MACX,CAAC;AACD,YAAMC,QAAO,MAAM,IAAI,KAAK;AAC5B,WAAK,QAAQ,MAAM,oBAAoB;AAAA,QACrC,OAAO,KAAK;AAAA,QACZ,QAAQ,IAAI;AAAA,QACZ,MAAAA;AAAA,MACF,CAAC;AACD,YAAM,IAAI,MAAM,qBAAqB,IAAI,MAAM,MAAMA,KAAI,EAAE;AAAA,IAC7D;AAEA,UAAM,OAAO,MAAM,IAAI,KAAK;AAE5B,QAAI,CAAC,KAAK,UAAU;AAClB,MAAAD,sBAAqB;AAAA,QACnB,UAAU,KAAK;AAAA,QACf,OAAO,KAAK;AAAA,QACZ,YAAY,YAAY,IAAI,IAAI;AAAA,QAChC,SAAS;AAAA,QACT,aAAa,KAAK;AAAA,QAClB,cAAc,KAAK;AAAA,MACrB,CAAC;AACD,WAAK,QAAQ,MAAM,8BAA8B,EAAE,OAAO,KAAK,QAAQ,CAAC;AACxE,YAAM,IAAI,MAAM,4BAA4B;AAAA,IAC9C;AAEA,IAAAA,sBAAqB;AAAA,MACnB,UAAU,KAAK;AAAA,MACf,OAAO,KAAK;AAAA,MACZ,YAAY,YAAY,IAAI,IAAI;AAAA,MAChC,SAAS;AAAA,MACT,aAAa,KAAK;AAAA,MAClB,cAAc,KAAK;AAAA,IACrB,CAAC;AAED,UAAM,aAAa,cAAc,KAAK,WAAW;AAEjD,SAAK,QAAQ,KAAK,6BAA6B;AAAA,MAC7C,OAAO,KAAK;AAAA,MACZ,YAAY,KAAK,SAAS;AAAA,MAC1B;AAAA,IACF,CAAC;AAED,WAAO;AAAA,MACL,MAAM,KAAK;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,cAAc,YAAwC;AAC7D,UAAQ,YAAY;AAAA,IAClB,KAAK;AAAQ,aAAO;AAAA,IACpB,KAAK;AAAU,aAAO;AAAA,IACtB;AAAS,aAAO,cAAc;AAAA,EAChC;AACF;;;ACnIO,SAAS,sBAAsB,QAA+B,QAAkC;AACrG,UAAQ,OAAO,MAAM;AAAA,IACnB,KAAK,aAAa;AAChB,UAAI,CAAC,OAAO,UAAU,OAAO,OAAO,KAAK,MAAM,IAAI;AACjD,cAAM,IAAI,MAAM,mDAAmD;AAAA,MACrE;AACA,aAAO,IAAI;AAAA,QACT,OAAO;AAAA,QACP,OAAO;AAAA,QACP,OAAO,YAAY,OAAO;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,IAEA,KAAK,UAAU;AACb,aAAO,IAAI;AAAA,QACT,OAAO;AAAA,QACP,OAAO,YAAY,OAAO;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,IAEA;AACE,YAAM,IAAI,MAAM,sCAAsC,OAAO,IAAI,EAAE;AAAA,EACvE;AACF;;;ACtCO,IAAM,sBAAN,MAAqD;AAAA,EACjD,OAAO;AAAA,EACP,UAAU;AAAA,EACX,YAAsB,CAAC;AAAA,EACvB,gBAAwB;AAAA,EACxB,cAAwB,CAAC;AAAA,EAC1B,QAAuG,CAAC;AAAA,EAE/G,YAAY,YAAsB,CAAC,eAAe,GAAG,aAAwB;AAC3E,SAAK,YAAY;AACjB,SAAK,cAAc,eAAe,UAAU,IAAI,MAAM,UAAU;AAAA,EAClE;AAAA,EAEA,MAAM,aAAa,QAAgB,WAAmB,aAAqB,SAA6C;AACtH,UAAM,WAAW,MAAM,KAAK,yBAAyB,QAAQ,WAAW,aAAa,OAAO;AAC5F,WAAO,SAAS;AAAA,EAClB;AAAA,EAEA,MAAM,yBAAyB,QAAgB,WAAmB,aAAqB,SAAwD;AAC7I,SAAK,MAAM,KAAK,EAAE,QAAQ,WAAW,aAAa,GAAI,UAAU,EAAE,QAAQ,IAAI,CAAC,EAAG,CAAC;AAEnF,UAAM,OAAO,KAAK,UAAU,KAAK,aAAa;AAC9C,UAAM,aAAa,KAAK,YAAY,KAAK,aAAa,KAAK;AAE3D,QAAI,KAAK,gBAAgB,KAAK,UAAU,SAAS,GAAG;AAClD,WAAK;AAAA,IACP;AAEA,WAAO,EAAE,MAAM,WAAW;AAAA,EAC5B;AAAA;AAAA,EAGA,QAAc;AACZ,SAAK,QAAQ,CAAC;AACd,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,aAAa,WAAqB,aAA8B;AAC9D,SAAK,YAAY;AACjB,SAAK,cAAc,eAAe,UAAU,IAAI,MAAM,UAAU;AAChE,SAAK,gBAAgB;AAAA,EACvB;AACF;","names":["recordInferenceUsage","body"]}
@@ -0,0 +1,41 @@
1
+ export interface InferenceResponse {
2
+ text: string;
3
+ stopReason: 'end_turn' | 'max_tokens' | 'stop_sequence' | string;
4
+ }
5
+ /**
6
+ * Per-call options. Drift here when output discipline matters more than
7
+ * raw model behavior — e.g. forcing valid JSON for entity extraction
8
+ * where a parse failure in the consumer is silent and useless.
9
+ */
10
+ export interface InferenceOptions {
11
+ /**
12
+ * Constrain output to a parseable JSON array. Every implementation
13
+ * MUST satisfy this contract using whatever mechanism its provider
14
+ * supports — Ollama uses grammar-constrained sampling
15
+ * (`format: "json"`); Anthropic uses assistant-turn prefill (`[`)
16
+ * and re-attaches the prefix on return. Callers can rely on the
17
+ * returned `text` being a top-level JSON array regardless of
18
+ * provider.
19
+ *
20
+ * Current callers all expect arrays (entity extraction, motivation
21
+ * detection). If an object-emitting caller appears, this option
22
+ * grows a `root: 'array' | 'object'` field; do not silently drop
23
+ * the constraint.
24
+ */
25
+ format?: 'json';
26
+ }
27
+ export interface InferenceClient {
28
+ /** Provider type identifier (e.g. 'anthropic', 'ollama') */
29
+ readonly type: string;
30
+ /** Model identifier used for generation (e.g. 'claude-opus-4-6', 'llama3') */
31
+ readonly modelId: string;
32
+ /**
33
+ * Generate text from a prompt (simple interface)
34
+ */
35
+ generateText(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<string>;
36
+ /**
37
+ * Generate text with detailed response information
38
+ */
39
+ generateTextWithMetadata(prompt: string, maxTokens: number, temperature: number, options?: InferenceOptions): Promise<InferenceResponse>;
40
+ }
41
+ //# sourceMappingURL=interface.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interface.d.ts","sourceRoot":"","sources":["../src/interface.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,UAAU,GAAG,YAAY,GAAG,eAAe,GAAG,MAAM,CAAC;CAClE;AAED;;;;GAIG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;;;;;;;;;;;OAaG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,4DAA4D;IAC5D,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB,8EAA8E;IAC9E,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IAEzB;;OAEG;IACH,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAElH;;OAEG;IACH,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;CAC1I"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@semiont/inference",
3
- "version": "0.5.3",
3
+ "version": "0.5.4",
4
4
  "type": "module",
5
5
  "description": "AI inference capabilities for entity extraction, text generation, and resource creation",
6
6
  "main": "./dist/index.js",
@@ -16,7 +16,7 @@
16
16
  "README.md"
17
17
  ],
18
18
  "scripts": {
19
- "build": "npm run typecheck && tsup",
19
+ "build": "npm run typecheck && tsup && tsc -p tsconfig.build.json",
20
20
  "typecheck": "tsc --noEmit",
21
21
  "clean": "rm -rf dist",
22
22
  "test": "vitest run",
@@ -32,7 +32,7 @@
32
32
  "devDependencies": {
33
33
  "@vitest/coverage-v8": "^4.1.0",
34
34
  "tsup": "^8.0.1",
35
- "typescript": "^5.6.3"
35
+ "typescript": "^6.0.2"
36
36
  },
37
37
  "publishConfig": {
38
38
  "access": "public"