@semiont/inference 0.4.20 → 0.4.22
This diff shows the changes between publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
- package/dist/index.js +88 -24
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/index.js
CHANGED
@@ -1,5 +1,6 @@
 // src/implementations/anthropic.ts
 import Anthropic from "@anthropic-ai/sdk";
+import { recordInferenceUsage } from "@semiont/observability";
 var AnthropicInferenceClient = class {
   type = "anthropic";
   modelId;
@@ -24,17 +25,29 @@ var AnthropicInferenceClient = class {
       maxTokens,
       temperature
     });
-    const response = await this.client.messages.create({
-      model: this.modelId,
-      max_tokens: maxTokens,
-      temperature,
-      messages: [
-        {
-          role: "user",
-          content: prompt
-        }
-      ]
-    });
+    const start = performance.now();
+    let response;
+    try {
+      response = await this.client.messages.create({
+        model: this.modelId,
+        max_tokens: maxTokens,
+        temperature,
+        messages: [
+          {
+            role: "user",
+            content: prompt
+          }
+        ]
+      });
+    } catch (err) {
+      recordInferenceUsage({
+        provider: this.type,
+        model: this.modelId,
+        durationMs: performance.now() - start,
+        outcome: "error"
+      });
+      throw err;
+    }
     this.logger?.debug("Inference response received", {
       model: this.modelId,
       contentBlocks: response.content.length,
@@ -42,12 +55,28 @@ var AnthropicInferenceClient = class {
     });
     const textContent = response.content.find((c) => c.type === "text");
     if (!textContent || textContent.type !== "text") {
+      recordInferenceUsage({
+        provider: this.type,
+        model: this.modelId,
+        durationMs: performance.now() - start,
+        outcome: "error",
+        inputTokens: response.usage?.input_tokens,
+        outputTokens: response.usage?.output_tokens
+      });
       this.logger?.error("No text content in inference response", {
         model: this.modelId,
         contentTypes: response.content.map((c) => c.type)
       });
       throw new Error("No text content in inference response");
     }
+    recordInferenceUsage({
+      provider: this.type,
+      model: this.modelId,
+      durationMs: performance.now() - start,
+      outcome: "success",
+      inputTokens: response.usage?.input_tokens,
+      outputTokens: response.usage?.output_tokens
+    });
     this.logger?.info("Text generation completed", {
       model: this.modelId,
       textLength: textContent.text.length,
@@ -61,6 +90,7 @@ var AnthropicInferenceClient = class {
 };
 
 // src/implementations/ollama.ts
+import { recordInferenceUsage as recordInferenceUsage2 } from "@semiont/observability";
 var OllamaInferenceClient = class {
   type = "ollama";
   modelId;
@@ -83,21 +113,39 @@ var OllamaInferenceClient = class {
       temperature
     });
     const url = `${this.baseURL}/api/generate`;
-    const res = await fetch(url, {
-      method: "POST",
-      headers: { "Content-Type": "application/json" },
-      body: JSON.stringify({
+    const start = performance.now();
+    let res;
+    try {
+      res = await fetch(url, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          model: this.modelId,
+          prompt,
+          stream: false,
+          think: false,
+          options: {
+            num_predict: maxTokens,
+            temperature
+          }
+        })
+      });
+    } catch (err) {
+      recordInferenceUsage2({
+        provider: this.type,
         model: this.modelId,
-        prompt,
-        stream: false,
-        think: false,
-        options: {
-          num_predict: maxTokens,
-          temperature
-        }
-      })
-    });
+        durationMs: performance.now() - start,
+        outcome: "error"
+      });
+      throw err;
+    }
     if (!res.ok) {
+      recordInferenceUsage2({
+        provider: this.type,
+        model: this.modelId,
+        durationMs: performance.now() - start,
+        outcome: "error"
+      });
       const body = await res.text();
       this.logger?.error("Ollama API error", {
         model: this.modelId,
@@ -108,9 +156,25 @@ var OllamaInferenceClient = class {
     }
     const data = await res.json();
     if (!data.response) {
+      recordInferenceUsage2({
+        provider: this.type,
+        model: this.modelId,
+        durationMs: performance.now() - start,
+        outcome: "error",
+        inputTokens: data.prompt_eval_count,
+        outputTokens: data.eval_count
+      });
       this.logger?.error("Empty response from Ollama", { model: this.modelId });
       throw new Error("Empty response from Ollama");
     }
+    recordInferenceUsage2({
+      provider: this.type,
+      model: this.modelId,
+      durationMs: performance.now() - start,
+      outcome: "success",
+      inputTokens: data.prompt_eval_count,
+      outputTokens: data.eval_count
+    });
     const stopReason = mapStopReason(data.done_reason);
     this.logger?.info("Text generation completed", {
       model: this.modelId,
package/dist/index.js.map
CHANGED
@@ -1 +1 @@
- [single-line source map for 0.4.20: version 3; sources ../src/implementations/anthropic.ts, ../src/implementations/ollama.ts, ../src/factory.ts, ../src/implementations/mock.ts; "names":[]; full sourcesContent and mappings omitted here for readability]
+ [single-line source map for 0.4.22: same sources; embedded sourcesContent and mappings regenerated for the recordInferenceUsage instrumentation, and OllamaGenerateResponse gains optional prompt_eval_count/eval_count fields; "names":["recordInferenceUsage"]]
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@semiont/inference",
-  "version": "0.4.20",
+  "version": "0.4.22",
   "type": "module",
   "description": "AI inference capabilities for entity extraction, text generation, and resource creation",
   "main": "./dist/index.js",
@@ -26,7 +26,8 @@
   "dependencies": {
     "@anthropic-ai/sdk": "^0.84.0",
     "@semiont/api-client": "*",
-    "@semiont/core": "*"
+    "@semiont/core": "*",
+    "@semiont/observability": "*"
   },
   "devDependencies": {
     "@vitest/coverage-v8": "^4.1.0",