@semiont/inference 0.4.21 → 0.4.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
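
What changed: 0.4.22 instruments both inference clients with usage telemetry. Each request to Anthropic or Ollama is now timed with performance.now() and reported via recordInferenceUsage from the new @semiont/observability dependency, on success and on every failure path (thrown transport errors, non-OK HTTP responses, and empty or malformed response bodies). Below is a minimal sketch of the pattern the diff inlines into each client; the timedInference helper is hypothetical, for illustration only, and the recordInferenceUsage signature is assumed from its call sites in this diff.

// TypeScript sketch. Assumes recordInferenceUsage accepts the fields used below.
import { recordInferenceUsage } from "@semiont/observability";

async function timedInference<T>(
  provider: string,
  model: string,
  run: () => Promise<T>
): Promise<T> {
  const start = performance.now();
  try {
    const result = await run();
    // Success path: report wall-clock duration alongside the outcome.
    recordInferenceUsage({ provider, model, durationMs: performance.now() - start, outcome: "success" });
    return result;
  } catch (err) {
    // Failure path: still report, then rethrow so callers see the original error.
    recordInferenceUsage({ provider, model, durationMs: performance.now() - start, outcome: "error" });
    throw err;
  }
}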
package/dist/index.js CHANGED
@@ -1,5 +1,6 @@
  // src/implementations/anthropic.ts
  import Anthropic from "@anthropic-ai/sdk";
+ import { recordInferenceUsage } from "@semiont/observability";
  var AnthropicInferenceClient = class {
    type = "anthropic";
    modelId;
@@ -24,17 +25,29 @@ var AnthropicInferenceClient = class {
      maxTokens,
      temperature
    });
-   const response = await this.client.messages.create({
-     model: this.modelId,
-     max_tokens: maxTokens,
-     temperature,
-     messages: [
-       {
-         role: "user",
-         content: prompt
-       }
-     ]
-   });
+   const start = performance.now();
+   let response;
+   try {
+     response = await this.client.messages.create({
+       model: this.modelId,
+       max_tokens: maxTokens,
+       temperature,
+       messages: [
+         {
+           role: "user",
+           content: prompt
+         }
+       ]
+     });
+   } catch (err) {
+     recordInferenceUsage({
+       provider: this.type,
+       model: this.modelId,
+       durationMs: performance.now() - start,
+       outcome: "error"
+     });
+     throw err;
+   }
    this.logger?.debug("Inference response received", {
      model: this.modelId,
      contentBlocks: response.content.length,
@@ -42,12 +55,28 @@ var AnthropicInferenceClient = class {
    });
    const textContent = response.content.find((c) => c.type === "text");
    if (!textContent || textContent.type !== "text") {
+     recordInferenceUsage({
+       provider: this.type,
+       model: this.modelId,
+       durationMs: performance.now() - start,
+       outcome: "error",
+       inputTokens: response.usage?.input_tokens,
+       outputTokens: response.usage?.output_tokens
+     });
      this.logger?.error("No text content in inference response", {
        model: this.modelId,
        contentTypes: response.content.map((c) => c.type)
      });
      throw new Error("No text content in inference response");
    }
+   recordInferenceUsage({
+     provider: this.type,
+     model: this.modelId,
+     durationMs: performance.now() - start,
+     outcome: "success",
+     inputTokens: response.usage?.input_tokens,
+     outputTokens: response.usage?.output_tokens
+   });
    this.logger?.info("Text generation completed", {
      model: this.modelId,
      textLength: textContent.text.length,
@@ -61,6 +90,7 @@ var AnthropicInferenceClient = class {
  };

  // src/implementations/ollama.ts
+ import { recordInferenceUsage as recordInferenceUsage2 } from "@semiont/observability";
  var OllamaInferenceClient = class {
    type = "ollama";
    modelId;
@@ -83,21 +113,39 @@ var OllamaInferenceClient = class {
      temperature
    });
    const url = `${this.baseURL}/api/generate`;
-   const res = await fetch(url, {
-     method: "POST",
-     headers: { "Content-Type": "application/json" },
-     body: JSON.stringify({
+   const start = performance.now();
+   let res;
+   try {
+     res = await fetch(url, {
+       method: "POST",
+       headers: { "Content-Type": "application/json" },
+       body: JSON.stringify({
+         model: this.modelId,
+         prompt,
+         stream: false,
+         think: false,
+         options: {
+           num_predict: maxTokens,
+           temperature
+         }
+       })
+     });
+   } catch (err) {
+     recordInferenceUsage2({
+       provider: this.type,
        model: this.modelId,
-     prompt,
-     stream: false,
-     think: false,
-     options: {
-       num_predict: maxTokens,
-       temperature
-     }
-   })
-   });
+       durationMs: performance.now() - start,
+       outcome: "error"
+     });
+     throw err;
+   }
    if (!res.ok) {
+     recordInferenceUsage2({
+       provider: this.type,
+       model: this.modelId,
+       durationMs: performance.now() - start,
+       outcome: "error"
+     });
      const body = await res.text();
      this.logger?.error("Ollama API error", {
        model: this.modelId,
@@ -108,9 +156,25 @@ var OllamaInferenceClient = class {
    }
    const data = await res.json();
    if (!data.response) {
+     recordInferenceUsage2({
+       provider: this.type,
+       model: this.modelId,
+       durationMs: performance.now() - start,
+       outcome: "error",
+       inputTokens: data.prompt_eval_count,
+       outputTokens: data.eval_count
+     });
      this.logger?.error("Empty response from Ollama", { model: this.modelId });
      throw new Error("Empty response from Ollama");
    }
+   recordInferenceUsage2({
+     provider: this.type,
+     model: this.modelId,
+     durationMs: performance.now() - start,
+     outcome: "success",
+     inputTokens: data.prompt_eval_count,
+     outputTokens: data.eval_count
+   });
    const stopReason = mapStopReason(data.done_reason);
    this.logger?.info("Text generation completed", {
      model: this.modelId,
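
Both clients pass the same payload shape to recordInferenceUsage. A plausible event type, reconstructed purely from the call sites above (the real exported type lives in @semiont/observability and may differ):

// Hypothetical type, inferred only from this diff's call sites.
interface InferenceUsageEvent {
  provider: string;             // client type: "anthropic" or "ollama"
  model: string;                // the configured model identifier
  durationMs: number;           // wall-clock time measured via performance.now()
  outcome: "success" | "error";
  inputTokens?: number;         // Anthropic: usage.input_tokens; Ollama: prompt_eval_count
  outputTokens?: number;        // Anthropic: usage.output_tokens; Ollama: eval_count
}

Token counts are attached only where a response object exists; the catch blocks and the !res.ok branch report duration and outcome alone.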
package/dist/index.js.map CHANGED
@@ -1 +1 @@
(One-line minified source map, omitted for brevity. The 0.4.22 map is regenerated from the updated sources: the embedded sourcesContent gains the @semiont/observability import in both anthropic.ts and ollama.ts, the performance.now() timing and try/catch wrappers, all recordInferenceUsage call sites, and optional prompt_eval_count / eval_count fields on Ollama's response interface; the map's names array, previously empty, now contains the aliased recordInferenceUsage binding.)
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@semiont/inference",
-   "version": "0.4.21",
+   "version": "0.4.22",
    "type": "module",
    "description": "AI inference capabilities for entity extraction, text generation, and resource creation",
    "main": "./dist/index.js",
@@ -26,7 +26,8 @@
    "dependencies": {
      "@anthropic-ai/sdk": "^0.84.0",
      "@semiont/api-client": "*",
-     "@semiont/core": "*"
+     "@semiont/core": "*",
+     "@semiont/observability": "*"
    },
    "devDependencies": {
      "@vitest/coverage-v8": "^4.1.0",