agentic-flow 2.0.7 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.json +133 -9
- package/.claude/skills/agentic-flow-quickstart/skill.md +69 -0
- package/.claude/skills/hooks-automation/skill.md +155 -0
- package/.claude/skills/memory-patterns/skill.md +110 -0
- package/.claude/skills/sparc-methodology/skill.md +137 -0
- package/.claude/skills/swarm-coordination/skill.md +94 -0
- package/.claude/skills/worker-benchmarks/skill.md +135 -0
- package/.claude/skills/worker-integration/skill.md +154 -0
- package/.claude/statusline.mjs +109 -0
- package/.claude/statusline.sh +71 -0
- package/README.md +1797 -624
- package/dist/.tsbuildinfo +1 -1
- package/dist/agentdb/index.d.ts +2 -0
- package/dist/agentdb/index.d.ts.map +1 -1
- package/dist/agentdb/index.js +5 -0
- package/dist/agentdb/index.js.map +1 -1
- package/dist/agentdb/prerequisites.d.ts +25 -0
- package/dist/agentdb/prerequisites.d.ts.map +1 -0
- package/dist/agentdb/prerequisites.js +180 -0
- package/dist/agentdb/prerequisites.js.map +1 -0
- package/dist/benchmarks/embeddings-benchmark.d.ts +38 -0
- package/dist/benchmarks/embeddings-benchmark.d.ts.map +1 -0
- package/dist/benchmarks/embeddings-benchmark.js +282 -0
- package/dist/benchmarks/embeddings-benchmark.js.map +1 -0
- package/dist/billing/mcp/tools.js +1 -0
- package/dist/billing/mcp/tools.js.map +1 -1
- package/dist/cli/commands/embeddings.d.ts +12 -0
- package/dist/cli/commands/embeddings.d.ts.map +1 -0
- package/dist/cli/commands/embeddings.js +386 -0
- package/dist/cli/commands/embeddings.js.map +1 -0
- package/dist/cli/commands/hooks.d.ts.map +1 -1
- package/dist/cli/commands/hooks.js +79 -4
- package/dist/cli/commands/hooks.js.map +1 -1
- package/dist/cli/commands/init.d.ts +8 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +514 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/workers.d.ts +9 -0
- package/dist/cli/commands/workers.d.ts.map +1 -0
- package/dist/cli/commands/workers.js +991 -0
- package/dist/cli/commands/workers.js.map +1 -0
- package/dist/cli/config-wizard.d.ts.map +1 -1
- package/dist/cli/config-wizard.js +47 -25
- package/dist/cli/config-wizard.js.map +1 -1
- package/dist/cli-proxy.js +89 -1
- package/dist/cli-proxy.js.map +1 -1
- package/dist/core/agentdb-fast.js +3 -3
- package/dist/core/agentdb-fast.js.map +1 -1
- package/dist/core/agentdb-wrapper-enhanced.d.ts.map +1 -1
- package/dist/core/agentdb-wrapper-enhanced.js +37 -11
- package/dist/core/agentdb-wrapper-enhanced.js.map +1 -1
- package/dist/core/agentdb-wrapper.d.ts +3 -2
- package/dist/core/agentdb-wrapper.d.ts.map +1 -1
- package/dist/core/agentdb-wrapper.js +15 -1
- package/dist/core/agentdb-wrapper.js.map +1 -1
- package/dist/core/attention-native.d.ts +4 -0
- package/dist/core/attention-native.d.ts.map +1 -1
- package/dist/core/attention-native.js +14 -2
- package/dist/core/attention-native.js.map +1 -1
- package/dist/core/gnn-wrapper.d.ts.map +1 -1
- package/dist/core/gnn-wrapper.js +14 -0
- package/dist/core/gnn-wrapper.js.map +1 -1
- package/dist/embeddings/index.d.ts +17 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +17 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/neural-substrate.d.ts +206 -0
- package/dist/embeddings/neural-substrate.d.ts.map +1 -0
- package/dist/embeddings/neural-substrate.js +629 -0
- package/dist/embeddings/neural-substrate.js.map +1 -0
- package/dist/embeddings/optimized-embedder.d.ts +103 -0
- package/dist/embeddings/optimized-embedder.d.ts.map +1 -0
- package/dist/embeddings/optimized-embedder.js +730 -0
- package/dist/embeddings/optimized-embedder.js.map +1 -0
- package/dist/examples/embedding-geometry.d.ts +105 -0
- package/dist/examples/embedding-geometry.d.ts.map +1 -0
- package/dist/examples/embedding-geometry.js +528 -0
- package/dist/examples/embedding-geometry.js.map +1 -0
- package/dist/federation/SecurityManager.d.ts +11 -2
- package/dist/federation/SecurityManager.d.ts.map +1 -1
- package/dist/federation/SecurityManager.js +50 -17
- package/dist/federation/SecurityManager.js.map +1 -1
- package/dist/federation/integrations/supabase-adapter-debug.js +3 -3
- package/dist/federation/integrations/supabase-adapter-debug.js.map +1 -1
- package/dist/hooks/swarm-learning-optimizer.js +5 -5
- package/dist/hooks/swarm-learning-optimizer.js.map +1 -1
- package/dist/index.d.ts +5 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +89 -51
- package/dist/index.js.map +1 -1
- package/dist/intelligence/IntelligenceStore.d.ts +35 -26
- package/dist/intelligence/IntelligenceStore.d.ts.map +1 -1
- package/dist/intelligence/IntelligenceStore.js +308 -123
- package/dist/intelligence/IntelligenceStore.js.map +1 -1
- package/dist/intelligence/RuVectorIntelligence.d.ts +26 -1
- package/dist/intelligence/RuVectorIntelligence.d.ts.map +1 -1
- package/dist/intelligence/RuVectorIntelligence.js +49 -10
- package/dist/intelligence/RuVectorIntelligence.js.map +1 -1
- package/dist/intelligence/agent-booster-enhanced.d.ts +1 -0
- package/dist/intelligence/agent-booster-enhanced.d.ts.map +1 -1
- package/dist/intelligence/agent-booster-enhanced.js +24 -3
- package/dist/intelligence/agent-booster-enhanced.js.map +1 -1
- package/dist/intelligence/index.d.ts +29 -3
- package/dist/intelligence/index.d.ts.map +1 -1
- package/dist/intelligence/index.js +13 -3
- package/dist/intelligence/index.js.map +1 -1
- package/dist/mcp/claudeFlowSdkServer.d.ts.map +1 -1
- package/dist/mcp/claudeFlowSdkServer.js +162 -115
- package/dist/mcp/claudeFlowSdkServer.js.map +1 -1
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.js +5 -5
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.js.map +1 -1
- package/dist/mcp/fastmcp/tools/swarm/init.d.ts.map +1 -1
- package/dist/mcp/fastmcp/tools/swarm/init.js +36 -7
- package/dist/mcp/fastmcp/tools/swarm/init.js.map +1 -1
- package/dist/mcp/fastmcp/tools/swarm/spawn.d.ts.map +1 -1
- package/dist/mcp/fastmcp/tools/swarm/spawn.js +47 -8
- package/dist/mcp/fastmcp/tools/swarm/spawn.js.map +1 -1
- package/dist/mcp/tools/agent-booster-tools.d.ts +15 -1
- package/dist/mcp/tools/agent-booster-tools.d.ts.map +1 -1
- package/dist/mcp/tools/agent-booster-tools.js +79 -63
- package/dist/mcp/tools/agent-booster-tools.js.map +1 -1
- package/dist/mcp/tools/sona-tools.d.ts.map +1 -1
- package/dist/mcp/tools/sona-tools.js +6 -0
- package/dist/mcp/tools/sona-tools.js.map +1 -1
- package/dist/memory/SharedMemoryPool.d.ts +103 -0
- package/dist/memory/SharedMemoryPool.d.ts.map +1 -0
- package/dist/memory/SharedMemoryPool.js +263 -0
- package/dist/memory/SharedMemoryPool.js.map +1 -0
- package/dist/optimizations/agent-booster-migration.d.ts +2 -1
- package/dist/optimizations/agent-booster-migration.d.ts.map +1 -1
- package/dist/optimizations/agent-booster-migration.js +69 -45
- package/dist/optimizations/agent-booster-migration.js.map +1 -1
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.d.ts.map +1 -0
- package/dist/orchestration/index.js +10 -0
- package/dist/orchestration/index.js.map +1 -0
- package/dist/orchestration/memory-plane-types.d.ts +23 -0
- package/dist/orchestration/memory-plane-types.d.ts.map +1 -0
- package/dist/orchestration/memory-plane-types.js +5 -0
- package/dist/orchestration/memory-plane-types.js.map +1 -0
- package/dist/orchestration/memory-plane.d.ts +41 -0
- package/dist/orchestration/memory-plane.d.ts.map +1 -0
- package/dist/orchestration/memory-plane.js +84 -0
- package/dist/orchestration/memory-plane.js.map +1 -0
- package/dist/orchestration/orchestration-client.d.ts +104 -0
- package/dist/orchestration/orchestration-client.d.ts.map +1 -0
- package/dist/orchestration/orchestration-client.js +94 -0
- package/dist/orchestration/orchestration-client.js.map +1 -0
- package/dist/orchestration/orchestration-runtime.d.ts +26 -0
- package/dist/orchestration/orchestration-runtime.d.ts.map +1 -0
- package/dist/orchestration/orchestration-runtime.js +78 -0
- package/dist/orchestration/orchestration-runtime.js.map +1 -0
- package/dist/orchestration/orchestration-types.d.ts +124 -0
- package/dist/orchestration/orchestration-types.d.ts.map +1 -0
- package/dist/orchestration/orchestration-types.js +7 -0
- package/dist/orchestration/orchestration-types.js.map +1 -0
- package/dist/proxy/anthropic-to-openrouter.js.map +1 -1
- package/dist/proxy/anthropic-to-requesty.js.map +1 -1
- package/dist/proxy/quic-proxy.d.ts +0 -1
- package/dist/proxy/quic-proxy.d.ts.map +1 -1
- package/dist/proxy/quic-proxy.js +2 -2
- package/dist/proxy/quic-proxy.js.map +1 -1
- package/dist/reasoningbank/AdvancedMemory.js +1 -1
- package/dist/reasoningbank/AdvancedMemory.js.map +1 -1
- package/dist/reasoningbank/HybridBackend.d.ts.map +1 -1
- package/dist/reasoningbank/HybridBackend.js +2 -5
- package/dist/reasoningbank/HybridBackend.js.map +1 -1
- package/dist/reasoningbank/backend-selector.d.ts +10 -0
- package/dist/reasoningbank/backend-selector.d.ts.map +1 -1
- package/dist/reasoningbank/backend-selector.js +45 -5
- package/dist/reasoningbank/backend-selector.js.map +1 -1
- package/dist/reasoningbank/core/consolidate.d.ts.map +1 -1
- package/dist/reasoningbank/core/consolidate.js +113 -45
- package/dist/reasoningbank/core/consolidate.js.map +1 -1
- package/dist/reasoningbank/index-new.d.ts +1 -6
- package/dist/reasoningbank/index-new.d.ts.map +1 -1
- package/dist/reasoningbank/index-new.js +1 -6
- package/dist/reasoningbank/index-new.js.map +1 -1
- package/dist/reasoningbank/index.d.ts +6 -0
- package/dist/reasoningbank/index.d.ts.map +1 -1
- package/dist/reasoningbank/index.js +6 -13
- package/dist/reasoningbank/index.js.map +1 -1
- package/dist/reasoningbank/utils/embeddings.d.ts +1 -0
- package/dist/reasoningbank/utils/embeddings.d.ts.map +1 -1
- package/dist/reasoningbank/utils/embeddings.js +53 -26
- package/dist/reasoningbank/utils/embeddings.js.map +1 -1
- package/dist/router/index.d.ts +2 -1
- package/dist/router/index.d.ts.map +1 -1
- package/dist/router/index.js +1 -0
- package/dist/router/index.js.map +1 -1
- package/dist/router/providers/ollama.d.ts +20 -0
- package/dist/router/providers/ollama.d.ts.map +1 -0
- package/dist/router/providers/ollama.js +242 -0
- package/dist/router/providers/ollama.js.map +1 -0
- package/dist/router/providers/onnx-local-optimized.d.ts +2 -0
- package/dist/router/providers/onnx-local-optimized.d.ts.map +1 -1
- package/dist/router/providers/onnx-local-optimized.js +10 -0
- package/dist/router/providers/onnx-local-optimized.js.map +1 -1
- package/dist/router/providers/onnx-local.d.ts +1 -0
- package/dist/router/providers/onnx-local.d.ts.map +1 -1
- package/dist/router/providers/onnx-local.js +22 -5
- package/dist/router/providers/onnx-local.js.map +1 -1
- package/dist/router/router.d.ts.map +1 -1
- package/dist/router/router.js +39 -23
- package/dist/router/router.js.map +1 -1
- package/dist/sdk/index.d.ts +2 -1
- package/dist/sdk/index.d.ts.map +1 -1
- package/dist/sdk/index.js +3 -1
- package/dist/sdk/index.js.map +1 -1
- package/dist/services/embedding-service.js.map +1 -1
- package/dist/services/sona-agent-training.d.ts +1 -0
- package/dist/services/sona-agent-training.d.ts.map +1 -1
- package/dist/services/sona-agent-training.js.map +1 -1
- package/dist/services/sona-agentdb-integration.d.ts.map +1 -1
- package/dist/services/sona-agentdb-integration.js +9 -6
- package/dist/services/sona-agentdb-integration.js.map +1 -1
- package/dist/services/sona-service.d.ts.map +1 -1
- package/dist/services/sona-service.js +6 -5
- package/dist/services/sona-service.js.map +1 -1
- package/dist/utils/cli.d.ts +1 -1
- package/dist/utils/cli.d.ts.map +1 -1
- package/dist/utils/cli.js +21 -0
- package/dist/utils/cli.js.map +1 -1
- package/dist/utils/index.d.ts +6 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +6 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/model-cache.d.ts +61 -0
- package/dist/utils/model-cache.d.ts.map +1 -0
- package/dist/utils/model-cache.js +176 -0
- package/dist/utils/model-cache.js.map +1 -0
- package/dist/utils/suppress-warnings.d.ts +19 -0
- package/dist/utils/suppress-warnings.d.ts.map +1 -0
- package/dist/utils/suppress-warnings.js +59 -0
- package/dist/utils/suppress-warnings.js.map +1 -0
- package/dist/workers/consolidated-phases.d.ts +40 -0
- package/dist/workers/consolidated-phases.d.ts.map +1 -0
- package/dist/workers/consolidated-phases.js +497 -0
- package/dist/workers/consolidated-phases.js.map +1 -0
- package/dist/workers/custom-worker-config.d.ts +133 -0
- package/dist/workers/custom-worker-config.d.ts.map +1 -0
- package/dist/workers/custom-worker-config.js +215 -0
- package/dist/workers/custom-worker-config.js.map +1 -0
- package/dist/workers/custom-worker-factory.d.ts +89 -0
- package/dist/workers/custom-worker-factory.d.ts.map +1 -0
- package/dist/workers/custom-worker-factory.js +404 -0
- package/dist/workers/custom-worker-factory.js.map +1 -0
- package/dist/workers/dispatch-service.d.ts +123 -0
- package/dist/workers/dispatch-service.d.ts.map +1 -0
- package/dist/workers/dispatch-service.js +1024 -0
- package/dist/workers/dispatch-service.js.map +1 -0
- package/dist/workers/hooks-integration.d.ts +79 -0
- package/dist/workers/hooks-integration.d.ts.map +1 -0
- package/dist/workers/hooks-integration.js +286 -0
- package/dist/workers/hooks-integration.js.map +1 -0
- package/dist/workers/index.d.ts +42 -0
- package/dist/workers/index.d.ts.map +1 -0
- package/dist/workers/index.js +52 -0
- package/dist/workers/index.js.map +1 -0
- package/dist/workers/mcp-tools.d.ts +56 -0
- package/dist/workers/mcp-tools.d.ts.map +1 -0
- package/dist/workers/mcp-tools.js +359 -0
- package/dist/workers/mcp-tools.js.map +1 -0
- package/dist/workers/phase-executors.d.ts +22 -0
- package/dist/workers/phase-executors.d.ts.map +1 -0
- package/dist/workers/phase-executors.js +445 -0
- package/dist/workers/phase-executors.js.map +1 -0
- package/dist/workers/resource-governor.d.ts +75 -0
- package/dist/workers/resource-governor.d.ts.map +1 -0
- package/dist/workers/resource-governor.js +187 -0
- package/dist/workers/resource-governor.js.map +1 -0
- package/dist/workers/ruvector-integration.d.ts +163 -0
- package/dist/workers/ruvector-integration.d.ts.map +1 -0
- package/dist/workers/ruvector-integration.js +543 -0
- package/dist/workers/ruvector-integration.js.map +1 -0
- package/dist/workers/ruvector-native-integration.d.ts +91 -0
- package/dist/workers/ruvector-native-integration.d.ts.map +1 -0
- package/dist/workers/ruvector-native-integration.js +254 -0
- package/dist/workers/ruvector-native-integration.js.map +1 -0
- package/dist/workers/trigger-detector.d.ts +68 -0
- package/dist/workers/trigger-detector.d.ts.map +1 -0
- package/dist/workers/trigger-detector.js +281 -0
- package/dist/workers/trigger-detector.js.map +1 -0
- package/dist/workers/types.d.ts +145 -0
- package/dist/workers/types.d.ts.map +1 -0
- package/dist/workers/types.js +6 -0
- package/dist/workers/types.js.map +1 -0
- package/dist/workers/worker-agent-integration.d.ts +140 -0
- package/dist/workers/worker-agent-integration.d.ts.map +1 -0
- package/dist/workers/worker-agent-integration.js +471 -0
- package/dist/workers/worker-agent-integration.js.map +1 -0
- package/dist/workers/worker-benchmarks.d.ts +88 -0
- package/dist/workers/worker-benchmarks.d.ts.map +1 -0
- package/dist/workers/worker-benchmarks.js +452 -0
- package/dist/workers/worker-benchmarks.js.map +1 -0
- package/dist/workers/worker-registry.d.ts +85 -0
- package/dist/workers/worker-registry.d.ts.map +1 -0
- package/dist/workers/worker-registry.js +547 -0
- package/dist/workers/worker-registry.js.map +1 -0
- package/docs/embeddings/EMBEDDING_GEOMETRY.md +935 -0
- package/package.json +27 -9
- package/scripts/postinstall.js +45 -4
- package/wasm/reasoningbank/reasoningbank_wasm.js +1 -1
- package/wasm/reasoningbank/reasoningbank_wasm_bg.js +54 -54
- package/wasm/reasoningbank/reasoningbank_wasm_bg.wasm +0 -0
- package/wasm/reasoningbank/reasoningbank_wasm_bg.wasm.d.ts +4 -3
- package/.claude/agents/test-neural.md +0 -14
- /package/.claude/agents/analysis/{code-review/analyze-code-quality.md → analyze-code-quality.md} +0 -0
- /package/.claude/agents/architecture/{system-design/arch-system-design.md → arch-system-design.md} +0 -0
- /package/.claude/agents/data/{ml/data-ml-model.md → data-ml-model.md} +0 -0
- /package/.claude/agents/development/{backend/dev-backend-api.md → dev-backend-api.md} +0 -0
- /package/.claude/agents/devops/{ci-cd/ops-cicd-github.md → ops-cicd-github.md} +0 -0
- /package/.claude/agents/documentation/{api-docs/docs-api-openapi.md → docs-api-openapi.md} +0 -0
- /package/.claude/agents/specialized/{mobile/spec-mobile-react-native.md → spec-mobile-react-native.md} +0 -0
- /package/.claude/agents/testing/{validation/production-validator.md → production-validator.md} +0 -0
- /package/.claude/agents/testing/{unit/tdd-london-swarm.md → tdd-london-swarm.md} +0 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
// Ollama provider - OpenAI-compatible chat completions
|
|
2
|
+
// Works with both ollama.com Cloud (requires OLLAMA_API_KEY) and self-hosted
|
|
3
|
+
// (typically http://localhost:11434, no API key required).
|
|
4
|
+
import axios from 'axios';
|
|
5
|
+
const DEFAULT_BASE_URL = 'http://localhost:11434';
|
|
6
|
+
export class OllamaProvider {
    name = 'ollama';
    type = 'ollama';
    supportsStreaming = true;
    supportsTools = true;
    supportsMCP = false; // Requires translation
    client;
    config;
    /**
     * Build an axios client for the configured Ollama endpoint.
     *
     * @param {object} config Provider settings: optional `baseUrl` (defaults
     *   to the local daemon), optional `apiKey` (required for ollama.com
     *   Cloud), optional `timeout` in ms.
     */
    constructor(config) {
        this.config = config;
        // Trim trailing slashes so request paths don't double up ('//v1/...').
        const baseUrl = (config.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, '');
        const headers = {
            'Content-Type': 'application/json',
        };
        // API key is optional for self-hosted Ollama; required for ollama.com Cloud
        if (config.apiKey) {
            headers['Authorization'] = `Bearer ${config.apiKey}`;
        }
        this.client = axios.create({
            baseURL: baseUrl,
            headers,
            // Local models can be slow to load/generate; default to 3 minutes.
            timeout: config.timeout || 180000,
        });
    }
    /**
     * Report whether every requested feature is supported by this provider.
     * @param {string[]} features Feature names (e.g. 'chat', 'streaming').
     * @returns {boolean} True only if all features are supported.
     */
    validateCapabilities(features) {
        const supported = ['chat', 'streaming', 'tools'];
        return features.every((f) => supported.includes(f));
    }
    /**
     * Non-streaming chat completion.
     * @param {object} params Anthropic-style chat params.
     * @returns {Promise<object>} Normalized response (see formatResponse).
     * @throws {Error} Wrapped provider error (see handleError).
     */
    async chat(params) {
        try {
            const body = this.formatRequest(params, false);
            const response = await this.client.post('/v1/chat/completions', body);
            return this.formatResponse(response.data, params.model);
        }
        catch (error) {
            throw this.handleError(error);
        }
    }
    /**
     * Streaming chat completion. Parses the SSE byte stream line-by-line and
     * yields normalized chunks.
     *
     * Fix: any bytes left in `buffer` when the HTTP stream ends (a final
     * `data:` line — possibly the `[DONE]` sentinel — arriving without a
     * trailing newline) are now flushed instead of being silently dropped,
     * so the last delta / terminal `message_stop` is no longer lost.
     *
     * @param {object} params Anthropic-style chat params.
     * @yields {object} content_block_delta / message_stop chunks.
     */
    async *stream(params) {
        try {
            const body = this.formatRequest(params, true);
            const response = await this.client.post('/v1/chat/completions', body, {
                responseType: 'stream',
            });
            let buffer = '';
            for await (const chunk of response.data) {
                buffer += chunk.toString();
                const lines = buffer.split('\n');
                // Keep the (possibly partial) last line for the next iteration.
                buffer = lines.pop() ?? '';
                for (const line of lines) {
                    const evt = this.parseSSELine(line);
                    if (!evt)
                        continue;
                    if (evt.done) {
                        yield { type: 'message_stop' };
                        return;
                    }
                    yield evt.chunk;
                }
            }
            // Flush a trailing event that arrived without a final newline.
            const tail = this.parseSSELine(buffer);
            if (tail) {
                if (tail.done) {
                    yield { type: 'message_stop' };
                    return;
                }
                yield tail.chunk;
            }
        }
        catch (error) {
            throw this.handleError(error);
        }
    }
    /**
     * Parse a single SSE line.
     * @param {string} line Raw line from the event stream.
     * @returns {object|null} `{ done: true }` for the [DONE] sentinel,
     *   `{ chunk }` for a usable delta, or null for non-data / malformed /
     *   empty-delta lines.
     */
    parseSSELine(line) {
        const trimmed = line.trim();
        if (!trimmed.startsWith('data: '))
            return null;
        const data = trimmed.slice(6);
        if (data === '[DONE]')
            return { done: true };
        try {
            const parsed = JSON.parse(data);
            const out = this.formatStreamChunk(parsed);
            return out ? { chunk: out } : null;
        }
        catch {
            // skip malformed chunks
            return null;
        }
    }
    /**
     * Convert Anthropic-style params into an OpenAI-compatible request body.
     * Structured content blocks are flattened to plain text (tool_use /
     * tool_result blocks are JSON-stringified) because the chat-completions
     * endpoint expects string message content.
     * @param {object} params Anthropic-style chat params.
     * @param {boolean} stream Whether to request a streaming response.
     * @returns {object} Body for POST /v1/chat/completions.
     */
    formatRequest(params, stream) {
        const messages = params.messages.map((msg) => ({
            role: msg.role,
            content: typeof msg.content === 'string'
                ? msg.content
                : msg.content
                    .map((block) => {
                    if (block.type === 'text')
                        return block.text || '';
                    if (block.type === 'tool_use')
                        return JSON.stringify({
                            tool: block.name,
                            input: block.input,
                        });
                    if (block.type === 'tool_result')
                        return typeof block.content === 'string'
                            ? block.content
                            : JSON.stringify(block.content);
                    return '';
                })
                    .join('\n'),
        }));
        const body = {
            model: params.model,
            messages,
            temperature: params.temperature ?? 0.7,
            stream,
        };
        if (params.maxTokens) {
            body.max_tokens = params.maxTokens;
        }
        if (params.tools && params.tools.length > 0) {
            body.tools = params.tools.map((tool) => ({
                type: 'function',
                function: {
                    name: tool.name,
                    description: tool.description,
                    parameters: tool.input_schema,
                },
            }));
            // tool_choice is only meaningful when tools are present.
            if (params.toolChoice) {
                if (params.toolChoice === 'auto' || params.toolChoice === 'none') {
                    body.tool_choice = params.toolChoice;
                }
                else if (typeof params.toolChoice === 'object') {
                    body.tool_choice = {
                        type: 'function',
                        function: { name: params.toolChoice.name },
                    };
                }
            }
        }
        return body;
    }
    /**
     * Normalize a non-streaming OpenAI-style response into the internal
     * (Anthropic-like) shape: content blocks, stopReason, usage, metadata.
     * @param {object} data Raw response body from Ollama.
     * @param {string} model Requested model (fallback when data.model absent).
     * @returns {object} Normalized chat response.
     * @throws {Error} If the response contains no choices.
     */
    formatResponse(data, model) {
        const choice = data.choices?.[0];
        if (!choice) {
            throw new Error(`Ollama returned no choices: ${JSON.stringify(data).slice(0, 200)}`);
        }
        const message = choice.message ?? {};
        const content = [];
        if (message.content) {
            content.push({ type: 'text', text: String(message.content) });
        }
        if (Array.isArray(message.tool_calls)) {
            for (const tc of message.tool_calls) {
                const fnName = tc.function?.name;
                const fnArgs = tc.function?.arguments;
                if (!fnName)
                    continue;
                let parsedInput = {};
                if (typeof fnArgs === 'string') {
                    try {
                        parsedInput = JSON.parse(fnArgs);
                    }
                    catch {
                        // Preserve unparseable argument strings rather than losing them.
                        parsedInput = { raw: fnArgs };
                    }
                }
                else if (fnArgs && typeof fnArgs === 'object') {
                    parsedInput = fnArgs;
                }
                content.push({
                    type: 'tool_use',
                    id: tc.id || `ollama-${Date.now()}-${content.length}`,
                    name: fnName,
                    input: parsedInput,
                });
            }
        }
        return {
            id: data.id || `ollama-${Date.now()}`,
            model: data.model || model,
            content,
            stopReason: this.mapFinishReason(choice.finish_reason),
            usage: {
                inputTokens: data.usage?.prompt_tokens ?? 0,
                outputTokens: data.usage?.completion_tokens ?? 0,
            },
            metadata: {
                provider: 'ollama',
                cost: 0, // self-hosted = free; cloud users track via their plan
                latency: 0, // set by router
            },
        };
    }
    /**
     * Map one parsed streaming delta into an internal chunk event.
     * @param {object} data Parsed JSON from one SSE `data:` line.
     * @returns {object|null} A delta/stop chunk, or null if nothing to emit.
     */
    formatStreamChunk(data) {
        const choice = data.choices?.[0];
        if (!choice)
            return null;
        const delta = choice.delta;
        if (delta?.content) {
            return {
                type: 'content_block_delta',
                delta: { type: 'text_delta', text: delta.content },
            };
        }
        if (delta?.tool_calls?.length) {
            const tc = delta.tool_calls[0];
            return {
                type: 'content_block_delta',
                delta: {
                    type: 'input_json_delta',
                    partial_json: tc.function?.arguments || '',
                },
            };
        }
        if (choice.finish_reason) {
            return {
                type: 'message_stop',
                usage: data.usage
                    ? {
                        inputTokens: data.usage.prompt_tokens || 0,
                        outputTokens: data.usage.completion_tokens || 0,
                    }
                    : undefined,
            };
        }
        return null;
    }
    /**
     * Translate an OpenAI finish_reason into the internal stopReason.
     * Unknown/absent reasons default to 'end_turn'.
     * @param {string|undefined} reason
     * @returns {string}
     */
    mapFinishReason(reason) {
        switch (reason) {
            case 'stop':
                return 'end_turn';
            case 'length':
                return 'max_tokens';
            case 'tool_calls':
                return 'tool_use';
            default:
                return 'end_turn';
        }
    }
    /**
     * Wrap an axios/network error with provider context and retry metadata.
     * @param {any} error Raw error from axios or stream handling.
     * @returns {Error} Error tagged with provider, statusCode, retryable.
     */
    handleError(error) {
        const wrapped = new Error(`Ollama provider error: ${error?.response?.data?.error?.message || error.message}`);
        wrapped.provider = 'ollama';
        wrapped.statusCode = error?.response?.status;
        // Network / 5xx are retryable; 4xx (other than 429) are not
        const status = wrapped.statusCode ?? 0;
        wrapped.retryable = !status || status >= 500 || status === 429;
        return wrapped;
    }
}
|
|
242
|
+
//# sourceMappingURL=ollama.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ollama.js","sourceRoot":"","sources":["../../../src/router/providers/ollama.ts"],"names":[],"mappings":"AAAA,uDAAuD;AACvD,6EAA6E;AAC7E,2DAA2D;AAC3D,OAAO,KAAwB,MAAM,OAAO,CAAC;AAW7C,MAAM,gBAAgB,GAAG,wBAAwB,CAAC;AAElD,MAAM,OAAO,cAAc;IACzB,IAAI,GAAG,QAAQ,CAAC;IAChB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,IAAI,CAAC;IACzB,aAAa,GAAG,IAAI,CAAC;IACrB,WAAW,GAAG,KAAK,CAAC,CAAC,uBAAuB;IAEpC,MAAM,CAAgB;IACtB,MAAM,CAAiB;IAE/B,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QAErB,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,gBAAgB,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACzE,MAAM,OAAO,GAA2B;YACtC,cAAc,EAAE,kBAAkB;SACnC,CAAC;QAEF,4EAA4E;QAC5E,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,MAAM,CAAC,MAAM,EAAE,CAAC;QACvD,CAAC;QAED,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,OAAO;YAChB,OAAO;YACP,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,MAAM;SAClC,CAAC,CAAC;IACL,CAAC;IAED,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QACjD,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;YAC/C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,sBAAsB,EAAE,IAAI,CAAC,CAAC;YACtE,OAAO,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAC1D,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC9C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,sBAAsB,EAAE,IAAI,EAAE;gBACpE,YAAY,EAAE,QAAQ;aACvB,CAAC,CAAC;YAEH,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACxC,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;gBAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjC,MAAM,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;gBAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,MAAM,OAAO,G
AAG,IAAI,CAAC,IAAI,EAAE,CAAC;oBAC5B,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC;wBAAE,SAAS;oBAC5C,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;oBAC9B,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;wBACtB,MAAM,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC;wBAC/B,OAAO;oBACT,CAAC;oBACD,IAAI,CAAC;wBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;wBAChC,MAAM,GAAG,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;wBAC3C,IAAI,GAAG;4BAAE,MAAM,GAAG,CAAC;oBACrB,CAAC;oBAAC,MAAM,CAAC;wBACP,wBAAwB;oBAC1B,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,MAAkB,EAAE,MAAe;QACvD,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YAC7C,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,OAAO,EACL,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7B,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO;qBACR,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;oBACb,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM;wBAAE,OAAO,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC;oBACnD,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU;wBAC3B,OAAO,IAAI,CAAC,SAAS,CAAC;4BACpB,IAAI,EAAE,KAAK,CAAC,IAAI;4BAChB,KAAK,EAAE,KAAK,CAAC,KAAK;yBACnB,CAAC,CAAC;oBACL,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa;wBAC9B,OAAO,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ;4BACtC,CAAC,CAAC,KAAK,CAAC,OAAO;4BACf,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;oBACpC,OAAO,EAAE,CAAC;gBACZ,CAAC,CAAC;qBACD,IAAI,CAAC,IAAI,CAAC;SACpB,CAAC,CAAC,CAAC;QAEJ,MAAM,IAAI,GAAQ;YAChB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;YACtC,MAAM;SACP,CAAC;QAEF,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YACrB,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,SAAS,CAAC;QACrC,CAAC;QAED,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBACvC,IAAI,EAAE,UAAU;gBAChB,QAAQ,EAAE;oBACR,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,UAAU,EAAE,IAAI,CAAC,YAAY;iBAC9B;aACF,CAAC,CAAC,CAAC;YAEJ,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;gBACtB,IAAI,MAAM,CAAC,UAAU,KAAK,MAAM,IAAI,MAAM,CAAC,UAAU,KAAK,MAAM,E
AAE,CAAC;oBACjE,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,UAAU,CAAC;gBACvC,CAAC;qBAAM,IAAI,OAAO,MAAM,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;oBACjD,IAAI,CAAC,WAAW,GAAG;wBACjB,IAAI,EAAE,UAAU;wBAChB,QAAQ,EAAE,EAAE,IAAI,EAAE,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE;qBAC3C,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,cAAc,CAAC,IAAS,EAAE,KAAa;QAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,+BAA+B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACvF,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;QACrC,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YACpB,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAChE,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACtC,KAAK,MAAM,EAAE,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACpC,MAAM,MAAM,GAAG,EAAE,CAAC,QAAQ,EAAE,IAAI,CAAC;gBACjC,MAAM,MAAM,GAAG,EAAE,CAAC,QAAQ,EAAE,SAAS,CAAC;gBACtC,IAAI,CAAC,MAAM;oBAAE,SAAS;gBACtB,IAAI,WAAW,GAAY,EAAE,CAAC;gBAC9B,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;oBAC/B,IAAI,CAAC;wBACH,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;oBACnC,CAAC;oBAAC,MAAM,CAAC;wBACP,WAAW,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC;oBAChC,CAAC;gBACH,CAAC;qBAAM,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;oBAChD,WAAW,GAAG,MAAM,CAAC;gBACvB,CAAC;gBACD,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,UAAU;oBAChB,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,UAAU,IAAI,CAAC,GAAG,EAAE,IAAI,OAAO,CAAC,MAAM,EAAE;oBACrD,IAAI,EAAE,MAAM;oBACZ,KAAK,EAAE,WAAW;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO;YACL,EAAE,EAAE,IAAI,CAAC,EAAE,IAAI,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE;YACrC,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,KAAK;YAC1B,OAAO;YACP,UAAU,EAAE,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,aAAa,CAAC;YACtD,KAAK,EAAE;gBACL,WAAW,EAAE,IAAI,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC;gBAC3C,YAAY,EAAE,IAAI,CAAC,KAAK,EAAE,iBAAiB,IAAI,CAAC;aACjD;YACD,QAAQ,EAAE;gBACR,QAAQ,EAAE,QAAQ;gBAClB,IAAI,EAAE,CAAC,EAAE,uDAAuD;gBAChE,OAAO,EAAE,CAAC,EAAE,gBAAgB;aAC7B;SACF,CA
AC;IACJ,CAAC;IAEO,iBAAiB,CAAC,IAAS;QACjC,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;QAE3B,IAAI,KAAK,EAAE,OAAO,EAAE,CAAC;YACnB,OAAO;gBACL,IAAI,EAAE,qBAAqB;gBAC3B,KAAK,EAAE,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,CAAC,OAAO,EAAE;aACnD,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;YAC9B,MAAM,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAC/B,OAAO;gBACL,IAAI,EAAE,qBAAqB;gBAC3B,KAAK,EAAE;oBACL,IAAI,EAAE,kBAAkB;oBACxB,YAAY,EAAE,EAAE,CAAC,QAAQ,EAAE,SAAS,IAAI,EAAE;iBAC3C;aACF,CAAC;QACJ,CAAC;QAED,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACzB,OAAO;gBACL,IAAI,EAAE,cAAc;gBACpB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACf,CAAC,CAAC;wBACE,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,IAAI,CAAC;wBAC1C,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,iBAAiB,IAAI,CAAC;qBAChD;oBACH,CAAC,CAAC,SAAS;aACd,CAAC;QACJ,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,eAAe,CAAC,MAAe;QACrC,QAAQ,MAAM,EAAE,CAAC;YACf,KAAK,MAAM;gBACT,OAAO,UAAU,CAAC;YACpB,KAAK,QAAQ;gBACX,OAAO,YAAY,CAAC;YACtB,KAAK,YAAY;gBACf,OAAO,UAAU,CAAC;YACpB;gBACE,OAAO,UAAU,CAAC;QACtB,CAAC;IACH,CAAC;IAEO,WAAW,CAAC,KAAU;QAC5B,MAAM,OAAO,GAAG,IAAI,KAAK,CACvB,0BAA0B,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,OAAO,EAAE,CAClE,CAAC;QACnB,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC5B,OAAO,CAAC,UAAU,GAAG,KAAK,EAAE,QAAQ,EAAE,MAAM,CAAC;QAC7C,4DAA4D;QAC5D,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;QACvC,OAAO,CAAC,SAAS,GAAG,CAAC,MAAM,IAAI,MAAM,IAAI,GAAG,IAAI,MAAM,KAAK,GAAG,CAAC;QAC/D,OAAO,OAAO,CAAC;IACjB,CAAC;CACF","sourcesContent":["// Ollama provider - OpenAI-compatible chat completions\n// Works with both ollama.com Cloud (requires OLLAMA_API_KEY) and self-hosted\n// (typically http://localhost:11434, no API key required).\nimport axios, { AxiosInstance } from 'axios';\nimport {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderConfig,\n ProviderError,\n ContentBlock,\n} from '../types.js';\n\nconst DEFAULT_BASE_URL = 'http://localhost:11434';\n\nexport class 
OllamaProvider implements LLMProvider {\n name = 'ollama';\n type = 'ollama' as const;\n supportsStreaming = true;\n supportsTools = true;\n supportsMCP = false; // Requires translation\n\n private client: AxiosInstance;\n private config: ProviderConfig;\n\n constructor(config: ProviderConfig) {\n this.config = config;\n\n const baseUrl = (config.baseUrl || DEFAULT_BASE_URL).replace(/\\/+$/, '');\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n };\n\n // API key is optional for self-hosted Ollama; required for ollama.com Cloud\n if (config.apiKey) {\n headers['Authorization'] = `Bearer ${config.apiKey}`;\n }\n\n this.client = axios.create({\n baseURL: baseUrl,\n headers,\n timeout: config.timeout || 180000,\n });\n }\n\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat', 'streaming', 'tools'];\n return features.every((f) => supported.includes(f));\n }\n\n async chat(params: ChatParams): Promise<ChatResponse> {\n try {\n const body = this.formatRequest(params, false);\n const response = await this.client.post('/v1/chat/completions', body);\n return this.formatResponse(response.data, params.model);\n } catch (error: any) {\n throw this.handleError(error);\n }\n }\n\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n try {\n const body = this.formatRequest(params, true);\n const response = await this.client.post('/v1/chat/completions', body, {\n responseType: 'stream',\n });\n\n let buffer = '';\n for await (const chunk of response.data) {\n buffer += chunk.toString();\n const lines = buffer.split('\\n');\n buffer = lines.pop() ?? 
'';\n\n for (const line of lines) {\n const trimmed = line.trim();\n if (!trimmed.startsWith('data: ')) continue;\n const data = trimmed.slice(6);\n if (data === '[DONE]') {\n yield { type: 'message_stop' };\n return;\n }\n try {\n const parsed = JSON.parse(data);\n const out = this.formatStreamChunk(parsed);\n if (out) yield out;\n } catch {\n // skip malformed chunks\n }\n }\n }\n } catch (error: any) {\n throw this.handleError(error);\n }\n }\n\n private formatRequest(params: ChatParams, stream: boolean): any {\n const messages = params.messages.map((msg) => ({\n role: msg.role,\n content:\n typeof msg.content === 'string'\n ? msg.content\n : msg.content\n .map((block) => {\n if (block.type === 'text') return block.text || '';\n if (block.type === 'tool_use')\n return JSON.stringify({\n tool: block.name,\n input: block.input,\n });\n if (block.type === 'tool_result')\n return typeof block.content === 'string'\n ? block.content\n : JSON.stringify(block.content);\n return '';\n })\n .join('\\n'),\n }));\n\n const body: any = {\n model: params.model,\n messages,\n temperature: params.temperature ?? 
0.7,\n stream,\n };\n\n if (params.maxTokens) {\n body.max_tokens = params.maxTokens;\n }\n\n if (params.tools && params.tools.length > 0) {\n body.tools = params.tools.map((tool) => ({\n type: 'function',\n function: {\n name: tool.name,\n description: tool.description,\n parameters: tool.input_schema,\n },\n }));\n\n if (params.toolChoice) {\n if (params.toolChoice === 'auto' || params.toolChoice === 'none') {\n body.tool_choice = params.toolChoice;\n } else if (typeof params.toolChoice === 'object') {\n body.tool_choice = {\n type: 'function',\n function: { name: params.toolChoice.name },\n };\n }\n }\n }\n\n return body;\n }\n\n private formatResponse(data: any, model: string): ChatResponse {\n const choice = data.choices?.[0];\n if (!choice) {\n throw new Error(`Ollama returned no choices: ${JSON.stringify(data).slice(0, 200)}`);\n }\n\n const message = choice.message ?? {};\n const content: ContentBlock[] = [];\n\n if (message.content) {\n content.push({ type: 'text', text: String(message.content) });\n }\n\n if (Array.isArray(message.tool_calls)) {\n for (const tc of message.tool_calls) {\n const fnName = tc.function?.name;\n const fnArgs = tc.function?.arguments;\n if (!fnName) continue;\n let parsedInput: unknown = {};\n if (typeof fnArgs === 'string') {\n try {\n parsedInput = JSON.parse(fnArgs);\n } catch {\n parsedInput = { raw: fnArgs };\n }\n } else if (fnArgs && typeof fnArgs === 'object') {\n parsedInput = fnArgs;\n }\n content.push({\n type: 'tool_use',\n id: tc.id || `ollama-${Date.now()}-${content.length}`,\n name: fnName,\n input: parsedInput,\n });\n }\n }\n\n return {\n id: data.id || `ollama-${Date.now()}`,\n model: data.model || model,\n content,\n stopReason: this.mapFinishReason(choice.finish_reason),\n usage: {\n inputTokens: data.usage?.prompt_tokens ?? 0,\n outputTokens: data.usage?.completion_tokens ?? 
0,\n },\n metadata: {\n provider: 'ollama',\n cost: 0, // self-hosted = free; cloud users track via their plan\n latency: 0, // set by router\n },\n };\n }\n\n private formatStreamChunk(data: any): StreamChunk | null {\n const choice = data.choices?.[0];\n if (!choice) return null;\n const delta = choice.delta;\n\n if (delta?.content) {\n return {\n type: 'content_block_delta',\n delta: { type: 'text_delta', text: delta.content },\n };\n }\n\n if (delta?.tool_calls?.length) {\n const tc = delta.tool_calls[0];\n return {\n type: 'content_block_delta',\n delta: {\n type: 'input_json_delta',\n partial_json: tc.function?.arguments || '',\n },\n };\n }\n\n if (choice.finish_reason) {\n return {\n type: 'message_stop',\n usage: data.usage\n ? {\n inputTokens: data.usage.prompt_tokens || 0,\n outputTokens: data.usage.completion_tokens || 0,\n }\n : undefined,\n };\n }\n\n return null;\n }\n\n private mapFinishReason(reason?: string): ChatResponse['stopReason'] {\n switch (reason) {\n case 'stop':\n return 'end_turn';\n case 'length':\n return 'max_tokens';\n case 'tool_calls':\n return 'tool_use';\n default:\n return 'end_turn';\n }\n }\n\n private handleError(error: any): ProviderError {\n const wrapped = new Error(\n `Ollama provider error: ${error?.response?.data?.error?.message || error.message}`\n ) as ProviderError;\n wrapped.provider = 'ollama';\n wrapped.statusCode = error?.response?.status;\n // Network / 5xx are retryable; 4xx (other than 429) are not\n const status = wrapped.statusCode ?? 0;\n wrapped.retryable = !status || status >= 500 || status === 429;\n return wrapped;\n }\n}\n"]}
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
* - KV cache pooling for 20-30% faster generation
|
|
8
8
|
* - Better generation parameters for code tasks
|
|
9
9
|
* - System prompt caching
|
|
10
|
+
*
|
|
11
|
+
* Note: onnxruntime-node is optional - will error if not installed
|
|
10
12
|
*/
|
|
11
13
|
import type { ChatParams, ChatResponse } from '../types.js';
|
|
12
14
|
import { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local-optimized.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"onnx-local-optimized.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAaH,OAAO,KAAK,EACV,UAAU,EACV,YAAY,EAIb,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAErE,MAAM,WAAW,mBAAoB,SAAQ,eAAe;IAC1D,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,qBAAsB,SAAQ,iBAAiB;IAC1D,OAAO,CAAC,eAAe,CAAgC;IACvD,OAAO,CAAC,WAAW,CAA+B;IAClD,OAAO,CAAC,iBAAiB,CAAmE;gBAEhF,MAAM,GAAE,mBAAwB;IAkB5C;;OAEG;IACH,OAAO,CAAC,cAAc;IAKtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAmDvB;;OAEG;IACH,OAAO,CAAC,cAAc;IA8BtB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA+BrD;;OAEG;IACH,mBAAmB;;;;;;;;;;;;;;;;;;;;IAoBnB;;OAEG;IACH,WAAW;CAKZ"}
|
|
@@ -7,7 +7,17 @@
|
|
|
7
7
|
* - KV cache pooling for 20-30% faster generation
|
|
8
8
|
* - Better generation parameters for code tasks
|
|
9
9
|
* - System prompt caching
|
|
10
|
+
*
|
|
11
|
+
* Note: onnxruntime-node is optional - will error if not installed
|
|
10
12
|
*/
|
|
13
|
+
let ort = null;
|
|
14
|
+
// Dynamic import for optional onnxruntime-node
|
|
15
|
+
try {
|
|
16
|
+
ort = await import('onnxruntime-node');
|
|
17
|
+
}
|
|
18
|
+
catch {
|
|
19
|
+
// Will be handled at runtime
|
|
20
|
+
}
|
|
11
21
|
import { ONNXLocalProvider } from './onnx-local.js';
|
|
12
22
|
export class OptimizedONNXProvider extends ONNXLocalProvider {
|
|
13
23
|
optimizedConfig;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local-optimized.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAYH,OAAO,EAAE,iBAAiB,EAAmB,MAAM,iBAAiB,CAAC;AAYrE,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAClD,eAAe,CAAgC;IAC/C,WAAW,GAAqB,IAAI,GAAG,EAAE,CAAC;IAC1C,iBAAiB,GAAyD,IAAI,GAAG,EAAE,CAAC;IAE5F,YAAY,SAA8B,EAAE;QAC1C,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,eAAe,GAAG;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,iFAAiF;YAChH,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG,EAAG,sCAAsC;YAC/E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI,EAAG,sBAAsB;YAC1E,aAAa,EAAE,MAAM,CAAC,aAAa,KAAK,KAAK,EAAG,eAAe;YAC/D,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,GAAG;YACxB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,GAAG;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,qDAAqD;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAmB;QACzC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC;YACxC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC;QACxD,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,SAAS,GAAc,EAAE,CAAC;QAEhC,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ;gBACnD,CAAC,CAAC,SAAS,CAAC,OAAO;gBACnB,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEzE,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAExB,yCAAyC;YACzC,
IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;gBAAE,SAAS;YAEpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAE5C,IAAI,WAAW,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,eAAe,WAAW,cAAc,CAAC,CAAC;gBACpH,MAAM;YACR,CAAC;YAED,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvB,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;YACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAC5E,IAAI,WAAW;gBAAE,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,kBAAkB,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;oBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEnE,wCAAwC;gBACxC,MAAM,UAAU,GAAG,0DAA0D,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE5F,IAAI,UAAU,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5E,MAAM,eAAe,GAAG,GAAG,OAAO,kHAAkH,CAAC;oBAErJ,OAAO;wBACL,GAAG,GAAG;wBACN,OAAO,EAAE,eAAe;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,4CAA4C;QAC5C,IAAI,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAErD,uCAAuC;QACvC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QA
EzC,2DAA2D;QAC3D,MAAM,cAAc,GAAG;YACrB,GAAG,MAAM;YACT,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW;YACnE,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS;SAC9D,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACtB,QAAQ,CAAC,QAAQ,CAAC,aAAa,GAAG;gBAChC,cAAc,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBAClD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,mBAAmB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC5D,oBAAoB,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;gBAC5C,qBAAqB,EAAE,QAAQ,CAAC,MAAM;aACvC,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO;YACL,GAAG,KAAK,CAAC,YAAY,EAAE;YACvB,aAAa,EAAE;gBACb,gBAAgB,EAAE,IAAI,CAAC,eAAe,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBACjD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,WAAW,EAAE,IAAI,CAAC,eAAe,CAAC,WAAW;gBAC7C,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,iBAAiB,EAAE,IAAI,CAAC,eAAe,CAAC,iBAAiB;aAC1D;YACD,UAAU,EAAE;gBACV,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;gBACtC,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI;aACnD;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;CACF","sourcesContent":["/**\n * Optimized ONNX Runtime Local Inference Provider\n *\n * Improvements over base implementation:\n * - Context pruning for 2-4x speed improvement\n * - Prompt optimization for 30-50% quality improvement\n * - KV cache pooling for 20-30% faster generation\n * - Better generation parameters for code tasks\n * - System prompt caching\n */\n\nimport * as ort from 'onnxruntime-node';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n ChatParams,\n ChatResponse,\n Message,\n ContentBlock,\n ProviderError\n} from '../types.js';\nimport { 
ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';\n\nexport interface OptimizedONNXConfig extends ONNXLocalConfig {\n maxContextTokens?: number;\n slidingWindow?: boolean;\n cacheSystemPrompts?: boolean;\n promptOptimization?: boolean;\n topK?: number;\n topP?: number;\n repetitionPenalty?: number;\n}\n\nexport class OptimizedONNXProvider extends ONNXLocalProvider {\n private optimizedConfig: Required<OptimizedONNXConfig>;\n private kvCachePool: Map<string, any> = new Map();\n private systemPromptCache: Map<string, { tokens: number[]; timestamp: number }> = new Map();\n\n constructor(config: OptimizedONNXConfig = {}) {\n super(config);\n\n this.optimizedConfig = {\n modelPath: config.modelPath || './models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 200,\n temperature: config.temperature || 0.3, // Lower for code (more deterministic)\n maxContextTokens: config.maxContextTokens || 2048, // Keep under 4K limit\n slidingWindow: config.slidingWindow !== false, // Default true\n cacheSystemPrompts: config.cacheSystemPrompts !== false, // Default true\n promptOptimization: config.promptOptimization !== false, // Default true\n topK: config.topK || 50,\n topP: config.topP || 0.9,\n repetitionPenalty: config.repetitionPenalty || 1.1\n };\n }\n\n /**\n * Estimate token count for a string\n */\n private estimateTokens(text: string): number {\n // Rough estimate: 1 token ≈ 4 characters for English\n return Math.ceil(text.length / 4);\n }\n\n /**\n * Optimize messages using sliding window context pruning\n */\n private optimizeContext(messages: Message[]): Message[] {\n if (!this.optimizedConfig.slidingWindow) {\n return messages;\n }\n\n const maxTokens = this.optimizedConfig.maxContextTokens;\n let totalTokens = 0;\n const optimized: Message[] = [];\n\n // Always keep system message if present\n const systemMsg = messages.find(m => m.role === 
'system');\n if (systemMsg) {\n const content = typeof systemMsg.content === 'string'\n ? systemMsg.content\n : systemMsg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n optimized.push(systemMsg);\n totalTokens += this.estimateTokens(content);\n }\n\n // Add recent messages from end (most relevant)\n for (let i = messages.length - 1; i >= 0; i--) {\n const msg = messages[i];\n\n // Skip if already added (system message)\n if (msg.role === 'system') continue;\n\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n const tokens = this.estimateTokens(content);\n\n if (totalTokens + tokens > maxTokens) {\n console.log(`📊 Context pruned: Saved ${messages.length - optimized.length} messages, ~${totalTokens} tokens kept`);\n break;\n }\n\n optimized.unshift(msg);\n totalTokens += tokens;\n }\n\n // Ensure at least user message exists\n if (optimized.length === 0 || !optimized.some(m => m.role === 'user')) {\n const lastUserMsg = messages.slice().reverse().find(m => m.role === 'user');\n if (lastUserMsg) optimized.push(lastUserMsg);\n }\n\n return optimized;\n }\n\n /**\n * Optimize prompt for better quality output\n */\n private optimizePrompt(messages: Message[]): Message[] {\n if (!this.optimizedConfig.promptOptimization) {\n return messages;\n }\n\n const optimized = messages.map(msg => {\n if (msg.role === 'user') {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n // Add quality indicators for code tasks\n const isCodeTask = /write|create|implement|generate|code|function|class|api/i.test(content);\n\n if (isCodeTask && !content.includes('include') && !content.includes('with')) {\n const enhancedContent = `${content}. Include: proper error handling, type hints/types, and edge case handling. 
Return clean, production-ready code.`;\n\n return {\n ...msg,\n content: enhancedContent\n };\n }\n }\n\n return msg;\n });\n\n return optimized;\n }\n\n /**\n * Enhanced chat with optimization\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n // Step 1: Optimize context (sliding window)\n let messages = this.optimizeContext(params.messages);\n\n // Step 2: Optimize prompts for quality\n messages = this.optimizePrompt(messages);\n\n // Step 3: Call base implementation with optimized messages\n const enhancedParams = {\n ...params,\n messages,\n temperature: params.temperature || this.optimizedConfig.temperature,\n maxTokens: params.maxTokens || this.optimizedConfig.maxTokens\n };\n\n const response = await super.chat(enhancedParams);\n\n // Add optimization metadata\n if (response.metadata) {\n response.metadata.optimizations = {\n contextPruning: this.optimizedConfig.slidingWindow,\n promptOptimization: this.optimizedConfig.promptOptimization,\n systemPromptCaching: this.optimizedConfig.cacheSystemPrompts,\n originalMessageCount: params.messages.length,\n optimizedMessageCount: messages.length\n };\n }\n\n return response;\n }\n\n /**\n * Get optimization info\n */\n getOptimizationInfo() {\n return {\n ...super.getModelInfo(),\n optimizations: {\n maxContextTokens: this.optimizedConfig.maxContextTokens,\n slidingWindow: this.optimizedConfig.slidingWindow,\n cacheSystemPrompts: this.optimizedConfig.cacheSystemPrompts,\n promptOptimization: this.optimizedConfig.promptOptimization,\n temperature: this.optimizedConfig.temperature,\n topK: this.optimizedConfig.topK,\n topP: this.optimizedConfig.topP,\n repetitionPenalty: this.optimizedConfig.repetitionPenalty\n },\n cacheStats: {\n kvCachePoolSize: this.kvCachePool.size,\n systemPromptCacheSize: this.systemPromptCache.size\n }\n };\n }\n\n /**\n * Clear caches\n */\n clearCaches() {\n this.kvCachePool.clear();\n this.systemPromptCache.clear();\n console.log('🧹 Caches cleared');\n }\n}\n"]}
|
|
1
|
+
{"version":3,"file":"onnx-local-optimized.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AAEpB,+CAA+C;AAC/C,IAAI,CAAC;IACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;AACzC,CAAC;AAAC,MAAM,CAAC;IACP,6BAA6B;AAC/B,CAAC;AAWD,OAAO,EAAE,iBAAiB,EAAmB,MAAM,iBAAiB,CAAC;AAYrE,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAClD,eAAe,CAAgC;IAC/C,WAAW,GAAqB,IAAI,GAAG,EAAE,CAAC;IAC1C,iBAAiB,GAAyD,IAAI,GAAG,EAAE,CAAC;IAE5F,YAAY,SAA8B,EAAE;QAC1C,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,eAAe,GAAG;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,iFAAiF;YAChH,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG,EAAG,sCAAsC;YAC/E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI,EAAG,sBAAsB;YAC1E,aAAa,EAAE,MAAM,CAAC,aAAa,KAAK,KAAK,EAAG,eAAe;YAC/D,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,GAAG;YACxB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,GAAG;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,qDAAqD;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAmB;QACzC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC;YACxC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC;QACxD,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,SAAS,GAAc,EAAE,CAAC;QAEhC,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ;gBACnD,CAAC,CAAC,SAAS,CAAC,OAAO;gBACnB,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEzE,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,+CAA+C;QAC/
C,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAExB,yCAAyC;YACzC,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;gBAAE,SAAS;YAEpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAE5C,IAAI,WAAW,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,eAAe,WAAW,cAAc,CAAC,CAAC;gBACpH,MAAM;YACR,CAAC;YAED,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvB,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;YACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAC5E,IAAI,WAAW;gBAAE,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,kBAAkB,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;oBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEnE,wCAAwC;gBACxC,MAAM,UAAU,GAAG,0DAA0D,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE5F,IAAI,UAAU,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5E,MAAM,eAAe,GAAG,GAAG,OAAO,kHAAkH,CAAC;oBAErJ,OAAO;wBACL,GAAG,GAAG;wBACN,OAAO,EAAE,eAAe;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI
,CAAC,MAAkB;QAC3B,4CAA4C;QAC5C,IAAI,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAErD,uCAAuC;QACvC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QAEzC,2DAA2D;QAC3D,MAAM,cAAc,GAAG;YACrB,GAAG,MAAM;YACT,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW;YACnE,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS;SAC9D,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACtB,QAAQ,CAAC,QAAQ,CAAC,aAAa,GAAG;gBAChC,cAAc,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBAClD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,mBAAmB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC5D,oBAAoB,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;gBAC5C,qBAAqB,EAAE,QAAQ,CAAC,MAAM;aACvC,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO;YACL,GAAG,KAAK,CAAC,YAAY,EAAE;YACvB,aAAa,EAAE;gBACb,gBAAgB,EAAE,IAAI,CAAC,eAAe,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBACjD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,WAAW,EAAE,IAAI,CAAC,eAAe,CAAC,WAAW;gBAC7C,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,iBAAiB,EAAE,IAAI,CAAC,eAAe,CAAC,iBAAiB;aAC1D;YACD,UAAU,EAAE;gBACV,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;gBACtC,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI;aACnD;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;CACF","sourcesContent":["/**\n * Optimized ONNX Runtime Local Inference Provider\n *\n * Improvements over base implementation:\n * - Context pruning for 2-4x speed improvement\n * - Prompt optimization for 30-50% quality improvement\n * - KV cache pooling for 20-30% faster generation\n * - Better generation parameters for code tasks\n * - System prompt caching\n *\n * Note: onnxruntime-node is optional - will error if not installed\n */\n\nlet ort: any = null;\n\n// Dynamic import for optional 
onnxruntime-node\ntry {\n ort = await import('onnxruntime-node');\n} catch {\n // Will be handled at runtime\n}\n\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n ChatParams,\n ChatResponse,\n Message,\n ContentBlock,\n ProviderError\n} from '../types.js';\nimport { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';\n\nexport interface OptimizedONNXConfig extends ONNXLocalConfig {\n maxContextTokens?: number;\n slidingWindow?: boolean;\n cacheSystemPrompts?: boolean;\n promptOptimization?: boolean;\n topK?: number;\n topP?: number;\n repetitionPenalty?: number;\n}\n\nexport class OptimizedONNXProvider extends ONNXLocalProvider {\n private optimizedConfig: Required<OptimizedONNXConfig>;\n private kvCachePool: Map<string, any> = new Map();\n private systemPromptCache: Map<string, { tokens: number[]; timestamp: number }> = new Map();\n\n constructor(config: OptimizedONNXConfig = {}) {\n super(config);\n\n this.optimizedConfig = {\n modelPath: config.modelPath || './models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 200,\n temperature: config.temperature || 0.3, // Lower for code (more deterministic)\n maxContextTokens: config.maxContextTokens || 2048, // Keep under 4K limit\n slidingWindow: config.slidingWindow !== false, // Default true\n cacheSystemPrompts: config.cacheSystemPrompts !== false, // Default true\n promptOptimization: config.promptOptimization !== false, // Default true\n topK: config.topK || 50,\n topP: config.topP || 0.9,\n repetitionPenalty: config.repetitionPenalty || 1.1\n };\n }\n\n /**\n * Estimate token count for a string\n */\n private estimateTokens(text: string): number {\n // Rough estimate: 1 token ≈ 4 characters for English\n return Math.ceil(text.length / 4);\n }\n\n /**\n * Optimize messages using sliding window 
context pruning\n */\n private optimizeContext(messages: Message[]): Message[] {\n if (!this.optimizedConfig.slidingWindow) {\n return messages;\n }\n\n const maxTokens = this.optimizedConfig.maxContextTokens;\n let totalTokens = 0;\n const optimized: Message[] = [];\n\n // Always keep system message if present\n const systemMsg = messages.find(m => m.role === 'system');\n if (systemMsg) {\n const content = typeof systemMsg.content === 'string'\n ? systemMsg.content\n : systemMsg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n optimized.push(systemMsg);\n totalTokens += this.estimateTokens(content);\n }\n\n // Add recent messages from end (most relevant)\n for (let i = messages.length - 1; i >= 0; i--) {\n const msg = messages[i];\n\n // Skip if already added (system message)\n if (msg.role === 'system') continue;\n\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n const tokens = this.estimateTokens(content);\n\n if (totalTokens + tokens > maxTokens) {\n console.log(`📊 Context pruned: Saved ${messages.length - optimized.length} messages, ~${totalTokens} tokens kept`);\n break;\n }\n\n optimized.unshift(msg);\n totalTokens += tokens;\n }\n\n // Ensure at least user message exists\n if (optimized.length === 0 || !optimized.some(m => m.role === 'user')) {\n const lastUserMsg = messages.slice().reverse().find(m => m.role === 'user');\n if (lastUserMsg) optimized.push(lastUserMsg);\n }\n\n return optimized;\n }\n\n /**\n * Optimize prompt for better quality output\n */\n private optimizePrompt(messages: Message[]): Message[] {\n if (!this.optimizedConfig.promptOptimization) {\n return messages;\n }\n\n const optimized = messages.map(msg => {\n if (msg.role === 'user') {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? 
c.text : '').join('');\n\n // Add quality indicators for code tasks\n const isCodeTask = /write|create|implement|generate|code|function|class|api/i.test(content);\n\n if (isCodeTask && !content.includes('include') && !content.includes('with')) {\n const enhancedContent = `${content}. Include: proper error handling, type hints/types, and edge case handling. Return clean, production-ready code.`;\n\n return {\n ...msg,\n content: enhancedContent\n };\n }\n }\n\n return msg;\n });\n\n return optimized;\n }\n\n /**\n * Enhanced chat with optimization\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n // Step 1: Optimize context (sliding window)\n let messages = this.optimizeContext(params.messages);\n\n // Step 2: Optimize prompts for quality\n messages = this.optimizePrompt(messages);\n\n // Step 3: Call base implementation with optimized messages\n const enhancedParams = {\n ...params,\n messages,\n temperature: params.temperature || this.optimizedConfig.temperature,\n maxTokens: params.maxTokens || this.optimizedConfig.maxTokens\n };\n\n const response = await super.chat(enhancedParams);\n\n // Add optimization metadata\n if (response.metadata) {\n response.metadata.optimizations = {\n contextPruning: this.optimizedConfig.slidingWindow,\n promptOptimization: this.optimizedConfig.promptOptimization,\n systemPromptCaching: this.optimizedConfig.cacheSystemPrompts,\n originalMessageCount: params.messages.length,\n optimizedMessageCount: messages.length\n };\n }\n\n return response;\n }\n\n /**\n * Get optimization info\n */\n getOptimizationInfo() {\n return {\n ...super.getModelInfo(),\n optimizations: {\n maxContextTokens: this.optimizedConfig.maxContextTokens,\n slidingWindow: this.optimizedConfig.slidingWindow,\n cacheSystemPrompts: this.optimizedConfig.cacheSystemPrompts,\n promptOptimization: this.optimizedConfig.promptOptimization,\n temperature: this.optimizedConfig.temperature,\n topK: this.optimizedConfig.topK,\n topP: 
this.optimizedConfig.topP,\n repetitionPenalty: this.optimizedConfig.repetitionPenalty\n },\n cacheStats: {\n kvCachePoolSize: this.kvCachePool.size,\n systemPromptCacheSize: this.systemPromptCache.size\n }\n };\n }\n\n /**\n * Clear caches\n */\n clearCaches() {\n this.kvCachePool.clear();\n this.systemPromptCache.clear();\n console.log('🧹 Caches cleared');\n }\n}\n"]}
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* ONNX Runtime Local Inference Provider for Phi-4
|
|
3
3
|
*
|
|
4
4
|
* Uses onnxruntime-node for true local CPU/GPU inference
|
|
5
|
+
* Falls back gracefully when native module isn't available (Windows)
|
|
5
6
|
*/
|
|
6
7
|
import type { LLMProvider, ChatParams, ChatResponse, StreamChunk } from '../types.js';
|
|
7
8
|
export interface ONNXLocalConfig {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH,OAAO,KAAK,EACV,WAAW,EACX,UAAU,EACV,YAAY,EACZ,WAAW,EAIZ,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,WAAW;IACnD,IAAI,SAAgB;IACpB,IAAI,EAAG,QAAQ,CAAU;IACzB,iBAAiB,UAAS;IAC1B,aAAa,UAAS;IACtB,WAAW,UAAS;IAEpB,OAAO,CAAC,OAAO,CAAa;IAC5B,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,GAAE,eAAoB;IASxC;;OAEG;YACW,aAAa;IAc3B;;OAEG;IACH,OAAO,CAAC,MAAM;IAId;;OAEG;IACH,OAAO,CAAC,MAAM;IAgBd;;OAEG;YACW,iBAAiB;IAiD/B;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IA8BzB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA8IrD;;OAEG;IACI,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,cAAc,CAAC,WAAW,CAAC;IAI9D;;OAEG;IACH,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO;IAKjD;;OAEG;IACH,YAAY;;;;;;IASZ;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAU/B"}
|
|
@@ -2,8 +2,18 @@
|
|
|
2
2
|
* ONNX Runtime Local Inference Provider for Phi-4
|
|
3
3
|
*
|
|
4
4
|
* Uses onnxruntime-node for true local CPU/GPU inference
|
|
5
|
+
* Falls back gracefully when native module isn't available (Windows)
|
|
5
6
|
*/
|
|
6
|
-
|
|
7
|
+
let ort = null;
|
|
8
|
+
let ortAvailable = false;
|
|
9
|
+
// Dynamic import for optional onnxruntime-node
|
|
10
|
+
try {
|
|
11
|
+
ort = await import('onnxruntime-node');
|
|
12
|
+
ortAvailable = true;
|
|
13
|
+
}
|
|
14
|
+
catch {
|
|
15
|
+
console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
|
|
16
|
+
}
|
|
7
17
|
import { get_encoding } from 'tiktoken';
|
|
8
18
|
import { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';
|
|
9
19
|
export class ONNXLocalProvider {
|
|
@@ -72,6 +82,9 @@ export class ONNXLocalProvider {
|
|
|
72
82
|
async initializeSession() {
|
|
73
83
|
if (this.session)
|
|
74
84
|
return;
|
|
85
|
+
if (!ortAvailable || !ort) {
|
|
86
|
+
throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');
|
|
87
|
+
}
|
|
75
88
|
try {
|
|
76
89
|
// Ensure model is downloaded
|
|
77
90
|
console.log(`🔍 Checking for Phi-4 ONNX model...`);
|
|
@@ -135,12 +148,14 @@ export class ONNXLocalProvider {
|
|
|
135
148
|
const numKVHeads = 8;
|
|
136
149
|
const headDim = 128; // 3072 / 24 = 128
|
|
137
150
|
const kvCache = {};
|
|
151
|
+
// Get Tensor constructor - use any for flexible access
|
|
152
|
+
const TensorClass = ort.Tensor;
|
|
138
153
|
// Initialize empty cache for each layer (key and value)
|
|
139
154
|
for (let i = 0; i < numLayers; i++) {
|
|
140
155
|
// Empty cache: [batch_size, num_kv_heads, 0, head_dim]
|
|
141
156
|
const emptyCache = new Float32Array(0);
|
|
142
|
-
kvCache[`past_key_values.${i}.key`] = new
|
|
143
|
-
kvCache[`past_key_values.${i}.value`] = new
|
|
157
|
+
kvCache[`past_key_values.${i}.key`] = new TensorClass('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
|
|
158
|
+
kvCache[`past_key_values.${i}.value`] = new TensorClass('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
|
|
144
159
|
}
|
|
145
160
|
return kvCache;
|
|
146
161
|
}
|
|
@@ -168,11 +183,13 @@ export class ONNXLocalProvider {
|
|
|
168
183
|
// For first step, use all input tokens; for subsequent steps, use only last token
|
|
169
184
|
const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];
|
|
170
185
|
const currentSeqLen = currentInputIds.length;
|
|
186
|
+
// Get Tensor constructor - use any for flexible access
|
|
187
|
+
const TensorClass = ort.Tensor;
|
|
171
188
|
// Create input tensor for current step
|
|
172
|
-
const inputTensor = new
|
|
189
|
+
const inputTensor = new TensorClass('int64', BigInt64Array.from(currentInputIds.map(BigInt)), [1, currentSeqLen]);
|
|
173
190
|
// Create attention mask for current step
|
|
174
191
|
const totalSeqLen = allTokenIds.length;
|
|
175
|
-
const attentionMask = new
|
|
192
|
+
const attentionMask = new TensorClass('int64', BigInt64Array.from(Array(totalSeqLen).fill(1n)), [1, totalSeqLen]);
|
|
176
193
|
// Build feeds with input, attention mask, and KV cache
|
|
177
194
|
const feeds = {
|
|
178
195
|
input_ids: inputTensor,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,GAAG,MAAM,kBAAkB,CAAC;AAGxC,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAgC,IAAI,CAAC;IAC5C,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,I
AAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACjE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAA+B,EAAE,CAAC;QAE/C,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,m
BAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAClD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CACpD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,MAAM,CAChC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,MAAM,CAClC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAA+B;oBACxC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM
,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CAAC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,
IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n */\n\nimport * as ort from 'onnxruntime-node';\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: ort.InferenceSession | null = 
null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: 
${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? 
c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, ort.Tensor> = {};\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new ort.Tensor(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new ort.Tensor(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n 
for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Create input tensor for current step\n const inputTensor = new ort.Tensor(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new ort.Tensor(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, ort.Tensor> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = 
results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions 
don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
|
|
1
|
+
{"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AACpB,IAAI,YAAY,GAAG,KAAK,CAAC;AAEzB,+CAA+C;AAC/C,IAAI,CAAC;IACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;IACvC,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC;AAAC,MAAM,CAAC;IACP,OAAO,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;AACnF,CAAC;AAID,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAQ,IAAI,CAAC;IACpB,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;
IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,IAAI,CAAC,YAAY,IAAI,CAAC,GAAG,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;QACjG,CAAC;QAED,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACj
E,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAAwB,EAAE,CAAC;QAExC,uDAAuD;QACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;QAExC,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,WAAW,CACnD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,WAAW,CACrD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uDAAuD;gBACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;gBAExC,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,WAAW,CACjC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,WAAW,CACnC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,
CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAAwB;oBACjC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CA
AC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n * Falls back gracefully when native module isn't available (Windows)\n */\n\nlet ort: any = null;\nlet ortAvailable = false;\n\n// Dynamic import for optional onnxruntime-node\ntry {\n ort = await import('onnxruntime-node');\n ortAvailable = true;\n} catch {\n console.warn('[ONNX] onnxruntime-node not available - local inference disabled');\n}\n\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } 
from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: any = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded 
instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n if (!ortAvailable || !ort) {\n throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');\n }\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? 
c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, any> = {};\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // 
Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Create input tensor for current step\n const inputTensor = new TensorClass(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new TensorClass(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, any> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n 
pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n 
tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../../src/router/router.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,WAAW,EACX,YAAY,EACZ,UAAU,EACV,YAAY,EACZ,WAAW,EACX,YAAY,EACZ,aAAa,EAEd,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../../src/router/router.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,WAAW,EACX,YAAY,EACZ,UAAU,EACV,YAAY,EACZ,WAAW,EACX,YAAY,EACZ,aAAa,EAEd,MAAM,YAAY,CAAC;AAOpB,qBAAa,WAAW;IACtB,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,SAAS,CAA6C;IAC9D,OAAO,CAAC,OAAO,CAAgB;gBAEnB,UAAU,CAAC,EAAE,MAAM;IAM/B,OAAO,CAAC,UAAU;IAwBlB,OAAO,CAAC,mBAAmB;IAiD3B,OAAO,CAAC,iBAAiB;IAwBzB,OAAO,CAAC,mBAAmB;IAkE3B,OAAO,CAAC,iBAAiB;IAUnB,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAuBlE,MAAM,CAAC,MAAM,EAAE,UAAU,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,cAAc,CAAC,WAAW,CAAC;YAapE,cAAc;IAgC5B,OAAO,CAAC,kBAAkB;IAQ1B,OAAO,CAAC,aAAa;IAgBrB,OAAO,CAAC,WAAW;IAkBnB,OAAO,CAAC,YAAY;IAgBpB,OAAO,CAAC,mBAAmB;YAqBb,mBAAmB;IA4BjC,OAAO,CAAC,aAAa;IA8CrB,UAAU,IAAI,aAAa;IAI3B,SAAS,IAAI,YAAY;IAIzB,YAAY,IAAI,GAAG,CAAC,YAAY,EAAE,WAAW,CAAC;CAG/C"}
|