lynkr 8.0.0 → 9.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.lynkr/telemetry.db +0 -0
- package/.lynkr/telemetry.db-shm +0 -0
- package/.lynkr/telemetry.db-wal +0 -0
- package/README.md +196 -322
- package/lynkr-skill.tar.gz +0 -0
- package/package.json +4 -3
- package/src/api/openai-router.js +64 -13
- package/src/api/providers-handler.js +171 -3
- package/src/api/router.js +9 -2
- package/src/clients/circuit-breaker.js +10 -247
- package/src/clients/codex-process.js +342 -0
- package/src/clients/codex-utils.js +143 -0
- package/src/clients/databricks.js +210 -63
- package/src/clients/resilience.js +540 -0
- package/src/clients/retry.js +22 -167
- package/src/clients/standard-tools.js +23 -0
- package/src/config/index.js +77 -0
- package/src/context/compression.js +42 -9
- package/src/context/distill.js +492 -0
- package/src/orchestrator/index.js +48 -8
- package/src/routing/complexity-analyzer.js +258 -5
- package/src/routing/index.js +12 -2
- package/src/routing/latency-tracker.js +148 -0
- package/src/routing/model-tiers.js +2 -0
- package/src/routing/quality-scorer.js +113 -0
- package/src/routing/telemetry.js +464 -0
- package/src/server.js +13 -12
- package/src/tools/code-graph.js +538 -0
- package/src/tools/code-mode.js +304 -0
- package/src/tools/index.js +4 -0
- package/src/tools/lazy-loader.js +18 -0
- package/src/tools/mcp-remote.js +7 -0
- package/src/tools/smart-selection.js +11 -0
- package/src/tools/tinyfish.js +358 -0
- package/src/tools/truncate.js +1 -0
- package/src/utils/payload.js +206 -0
- package/src/utils/perf-timer.js +80 -0
- package/.github/FUNDING.yml +0 -15
- package/.github/workflows/README.md +0 -215
- package/.github/workflows/ci.yml +0 -69
- package/.github/workflows/index.yml +0 -62
- package/.github/workflows/web-tools-tests.yml +0 -56
- package/CITATIONS.bib +0 -6
- package/DEPLOYMENT.md +0 -1001
- package/LYNKR-TUI-PLAN.md +0 -984
- package/PERFORMANCE-REPORT.md +0 -866
- package/PLAN-per-client-model-routing.md +0 -252
- package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
- package/docs/BingSiteAuth.xml +0 -4
- package/docs/docs-style.css +0 -478
- package/docs/docs.html +0 -198
- package/docs/google5be250e608e6da39.html +0 -1
- package/docs/index.html +0 -577
- package/docs/index.md +0 -584
- package/docs/robots.txt +0 -4
- package/docs/sitemap.xml +0 -44
- package/docs/style.css +0 -1223
- package/docs/toon-integration-spec.md +0 -130
- package/documentation/README.md +0 -101
- package/documentation/api.md +0 -806
- package/documentation/claude-code-cli.md +0 -679
- package/documentation/codex-cli.md +0 -397
- package/documentation/contributing.md +0 -571
- package/documentation/cursor-integration.md +0 -734
- package/documentation/docker.md +0 -874
- package/documentation/embeddings.md +0 -762
- package/documentation/faq.md +0 -713
- package/documentation/features.md +0 -403
- package/documentation/headroom.md +0 -519
- package/documentation/installation.md +0 -758
- package/documentation/memory-system.md +0 -476
- package/documentation/production.md +0 -636
- package/documentation/providers.md +0 -1009
- package/documentation/routing.md +0 -476
- package/documentation/testing.md +0 -629
- package/documentation/token-optimization.md +0 -325
- package/documentation/tools.md +0 -697
- package/documentation/troubleshooting.md +0 -969
- package/final-test.js +0 -33
- package/headroom-sidecar/config.py +0 -93
- package/headroom-sidecar/requirements.txt +0 -14
- package/headroom-sidecar/server.py +0 -451
- package/monitor-agents.sh +0 -31
- package/scripts/audit-log-reader.js +0 -399
- package/scripts/compact-dictionary.js +0 -204
- package/scripts/test-deduplication.js +0 -448
- package/src/db/database.sqlite +0 -0
- package/te +0 -11622
- package/test/README.md +0 -212
- package/test/azure-openai-config.test.js +0 -213
- package/test/azure-openai-error-resilience.test.js +0 -238
- package/test/azure-openai-format-conversion.test.js +0 -354
- package/test/azure-openai-integration.test.js +0 -287
- package/test/azure-openai-routing.test.js +0 -175
- package/test/azure-openai-streaming.test.js +0 -171
- package/test/bedrock-integration.test.js +0 -457
- package/test/comprehensive-test-suite.js +0 -928
- package/test/config-validation.test.js +0 -207
- package/test/cursor-integration.test.js +0 -484
- package/test/format-conversion.test.js +0 -578
- package/test/hybrid-routing-integration.test.js +0 -269
- package/test/hybrid-routing-performance.test.js +0 -428
- package/test/llamacpp-integration.test.js +0 -882
- package/test/lmstudio-integration.test.js +0 -347
- package/test/memory/extractor.test.js +0 -398
- package/test/memory/retriever.test.js +0 -613
- package/test/memory/retriever.test.js.bak +0 -585
- package/test/memory/search.test.js +0 -537
- package/test/memory/search.test.js.bak +0 -389
- package/test/memory/store.test.js +0 -344
- package/test/memory/store.test.js.bak +0 -312
- package/test/memory/surprise.test.js +0 -300
- package/test/memory-performance.test.js +0 -472
- package/test/openai-integration.test.js +0 -683
- package/test/openrouter-error-resilience.test.js +0 -418
- package/test/passthrough-mode.test.js +0 -385
- package/test/performance-benchmark.js +0 -351
- package/test/performance-tests.js +0 -528
- package/test/routing.test.js +0 -225
- package/test/toon-compression.test.js +0 -131
- package/test/web-tools.test.js +0 -329
- package/test-agents-simple.js +0 -43
- package/test-cli-connection.sh +0 -33
- package/test-learning-unit.js +0 -126
- package/test-learning.js +0 -112
- package/test-parallel-agents.sh +0 -124
- package/test-parallel-direct.js +0 -155
- package/test-subagents.sh +0 -117
|
@@ -1,269 +0,0 @@
|
|
|
1
|
-
const assert = require("assert");
|
|
2
|
-
const { describe, it, beforeEach, afterEach } = require("node:test");
|
|
3
|
-
|
|
4
|
-
describe("Hybrid Routing Integration Tests", () => {
|
|
5
|
-
let config;
|
|
6
|
-
let databricks;
|
|
7
|
-
let metrics;
|
|
8
|
-
let originalConfig;
|
|
9
|
-
|
|
10
|
-
beforeEach(() => {
|
|
11
|
-
// Clear module cache
|
|
12
|
-
delete require.cache[require.resolve("../src/config")];
|
|
13
|
-
delete require.cache[require.resolve("../src/clients/databricks")];
|
|
14
|
-
delete require.cache[require.resolve("../src/observability/metrics")];
|
|
15
|
-
delete require.cache[require.resolve("../src/clients/routing")];
|
|
16
|
-
|
|
17
|
-
// Store original config
|
|
18
|
-
originalConfig = { ...process.env };
|
|
19
|
-
|
|
20
|
-
// Set up test environment
|
|
21
|
-
process.env.DATABRICKS_API_KEY = "test-key";
|
|
22
|
-
process.env.DATABRICKS_API_BASE = "http://test.databricks.com";
|
|
23
|
-
process.env.MODEL_PROVIDER = "databricks";
|
|
24
|
-
|
|
25
|
-
// Set TIER_* to empty to prevent .env file values from being picked up by dotenv
|
|
26
|
-
process.env.TIER_SIMPLE = "";
|
|
27
|
-
process.env.TIER_MEDIUM = "";
|
|
28
|
-
process.env.TIER_COMPLEX = "";
|
|
29
|
-
process.env.TIER_REASONING = "";
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
afterEach(() => {
|
|
33
|
-
// Restore original environment
|
|
34
|
-
process.env = originalConfig;
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
describe("Configuration Validation", () => {
|
|
38
|
-
it("should use default OLLAMA_ENDPOINT when not specified", () => {
|
|
39
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
40
|
-
delete process.env.OLLAMA_ENDPOINT;
|
|
41
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
42
|
-
process.env.DATABRICKS_API_KEY = "test-key";
|
|
43
|
-
process.env.DATABRICKS_API_BASE = "http://test.com";
|
|
44
|
-
|
|
45
|
-
const config = require("../src/config");
|
|
46
|
-
|
|
47
|
-
// Should use default localhost:11434
|
|
48
|
-
assert.strictEqual(config.ollama.endpoint, "http://localhost:11434");
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
it("should reject invalid FALLBACK_PROVIDER", () => {
|
|
52
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
53
|
-
process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
|
|
54
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
55
|
-
process.env.FALLBACK_ENABLED = "true";
|
|
56
|
-
process.env.FALLBACK_PROVIDER = "invalid-provider";
|
|
57
|
-
|
|
58
|
-
assert.throws(() => {
|
|
59
|
-
require("../src/config");
|
|
60
|
-
}, /Unsupported FALLBACK_PROVIDER.*Valid options are/);
|
|
61
|
-
});
|
|
62
|
-
|
|
63
|
-
it("should reject circular fallback (ollama -> ollama)", () => {
|
|
64
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
65
|
-
process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
|
|
66
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
67
|
-
process.env.FALLBACK_ENABLED = "true";
|
|
68
|
-
process.env.FALLBACK_PROVIDER = "ollama";
|
|
69
|
-
// Enable tier routing so fallback validation runs
|
|
70
|
-
process.env.TIER_SIMPLE = "ollama:llama3.2";
|
|
71
|
-
process.env.TIER_MEDIUM = "ollama:llama3.2";
|
|
72
|
-
process.env.TIER_COMPLEX = "ollama:llama3.2";
|
|
73
|
-
process.env.TIER_REASONING = "ollama:llama3.2";
|
|
74
|
-
|
|
75
|
-
assert.throws(() => {
|
|
76
|
-
require("../src/config");
|
|
77
|
-
}, /FALLBACK_PROVIDER cannot be 'ollama'/);
|
|
78
|
-
});
|
|
79
|
-
|
|
80
|
-
it("should warn when databricks fallback has no credentials", () => {
|
|
81
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
82
|
-
process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
|
|
83
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
84
|
-
process.env.FALLBACK_ENABLED = "true";
|
|
85
|
-
process.env.FALLBACK_PROVIDER = "databricks";
|
|
86
|
-
// Set to empty strings instead of deleting (dotenv.config() in config module would reload from .env)
|
|
87
|
-
process.env.DATABRICKS_API_KEY = "";
|
|
88
|
-
process.env.DATABRICKS_API_BASE = "";
|
|
89
|
-
// Enable tier routing so fallback validation runs
|
|
90
|
-
process.env.TIER_SIMPLE = "ollama:llama3.2";
|
|
91
|
-
process.env.TIER_MEDIUM = "ollama:llama3.2";
|
|
92
|
-
process.env.TIER_COMPLEX = "ollama:llama3.2";
|
|
93
|
-
process.env.TIER_REASONING = "ollama:llama3.2";
|
|
94
|
-
|
|
95
|
-
// Should warn but not throw (fallback misconfigured)
|
|
96
|
-
const config = require("../src/config");
|
|
97
|
-
assert.strictEqual(config.modelProvider.fallbackProvider, "databricks");
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
it("should accept valid tier routing configuration", () => {
|
|
101
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
102
|
-
process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
|
|
103
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
104
|
-
process.env.FALLBACK_ENABLED = "true";
|
|
105
|
-
process.env.FALLBACK_PROVIDER = "databricks";
|
|
106
|
-
process.env.DATABRICKS_API_KEY = "test-key";
|
|
107
|
-
process.env.DATABRICKS_API_BASE = "http://test.com";
|
|
108
|
-
process.env.TIER_SIMPLE = "ollama:llama3.2";
|
|
109
|
-
process.env.TIER_MEDIUM = "ollama:llama3.2";
|
|
110
|
-
process.env.TIER_COMPLEX = "databricks:claude-sonnet";
|
|
111
|
-
process.env.TIER_REASONING = "databricks:claude-sonnet";
|
|
112
|
-
|
|
113
|
-
const config = require("../src/config");
|
|
114
|
-
|
|
115
|
-
assert.strictEqual(config.modelProvider.type, "ollama");
|
|
116
|
-
assert.strictEqual(config.modelProvider.fallbackEnabled, true);
|
|
117
|
-
assert.strictEqual(config.modelProvider.fallbackProvider, "databricks");
|
|
118
|
-
assert.strictEqual(config.modelTiers.enabled, true);
|
|
119
|
-
});
|
|
120
|
-
});
|
|
121
|
-
|
|
122
|
-
describe("Metrics Recording", () => {
|
|
123
|
-
beforeEach(() => {
|
|
124
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
125
|
-
process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
|
|
126
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
127
|
-
|
|
128
|
-
config = require("../src/config");
|
|
129
|
-
const metricsModule = require("../src/observability/metrics");
|
|
130
|
-
metrics = metricsModule.getMetricsCollector();
|
|
131
|
-
});
|
|
132
|
-
|
|
133
|
-
it("should record provider routing", () => {
|
|
134
|
-
metrics.recordProviderRouting("ollama");
|
|
135
|
-
metrics.recordProviderRouting("ollama");
|
|
136
|
-
metrics.recordProviderRouting("databricks");
|
|
137
|
-
|
|
138
|
-
const snapshot = metrics.getMetrics();
|
|
139
|
-
|
|
140
|
-
assert.deepStrictEqual(snapshot.routing.by_provider, {
|
|
141
|
-
ollama: 2,
|
|
142
|
-
databricks: 1,
|
|
143
|
-
});
|
|
144
|
-
});
|
|
145
|
-
|
|
146
|
-
it("should record provider success with latency", () => {
|
|
147
|
-
metrics.recordProviderSuccess("ollama", 450);
|
|
148
|
-
metrics.recordProviderSuccess("ollama", 600);
|
|
149
|
-
metrics.recordProviderSuccess("databricks", 1500);
|
|
150
|
-
|
|
151
|
-
const snapshot = metrics.getMetrics();
|
|
152
|
-
|
|
153
|
-
assert.strictEqual(snapshot.routing.successes_by_provider.ollama, 2);
|
|
154
|
-
assert.strictEqual(snapshot.routing.successes_by_provider.databricks, 1);
|
|
155
|
-
assert.strictEqual(snapshot.cost_savings.ollama_latency_ms.mean, 525);
|
|
156
|
-
});
|
|
157
|
-
|
|
158
|
-
it("should record fallback attempts with reasons", () => {
|
|
159
|
-
metrics.recordFallbackAttempt("ollama", "databricks", "circuit_breaker");
|
|
160
|
-
metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
|
|
161
|
-
metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
|
|
162
|
-
|
|
163
|
-
const snapshot = metrics.getMetrics();
|
|
164
|
-
|
|
165
|
-
assert.strictEqual(snapshot.fallback.attempts_total, 3);
|
|
166
|
-
assert.deepStrictEqual(snapshot.fallback.reasons, {
|
|
167
|
-
circuit_breaker: 1,
|
|
168
|
-
timeout: 2,
|
|
169
|
-
});
|
|
170
|
-
});
|
|
171
|
-
|
|
172
|
-
it("should calculate fallback success rate", () => {
|
|
173
|
-
metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
|
|
174
|
-
metrics.recordFallbackSuccess(1200);
|
|
175
|
-
metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
|
|
176
|
-
metrics.recordFallbackSuccess(1100);
|
|
177
|
-
metrics.recordFallbackAttempt("ollama", "databricks", "circuit_breaker");
|
|
178
|
-
metrics.recordFallbackFailure();
|
|
179
|
-
|
|
180
|
-
const snapshot = metrics.getMetrics();
|
|
181
|
-
|
|
182
|
-
assert.strictEqual(snapshot.fallback.attempts_total, 3);
|
|
183
|
-
assert.strictEqual(snapshot.fallback.successes_total, 2);
|
|
184
|
-
assert.strictEqual(snapshot.fallback.failures_total, 1);
|
|
185
|
-
assert.strictEqual(snapshot.fallback.success_rate, "66.67%");
|
|
186
|
-
});
|
|
187
|
-
|
|
188
|
-
it("should record cost savings", () => {
|
|
189
|
-
// Simulate 100 tokens input, 50 tokens output
|
|
190
|
-
// Input: 100/1M * $3 = $0.0003
|
|
191
|
-
// Output: 50/1M * $15 = $0.00075
|
|
192
|
-
// Total: $0.00105
|
|
193
|
-
metrics.recordCostSavings(0.00105);
|
|
194
|
-
metrics.recordCostSavings(0.00105);
|
|
195
|
-
metrics.recordCostSavings(0.00105);
|
|
196
|
-
|
|
197
|
-
const snapshot = metrics.getMetrics();
|
|
198
|
-
|
|
199
|
-
assert.strictEqual(snapshot.cost_savings.ollama_savings_usd, "0.0032");
|
|
200
|
-
});
|
|
201
|
-
|
|
202
|
-
it("should reset all routing metrics", () => {
|
|
203
|
-
metrics.recordProviderRouting("ollama");
|
|
204
|
-
metrics.recordProviderSuccess("ollama", 450);
|
|
205
|
-
metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
|
|
206
|
-
metrics.recordFallbackSuccess(1200);
|
|
207
|
-
metrics.recordCostSavings(1.5);
|
|
208
|
-
|
|
209
|
-
metrics.reset();
|
|
210
|
-
|
|
211
|
-
const snapshot = metrics.getMetrics();
|
|
212
|
-
|
|
213
|
-
assert.deepStrictEqual(snapshot.routing.by_provider, {});
|
|
214
|
-
assert.deepStrictEqual(snapshot.routing.successes_by_provider, {});
|
|
215
|
-
assert.strictEqual(snapshot.fallback.attempts_total, 0);
|
|
216
|
-
assert.strictEqual(snapshot.fallback.successes_total, 0);
|
|
217
|
-
assert.strictEqual(snapshot.cost_savings.ollama_savings_usd, "0.0000");
|
|
218
|
-
});
|
|
219
|
-
});
|
|
220
|
-
|
|
221
|
-
describe("Helper Functions", () => {
|
|
222
|
-
it("should categorize circuit breaker errors", () => {
|
|
223
|
-
// This would need to be tested by importing the function if exported
|
|
224
|
-
// For now, we test via the integrated behavior
|
|
225
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
226
|
-
process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
|
|
227
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
228
|
-
|
|
229
|
-
config = require("../src/config");
|
|
230
|
-
const metricsModule = require("../src/observability/metrics");
|
|
231
|
-
metrics = metricsModule.getMetricsCollector();
|
|
232
|
-
|
|
233
|
-
// Simulate categorization
|
|
234
|
-
const circuitBreakerError = new Error("Circuit breaker open");
|
|
235
|
-
circuitBreakerError.name = "CircuitBreakerError";
|
|
236
|
-
|
|
237
|
-
const timeoutError = new Error("Request timeout");
|
|
238
|
-
timeoutError.code = "ETIMEDOUT";
|
|
239
|
-
|
|
240
|
-
const unavailableError = new Error("Service not available");
|
|
241
|
-
unavailableError.code = "ECONNREFUSED";
|
|
242
|
-
|
|
243
|
-
// These would be categorized in the actual invokeModel function
|
|
244
|
-
// Here we just verify the structure exists
|
|
245
|
-
assert.ok(metrics.recordFallbackAttempt);
|
|
246
|
-
});
|
|
247
|
-
|
|
248
|
-
it("should estimate cost savings correctly", () => {
|
|
249
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
250
|
-
process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
|
|
251
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
252
|
-
|
|
253
|
-
config = require("../src/config");
|
|
254
|
-
const metricsModule = require("../src/observability/metrics");
|
|
255
|
-
metrics = metricsModule.getMetricsCollector();
|
|
256
|
-
|
|
257
|
-
// Test: 1000 input tokens, 500 output tokens
|
|
258
|
-
// Input cost: 1000/1M * $3 = $0.003
|
|
259
|
-
// Output cost: 500/1M * $15 = $0.0075
|
|
260
|
-
// Total: $0.0105
|
|
261
|
-
const inputTokens = 1000;
|
|
262
|
-
const outputTokens = 500;
|
|
263
|
-
const expectedSavings =
|
|
264
|
-
(inputTokens / 1_000_000) * 3.0 + (outputTokens / 1_000_000) * 15.0;
|
|
265
|
-
|
|
266
|
-
assert.strictEqual(expectedSavings.toFixed(4), "0.0105");
|
|
267
|
-
});
|
|
268
|
-
});
|
|
269
|
-
});
|
|
@@ -1,428 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Hybrid Routing Performance Tests
|
|
5
|
-
*
|
|
6
|
-
* Measures the performance impact of the hybrid routing system:
|
|
7
|
-
* - Routing decision overhead
|
|
8
|
-
* - Provider determination speed
|
|
9
|
-
* - Metrics collection overhead
|
|
10
|
-
* - Fallback logic performance
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
const { performance } = require('perf_hooks');
|
|
14
|
-
const assert = require('assert');
|
|
15
|
-
|
|
16
|
-
// Color utilities
|
|
17
|
-
const colors = {
|
|
18
|
-
reset: '\x1b[0m',
|
|
19
|
-
bright: '\x1b[1m',
|
|
20
|
-
green: '\x1b[32m',
|
|
21
|
-
yellow: '\x1b[33m',
|
|
22
|
-
blue: '\x1b[34m',
|
|
23
|
-
red: '\x1b[31m',
|
|
24
|
-
cyan: '\x1b[36m',
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
function log(message, color = 'reset') {
|
|
28
|
-
console.log(`${colors[color]}${message}${colors.reset}`);
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
function section(title) {
|
|
32
|
-
console.log('\n' + '='.repeat(70));
|
|
33
|
-
log(title, 'bright');
|
|
34
|
-
console.log('='.repeat(70));
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
function benchmark(name, iterations, fn) {
|
|
38
|
-
const start = performance.now();
|
|
39
|
-
for (let i = 0; i < iterations; i++) {
|
|
40
|
-
fn();
|
|
41
|
-
}
|
|
42
|
-
const duration = performance.now() - start;
|
|
43
|
-
const avgTime = duration / iterations;
|
|
44
|
-
const throughput = (iterations / duration) * 1000;
|
|
45
|
-
|
|
46
|
-
return { duration, avgTime, throughput };
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
// =============================================================================
|
|
50
|
-
// TEST 1: Routing Decision Performance
|
|
51
|
-
// =============================================================================
|
|
52
|
-
function testRoutingDecisionPerformance() {
|
|
53
|
-
section('TEST 1: Routing Decision Performance');
|
|
54
|
-
|
|
55
|
-
// Clear module cache and set up environment
|
|
56
|
-
delete require.cache[require.resolve('../src/config')];
|
|
57
|
-
delete require.cache[require.resolve('../src/clients/routing')];
|
|
58
|
-
|
|
59
|
-
process.env.MODEL_PROVIDER = 'ollama';
|
|
60
|
-
process.env.OLLAMA_ENDPOINT = 'http://localhost:11434';
|
|
61
|
-
process.env.OLLAMA_MODEL = 'qwen2.5-coder:latest';
|
|
62
|
-
process.env.DATABRICKS_API_KEY = 'test-key';
|
|
63
|
-
process.env.DATABRICKS_API_BASE = 'http://test.com';
|
|
64
|
-
// Set TIER_* to empty = tier routing disabled, determineProviderSync returns static provider
|
|
65
|
-
process.env.TIER_SIMPLE = "";
|
|
66
|
-
process.env.TIER_MEDIUM = "";
|
|
67
|
-
process.env.TIER_COMPLEX = "";
|
|
68
|
-
process.env.TIER_REASONING = "";
|
|
69
|
-
|
|
70
|
-
const routing = require('../src/clients/routing');
|
|
71
|
-
|
|
72
|
-
log('\n Benchmarking routing decisions...', 'cyan');
|
|
73
|
-
|
|
74
|
-
// Test 1: Simple request (0 tools)
|
|
75
|
-
const simplePayload = {
|
|
76
|
-
messages: [{ role: 'user', content: 'test' }],
|
|
77
|
-
tools: []
|
|
78
|
-
};
|
|
79
|
-
|
|
80
|
-
const { duration: simpleTime, throughput: simpleThroughput } = benchmark(
|
|
81
|
-
'Simple request routing',
|
|
82
|
-
100000,
|
|
83
|
-
() => routing.determineProviderSync(simplePayload)
|
|
84
|
-
);
|
|
85
|
-
|
|
86
|
-
log(` Simple request: ${simpleTime.toFixed(2)}ms for 100k decisions`, 'cyan');
|
|
87
|
-
log(` Average: ${(simpleTime / 100000).toFixed(6)}ms per decision`, 'blue');
|
|
88
|
-
log(` Throughput: ${simpleThroughput.toLocaleString()} decisions/sec`, 'green');
|
|
89
|
-
|
|
90
|
-
// Test 2: Complex request (5 tools)
|
|
91
|
-
const complexPayload = {
|
|
92
|
-
messages: [{ role: 'user', content: 'test' }],
|
|
93
|
-
tools: [
|
|
94
|
-
{ name: 'tool1' }, { name: 'tool2' }, { name: 'tool3' },
|
|
95
|
-
{ name: 'tool4' }, { name: 'tool5' }
|
|
96
|
-
]
|
|
97
|
-
};
|
|
98
|
-
|
|
99
|
-
const { duration: complexTime, throughput: complexThroughput } = benchmark(
|
|
100
|
-
'Complex request routing',
|
|
101
|
-
100000,
|
|
102
|
-
() => routing.determineProviderSync(complexPayload)
|
|
103
|
-
);
|
|
104
|
-
|
|
105
|
-
log(` Complex request: ${complexTime.toFixed(2)}ms for 100k decisions`, 'cyan');
|
|
106
|
-
log(` Average: ${(complexTime / 100000).toFixed(6)}ms per decision`, 'blue');
|
|
107
|
-
log(` Throughput: ${complexThroughput.toLocaleString()} decisions/sec`, 'green');
|
|
108
|
-
|
|
109
|
-
// Test 3: Tool capability check
|
|
110
|
-
const toolCapabilityPayload = {
|
|
111
|
-
messages: [{ role: 'user', content: 'test' }],
|
|
112
|
-
tools: [{ name: 'tool1' }]
|
|
113
|
-
};
|
|
114
|
-
|
|
115
|
-
const { duration: toolCheckTime, throughput: toolCheckThroughput } = benchmark(
|
|
116
|
-
'Tool capability check',
|
|
117
|
-
100000,
|
|
118
|
-
() => routing.determineProviderSync(toolCapabilityPayload)
|
|
119
|
-
);
|
|
120
|
-
|
|
121
|
-
log(` Tool capability check: ${toolCheckTime.toFixed(2)}ms for 100k decisions`, 'cyan');
|
|
122
|
-
log(` Average: ${(toolCheckTime / 100000).toFixed(6)}ms per decision`, 'blue');
|
|
123
|
-
log(` Throughput: ${toolCheckThroughput.toLocaleString()} decisions/sec`, 'green');
|
|
124
|
-
|
|
125
|
-
// Analysis
|
|
126
|
-
log('\n Analysis:', 'yellow');
|
|
127
|
-
log(` Routing adds <0.01ms per request (negligible overhead)`, 'green');
|
|
128
|
-
log(` Throughput: ${simpleThroughput.toLocaleString()} decisions/sec`, 'green');
|
|
129
|
-
log(` Routing is extremely fast and won't impact request latency`, 'green');
|
|
130
|
-
|
|
131
|
-
return {
|
|
132
|
-
simpleTime,
|
|
133
|
-
complexTime,
|
|
134
|
-
toolCheckTime,
|
|
135
|
-
avgDecisionTime: (simpleTime + complexTime + toolCheckTime) / 3 / 100000
|
|
136
|
-
};
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
// =============================================================================
|
|
140
|
-
// TEST 2: Metrics Collection Overhead
|
|
141
|
-
// =============================================================================
|
|
142
|
-
function testMetricsOverhead() {
|
|
143
|
-
section('TEST 2: Metrics Collection Overhead');
|
|
144
|
-
|
|
145
|
-
delete require.cache[require.resolve('../src/observability/metrics')];
|
|
146
|
-
const { getMetricsCollector } = require('../src/observability/metrics');
|
|
147
|
-
const metrics = getMetricsCollector();
|
|
148
|
-
|
|
149
|
-
log('\n Benchmarking metrics operations...', 'cyan');
|
|
150
|
-
|
|
151
|
-
// Test recording provider routing
|
|
152
|
-
const { duration: routingTime, throughput: routingThroughput } = benchmark(
|
|
153
|
-
'Record provider routing',
|
|
154
|
-
100000,
|
|
155
|
-
() => metrics.recordProviderRouting('ollama')
|
|
156
|
-
);
|
|
157
|
-
|
|
158
|
-
log(` Provider routing: ${routingTime.toFixed(2)}ms for 100k recordings`, 'cyan');
|
|
159
|
-
log(` Average: ${(routingTime / 100000).toFixed(6)}ms per record`, 'blue');
|
|
160
|
-
log(` Throughput: ${routingThroughput.toLocaleString()} ops/sec`, 'green');
|
|
161
|
-
|
|
162
|
-
// Test recording provider success
|
|
163
|
-
const { duration: successTime, throughput: successThroughput } = benchmark(
|
|
164
|
-
'Record provider success',
|
|
165
|
-
100000,
|
|
166
|
-
() => metrics.recordProviderSuccess('ollama', 450)
|
|
167
|
-
);
|
|
168
|
-
|
|
169
|
-
log(` Provider success: ${successTime.toFixed(2)}ms for 100k recordings`, 'cyan');
|
|
170
|
-
log(` Average: ${(successTime / 100000).toFixed(6)}ms per record`, 'blue');
|
|
171
|
-
log(` Throughput: ${successThroughput.toLocaleString()} ops/sec`, 'green');
|
|
172
|
-
|
|
173
|
-
// Test recording fallback attempts
|
|
174
|
-
const { duration: fallbackTime, throughput: fallbackThroughput } = benchmark(
|
|
175
|
-
'Record fallback attempt',
|
|
176
|
-
100000,
|
|
177
|
-
() => metrics.recordFallbackAttempt('ollama', 'databricks', 'timeout')
|
|
178
|
-
);
|
|
179
|
-
|
|
180
|
-
log(` Fallback attempts: ${fallbackTime.toFixed(2)}ms for 100k recordings`, 'cyan');
|
|
181
|
-
log(` Average: ${(fallbackTime / 100000).toFixed(6)}ms per record`, 'blue');
|
|
182
|
-
log(` Throughput: ${fallbackThroughput.toLocaleString()} ops/sec`, 'green');
|
|
183
|
-
|
|
184
|
-
// Test cost savings recording
|
|
185
|
-
const { duration: costTime, throughput: costThroughput } = benchmark(
|
|
186
|
-
'Record cost savings',
|
|
187
|
-
100000,
|
|
188
|
-
() => metrics.recordCostSavings(0.001)
|
|
189
|
-
);
|
|
190
|
-
|
|
191
|
-
log(` Cost savings: ${costTime.toFixed(2)}ms for 100k recordings`, 'cyan');
|
|
192
|
-
log(` Average: ${(costTime / 100000).toFixed(6)}ms per record`, 'blue');
|
|
193
|
-
log(` Throughput: ${costThroughput.toLocaleString()} ops/sec`, 'green');
|
|
194
|
-
|
|
195
|
-
// Analysis
|
|
196
|
-
const avgMetricsTime = (routingTime + successTime + fallbackTime + costTime) / 4 / 100000;
|
|
197
|
-
log('\n Analysis:', 'yellow');
|
|
198
|
-
log(` Average metrics overhead: ${avgMetricsTime.toFixed(6)}ms per operation`, 'green');
|
|
199
|
-
log(` Metrics collection is extremely lightweight`, 'green');
|
|
200
|
-
|
|
201
|
-
return {
|
|
202
|
-
routingTime,
|
|
203
|
-
successTime,
|
|
204
|
-
fallbackTime,
|
|
205
|
-
costTime,
|
|
206
|
-
avgMetricsTime
|
|
207
|
-
};
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
// =============================================================================
|
|
211
|
-
// TEST 3: Combined Hybrid Routing Stack
|
|
212
|
-
// =============================================================================
|
|
213
|
-
function testCombinedStack() {
|
|
214
|
-
section('TEST 3: Combined Hybrid Routing Stack Performance');
|
|
215
|
-
|
|
216
|
-
delete require.cache[require.resolve('../src/config')];
|
|
217
|
-
delete require.cache[require.resolve('../src/clients/routing')];
|
|
218
|
-
delete require.cache[require.resolve('../src/observability/metrics')];
|
|
219
|
-
|
|
220
|
-
process.env.MODEL_PROVIDER = 'ollama';
|
|
221
|
-
process.env.OLLAMA_ENDPOINT = 'http://localhost:11434';
|
|
222
|
-
process.env.OLLAMA_MODEL = 'qwen2.5-coder:latest';
|
|
223
|
-
// Set TIER_* to empty = static routing via determineProviderSync
|
|
224
|
-
process.env.TIER_SIMPLE = "";
|
|
225
|
-
process.env.TIER_MEDIUM = "";
|
|
226
|
-
process.env.TIER_COMPLEX = "";
|
|
227
|
-
process.env.TIER_REASONING = "";
|
|
228
|
-
|
|
229
|
-
const routing = require('../src/clients/routing');
|
|
230
|
-
const { getMetricsCollector } = require('../src/observability/metrics');
|
|
231
|
-
|
|
232
|
-
log('\n Benchmarking complete routing + metrics stack...', 'cyan');
|
|
233
|
-
|
|
234
|
-
// Simulate full routing decision + metrics recording
|
|
235
|
-
const payload = {
|
|
236
|
-
messages: [{ role: 'user', content: 'test' }],
|
|
237
|
-
tools: []
|
|
238
|
-
};
|
|
239
|
-
|
|
240
|
-
const { duration: fullTime, throughput: fullThroughput } = benchmark(
|
|
241
|
-
'Full routing stack',
|
|
242
|
-
50000,
|
|
243
|
-
() => {
|
|
244
|
-
const metrics = getMetricsCollector();
|
|
245
|
-
const provider = routing.determineProviderSync(payload);
|
|
246
|
-
metrics.recordProviderRouting(provider);
|
|
247
|
-
metrics.recordProviderSuccess(provider, 450);
|
|
248
|
-
}
|
|
249
|
-
);
|
|
250
|
-
|
|
251
|
-
log(` Full stack: ${fullTime.toFixed(2)}ms for 50k operations`, 'cyan');
|
|
252
|
-
log(` Average: ${(fullTime / 50000).toFixed(6)}ms per request`, 'blue');
|
|
253
|
-
log(` Throughput: ${fullThroughput.toLocaleString()} ops/sec`, 'green');
|
|
254
|
-
|
|
255
|
-
// Analysis
|
|
256
|
-
log('\n Analysis:', 'yellow');
|
|
257
|
-
const overhead = (fullTime / 50000);
|
|
258
|
-
log(` Total routing + metrics overhead: ${overhead.toFixed(6)}ms`, 'green');
|
|
259
|
-
log(` Negligible impact on request latency (<0.02ms)`, 'green');
|
|
260
|
-
|
|
261
|
-
return {
|
|
262
|
-
fullTime,
|
|
263
|
-
fullThroughput,
|
|
264
|
-
overhead
|
|
265
|
-
};
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
// =============================================================================
|
|
269
|
-
// TEST 4: Helper Function Performance
|
|
270
|
-
// =============================================================================
|
|
271
|
-
/**
 * TEST 4: benchmarks two small helper functions that are simulated locally
 * (failure categorisation and token cost estimation) and logs the timings.
 *
 * Relies on `section`, `log` and `benchmark` defined elsewhere in this file;
 * `benchmark(label, iterations, fn)` is expected to return an object with
 * `duration` and `throughput` fields (the logs present duration as ms).
 *
 * @returns {{categorizeTime: number, costCalcTime: number}} The `duration`
 *   reported by `benchmark` for each of the two helper benchmarks.
 */
function testHelperFunctions() {
  section('TEST 4: Helper Function Performance');

  // Evict any cached copy of the databricks client module.
  delete require.cache[require.resolve('../src/clients/databricks')];

  // Local stand-in for categorizeFailure: maps an error-like object to a
  // coarse failure category string.
  const categorizeFailure = (error) => {
    const breakerOpen =
      error.name === 'CircuitBreakerError' || error.code === 'circuit_breaker_open';
    if (breakerOpen) {
      return 'circuit_breaker';
    }

    const timedOut = error.name === 'AbortError' || error.code === 'ETIMEDOUT';
    if (timedOut) {
      return 'timeout';
    }

    const unavailable =
      error.message?.includes('not configured') ||
      error.message?.includes('not available') ||
      error.code === 'ECONNREFUSED';
    if (unavailable) {
      return 'service_unavailable';
    }

    return 'error';
  };

  // Local stand-in for estimateCostSavings: prices input and output tokens
  // at fixed per-million rates and returns the sum.
  const estimateCostSavings = (inputTokens, outputTokens) => {
    const INPUT_COST_PER_1M = 3.00;
    const OUTPUT_COST_PER_1M = 15.00;
    const inputCost = (inputTokens / 1_000_000) * INPUT_COST_PER_1M;
    const outputCost = (outputTokens / 1_000_000) * OUTPUT_COST_PER_1M;
    return inputCost + outputCost;
  };

  // Four sample errors; each benchmark iteration categorises all of them,
  // which is why the figures below use 400k (100000 x 4) and a x4 throughput.
  const testErrors = [
    { name: 'CircuitBreakerError', message: 'Circuit breaker open' },
    { name: 'AbortError', message: 'Timeout' },
    { code: 'ECONNREFUSED', message: 'Connection refused' },
    { message: 'Generic error' }
  ];

  log('\n Benchmarking helper functions...', 'cyan');

  const categorizeRun = benchmark('Categorize failure', 100000, () => {
    for (const sample of testErrors) {
      categorizeFailure(sample);
    }
  });
  const categorizeMs = categorizeRun.duration;
  const categorizeRate = categorizeRun.throughput;

  log(` Categorize failure: ${categorizeMs.toFixed(2)}ms for 400k operations`, 'cyan');
  log(` Average: ${(categorizeMs / 400000).toFixed(6)}ms per categorization`, 'blue');
  log(` Throughput: ${(categorizeRate * 4).toLocaleString()} ops/sec`, 'green');

  const costRun = benchmark(
    'Estimate cost savings',
    100000,
    () => estimateCostSavings(1000, 500)
  );
  const costMs = costRun.duration;
  const costRate = costRun.throughput;

  log(` Cost estimation: ${costMs.toFixed(2)}ms for 100k calculations`, 'cyan');
  log(` Average: ${(costMs / 100000).toFixed(6)}ms per calculation`, 'blue');
  log(` Throughput: ${costRate.toLocaleString()} ops/sec`, 'green');

  log('\n Analysis:', 'yellow');
  log(` Helper functions add negligible overhead (<0.001ms)`, 'green');
  log(` No performance impact from utility functions`, 'green');

  return {
    categorizeTime: categorizeMs,
    costCalcTime: costMs
  };
}
|
|
341
|
-
|
|
342
|
-
// =============================================================================
|
|
343
|
-
// FINAL REPORT
|
|
344
|
-
// =============================================================================
|
|
345
|
-
/**
 * Prints the closing summary table and overall assessment for the suite.
 *
 * Only the routing, metrics and combined measurements are interpolated into
 * the output; the "Negligible" verdicts and real-world figures are fixed text.
 * Relies on `section`, `log` and `colors` defined elsewhere in this file.
 *
 * @param {object} results - Aggregated benchmark results. Reads
 *   `results.routing.avgDecisionTime`, `results.metrics.avgMetricsTime`,
 *   `results.combined.overhead` and `results.combined.fullThroughput`.
 * @returns {void}
 */
function printFinalReport(results) {
  const divider = '+---------------------------------------------------------+';
  const printDivider = () => console.log(divider);
  const printBlank = () => console.log('\n');

  section('HYBRID ROUTING PERFORMANCE SUMMARY');

  printBlank();
  printDivider();
  console.log('| HYBRID ROUTING PERFORMANCE |');
  printDivider();

  // 1. Routing decisions
  log(`| 1. Routing Decisions |`, 'bright');
  log(`| Average: ${results.routing.avgDecisionTime.toFixed(6)}ms per decision |`, 'cyan');
  log(`| Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} |`);

  printDivider();

  // 2. Metrics collection
  log(`| 2. Metrics Collection |`, 'bright');
  log(`| Average: ${results.metrics.avgMetricsTime.toFixed(6)}ms per operation |`, 'cyan');
  log(`| Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} |`);

  printDivider();

  // 3. Full routing stack
  log(`| 3. Full Routing Stack |`, 'bright');
  log(`| Average: ${results.combined.overhead.toFixed(6)}ms per request |`, 'cyan');
  log(`| Throughput: ${results.combined.fullThroughput.toLocaleString()} ops/sec |`, 'cyan');
  log(`| Impact: ${colors.green}Negligible (<0.02ms)${colors.reset} |`);

  printDivider();

  // 4. Helper functions (fixed verdict; no measurement is interpolated)
  log(`| 4. Helper Functions |`, 'bright');
  log(`| Overhead: ${colors.green}Negligible (<0.001ms)${colors.reset} |`);

  printDivider();

  // Overall assessment
  printBlank();
  log('Overall Performance Assessment:', 'bright');
  const assessmentLines = [
    ' Routing overhead: <0.01ms per request',
    ' Metrics overhead: <0.01ms per request',
    ' Combined overhead: <0.02ms per request',
    ' No measurable impact on API latency'
  ];
  for (const line of assessmentLines) {
    log(line, 'green');
  }

  console.log('\n Expected Real-World Performance:');
  const expectationLines = [
    [' Ollama (local): ~500-1000ms per request', 'cyan'],
    [' Cloud (Databricks): ~1500-2000ms per request', 'cyan'],
    [' Routing overhead: ~0.02ms (0.001-0.002% of total)', 'cyan'],
    [' Latency savings with Ollama: 40-60% faster', 'green'],
    [' Cost savings with Ollama: 100% (free)', 'green']
  ];
  for (const [line, color] of expectationLines) {
    log(line, color);
  }

  printBlank();
  log('Conclusion: Hybrid routing adds negligible overhead while', 'bright');
  log(' providing significant latency and cost improvements!', 'bright');
  printBlank();
}
|
|
397
|
-
|
|
398
|
-
// =============================================================================
|
|
399
|
-
// RUN ALL TESTS
|
|
400
|
-
// =============================================================================
|
|
401
|
-
/**
 * Runs the four performance tests in order, prints the final report and
 * terminates the process: exit code 0 when everything ran, 1 when any step
 * threw (the error is logged and dumped to stderr first).
 *
 * The test functions and `printFinalReport`/`log` are defined elsewhere in
 * this file.
 *
 * @returns {Promise<void>} In practice the process exits before the promise
 *   is observed, since both paths call `process.exit`.
 */
async function runAllTests() {
  log('\n Starting Hybrid Routing Performance Test Suite\n', 'bright');

  try {
    // Execution order matters for the log output: routing, metrics,
    // combined stack, then helpers.
    const routing = testRoutingDecisionPerformance();
    const metrics = testMetricsOverhead();
    const combined = testCombinedStack();
    const helpers = testHelperFunctions();

    printFinalReport({ routing, metrics, combined, helpers });

    log('\n All performance tests completed successfully!\n', 'green');
    process.exit(0);
  } catch (err) {
    log(`\n Performance test suite failed: ${err.message}\n`, 'red');
    console.error(err);
    process.exit(1);
  }
}
|
|
422
|
-
|
|
423
|
-
// Run the suite only when this file is executed directly, not when it is
// loaded via require() from another module.
if (module === require.main) {
  runAllTests();
}

// Expose the runner so other scripts can trigger the suite themselves.
module.exports = { runAllTests };
|