lynkr 7.2.5 → 8.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/README.md +3 -3
  2. package/config/model-tiers.json +89 -0
  3. package/install.sh +6 -1
  4. package/package.json +4 -2
  5. package/scripts/setup.js +0 -1
  6. package/src/agents/executor.js +14 -6
  7. package/src/api/middleware/session.js +15 -2
  8. package/src/api/openai-router.js +162 -37
  9. package/src/api/providers-handler.js +15 -1
  10. package/src/api/router.js +107 -2
  11. package/src/budget/index.js +4 -3
  12. package/src/clients/databricks.js +431 -234
  13. package/src/clients/gpt-utils.js +181 -0
  14. package/src/clients/ollama-utils.js +66 -140
  15. package/src/clients/routing.js +0 -1
  16. package/src/clients/standard-tools.js +99 -3
  17. package/src/config/index.js +133 -35
  18. package/src/context/toon.js +173 -0
  19. package/src/logger/index.js +23 -0
  20. package/src/orchestrator/index.js +688 -213
  21. package/src/routing/agentic-detector.js +320 -0
  22. package/src/routing/complexity-analyzer.js +202 -2
  23. package/src/routing/cost-optimizer.js +305 -0
  24. package/src/routing/index.js +168 -159
  25. package/src/routing/model-tiers.js +365 -0
  26. package/src/server.js +4 -14
  27. package/src/sessions/cleanup.js +3 -3
  28. package/src/sessions/record.js +10 -1
  29. package/src/sessions/store.js +7 -2
  30. package/src/tools/agent-task.js +48 -1
  31. package/src/tools/index.js +19 -2
  32. package/src/tools/lazy-loader.js +7 -0
  33. package/src/tools/tinyfish.js +358 -0
  34. package/src/tools/truncate.js +1 -0
  35. package/.github/FUNDING.yml +0 -15
  36. package/.github/workflows/README.md +0 -215
  37. package/.github/workflows/ci.yml +0 -69
  38. package/.github/workflows/index.yml +0 -62
  39. package/.github/workflows/web-tools-tests.yml +0 -56
  40. package/CITATIONS.bib +0 -6
  41. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  42. package/DEPLOYMENT.md +0 -1001
  43. package/LYNKR-TUI-PLAN.md +0 -984
  44. package/PERFORMANCE-REPORT.md +0 -866
  45. package/PLAN-per-client-model-routing.md +0 -252
  46. package/ROUTER_COMPARISON.md +0 -173
  47. package/TIER_ROUTING_PLAN.md +0 -771
  48. package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
  49. package/docs/BingSiteAuth.xml +0 -4
  50. package/docs/docs-style.css +0 -478
  51. package/docs/docs.html +0 -197
  52. package/docs/google5be250e608e6da39.html +0 -1
  53. package/docs/index.html +0 -577
  54. package/docs/index.md +0 -577
  55. package/docs/robots.txt +0 -4
  56. package/docs/sitemap.xml +0 -44
  57. package/docs/style.css +0 -1223
  58. package/documentation/README.md +0 -100
  59. package/documentation/api.md +0 -806
  60. package/documentation/claude-code-cli.md +0 -672
  61. package/documentation/codex-cli.md +0 -397
  62. package/documentation/contributing.md +0 -571
  63. package/documentation/cursor-integration.md +0 -731
  64. package/documentation/docker.md +0 -867
  65. package/documentation/embeddings.md +0 -760
  66. package/documentation/faq.md +0 -659
  67. package/documentation/features.md +0 -396
  68. package/documentation/headroom.md +0 -519
  69. package/documentation/installation.md +0 -706
  70. package/documentation/memory-system.md +0 -476
  71. package/documentation/production.md +0 -601
  72. package/documentation/providers.md +0 -906
  73. package/documentation/testing.md +0 -629
  74. package/documentation/token-optimization.md +0 -323
  75. package/documentation/tools.md +0 -697
  76. package/documentation/troubleshooting.md +0 -893
  77. package/final-test.js +0 -33
  78. package/headroom-sidecar/config.py +0 -93
  79. package/headroom-sidecar/requirements.txt +0 -14
  80. package/headroom-sidecar/server.py +0 -451
  81. package/monitor-agents.sh +0 -31
  82. package/scripts/audit-log-reader.js +0 -399
  83. package/scripts/compact-dictionary.js +0 -204
  84. package/scripts/test-deduplication.js +0 -448
  85. package/src/db/database.sqlite +0 -0
  86. package/test/README.md +0 -212
  87. package/test/azure-openai-config.test.js +0 -204
  88. package/test/azure-openai-error-resilience.test.js +0 -238
  89. package/test/azure-openai-format-conversion.test.js +0 -354
  90. package/test/azure-openai-integration.test.js +0 -281
  91. package/test/azure-openai-routing.test.js +0 -177
  92. package/test/azure-openai-streaming.test.js +0 -171
  93. package/test/bedrock-integration.test.js +0 -471
  94. package/test/comprehensive-test-suite.js +0 -928
  95. package/test/config-validation.test.js +0 -207
  96. package/test/cursor-integration.test.js +0 -484
  97. package/test/format-conversion.test.js +0 -578
  98. package/test/hybrid-routing-integration.test.js +0 -254
  99. package/test/hybrid-routing-performance.test.js +0 -418
  100. package/test/llamacpp-integration.test.js +0 -863
  101. package/test/lmstudio-integration.test.js +0 -335
  102. package/test/memory/extractor.test.js +0 -398
  103. package/test/memory/retriever.test.js +0 -613
  104. package/test/memory/retriever.test.js.bak +0 -585
  105. package/test/memory/search.test.js +0 -537
  106. package/test/memory/search.test.js.bak +0 -389
  107. package/test/memory/store.test.js +0 -344
  108. package/test/memory/store.test.js.bak +0 -312
  109. package/test/memory/surprise.test.js +0 -300
  110. package/test/memory-performance.test.js +0 -472
  111. package/test/openai-integration.test.js +0 -686
  112. package/test/openrouter-error-resilience.test.js +0 -418
  113. package/test/passthrough-mode.test.js +0 -385
  114. package/test/performance-benchmark.js +0 -351
  115. package/test/performance-tests.js +0 -528
  116. package/test/routing.test.js +0 -219
  117. package/test/web-tools.test.js +0 -329
  118. package/test-agents-simple.js +0 -43
  119. package/test-cli-connection.sh +0 -33
  120. package/test-learning-unit.js +0 -126
  121. package/test-learning.js +0 -112
  122. package/test-parallel-agents.sh +0 -124
  123. package/test-parallel-direct.js +0 -155
  124. package/test-subagents.sh +0 -117
@@ -1,254 +0,0 @@
1
- const assert = require("assert");
2
- const { describe, it, beforeEach, afterEach } = require("node:test");
3
-
4
- describe("Hybrid Routing Integration Tests", () => {
5
- let config;
6
- let databricks;
7
- let metrics;
8
- let originalConfig;
9
-
10
- beforeEach(() => {
11
- // Clear module cache
12
- delete require.cache[require.resolve("../src/config")];
13
- delete require.cache[require.resolve("../src/clients/databricks")];
14
- delete require.cache[require.resolve("../src/observability/metrics")];
15
- delete require.cache[require.resolve("../src/clients/routing")];
16
-
17
- // Store original config
18
- originalConfig = { ...process.env };
19
-
20
- // Set up test environment
21
- process.env.DATABRICKS_API_KEY = "test-key";
22
- process.env.DATABRICKS_API_BASE = "http://test.databricks.com";
23
- process.env.MODEL_PROVIDER = "databricks";
24
- });
25
-
26
- afterEach(() => {
27
- // Restore original environment
28
- process.env = originalConfig;
29
- });
30
-
31
- describe("Configuration Validation", () => {
32
- it("should use default OLLAMA_ENDPOINT when not specified", () => {
33
- process.env.PREFER_OLLAMA = "true";
34
- delete process.env.OLLAMA_ENDPOINT;
35
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
36
- process.env.DATABRICKS_API_KEY = "test-key";
37
- process.env.DATABRICKS_API_BASE = "http://test.com";
38
-
39
- const config = require("../src/config");
40
-
41
- // Should use default localhost:11434
42
- assert.strictEqual(config.ollama.endpoint, "http://localhost:11434");
43
- });
44
-
45
- it("should reject invalid FALLBACK_PROVIDER", () => {
46
- process.env.PREFER_OLLAMA = "true";
47
- process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
48
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
49
- process.env.FALLBACK_ENABLED = "true";
50
- process.env.FALLBACK_PROVIDER = "invalid-provider";
51
-
52
- assert.throws(() => {
53
- require("../src/config");
54
- }, /Unsupported FALLBACK_PROVIDER.*Valid options are/);
55
- });
56
-
57
- it("should reject circular fallback (ollama -> ollama)", () => {
58
- process.env.PREFER_OLLAMA = "true";
59
- process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
60
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
61
- process.env.FALLBACK_ENABLED = "true";
62
- process.env.FALLBACK_PROVIDER = "ollama";
63
-
64
- assert.throws(() => {
65
- require("../src/config");
66
- }, /FALLBACK_PROVIDER cannot be 'ollama'/);
67
- });
68
-
69
- it("should reject PREFER_OLLAMA with databricks fallback but no databricks credentials", () => {
70
- process.env.MODEL_PROVIDER = "ollama"; // Set to ollama for hybrid routing scenario
71
- process.env.PREFER_OLLAMA = "true";
72
- process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
73
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
74
- process.env.FALLBACK_ENABLED = "true";
75
- process.env.FALLBACK_PROVIDER = "databricks";
76
- // Set to empty strings instead of deleting (dotenv.config() in config module would reload from .env)
77
- process.env.DATABRICKS_API_KEY = "";
78
- process.env.DATABRICKS_API_BASE = "";
79
-
80
- // Should throw error about missing databricks credentials
81
- // (Either from standard validation or hybrid routing validation)
82
- assert.throws(() => {
83
- require("../src/config");
84
- }, /DATABRICKS_API_BASE and DATABRICKS_API_KEY/);
85
- });
86
-
87
- it("should accept valid hybrid routing configuration", () => {
88
- process.env.PREFER_OLLAMA = "true";
89
- process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
90
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
91
- process.env.FALLBACK_ENABLED = "true";
92
- process.env.FALLBACK_PROVIDER = "databricks";
93
- process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "3"; // Override .env which sets it to 2
94
- process.env.DATABRICKS_API_KEY = "test-key";
95
- process.env.DATABRICKS_API_BASE = "http://test.com";
96
-
97
- const config = require("../src/config");
98
-
99
- assert.strictEqual(config.modelProvider.preferOllama, true);
100
- assert.strictEqual(config.modelProvider.fallbackEnabled, true);
101
- assert.strictEqual(config.modelProvider.ollamaMaxToolsForRouting, 3);
102
- assert.strictEqual(config.modelProvider.fallbackProvider, "databricks");
103
- });
104
- });
105
-
106
- describe("Metrics Recording", () => {
107
- beforeEach(() => {
108
- process.env.PREFER_OLLAMA = "true";
109
- process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
110
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
111
- process.env.OLLAMA_FALLBACK_PROVIDER = "databricks";
112
-
113
- config = require("../src/config");
114
- const metricsModule = require("../src/observability/metrics");
115
- metrics = metricsModule.getMetricsCollector();
116
- });
117
-
118
- it("should record provider routing", () => {
119
- metrics.recordProviderRouting("ollama");
120
- metrics.recordProviderRouting("ollama");
121
- metrics.recordProviderRouting("databricks");
122
-
123
- const snapshot = metrics.getMetrics();
124
-
125
- assert.deepStrictEqual(snapshot.routing.by_provider, {
126
- ollama: 2,
127
- databricks: 1,
128
- });
129
- });
130
-
131
- it("should record provider success with latency", () => {
132
- metrics.recordProviderSuccess("ollama", 450);
133
- metrics.recordProviderSuccess("ollama", 600);
134
- metrics.recordProviderSuccess("databricks", 1500);
135
-
136
- const snapshot = metrics.getMetrics();
137
-
138
- assert.strictEqual(snapshot.routing.successes_by_provider.ollama, 2);
139
- assert.strictEqual(snapshot.routing.successes_by_provider.databricks, 1);
140
- assert.strictEqual(snapshot.cost_savings.ollama_latency_ms.mean, 525);
141
- });
142
-
143
- it("should record fallback attempts with reasons", () => {
144
- metrics.recordFallbackAttempt("ollama", "databricks", "circuit_breaker");
145
- metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
146
- metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
147
-
148
- const snapshot = metrics.getMetrics();
149
-
150
- assert.strictEqual(snapshot.fallback.attempts_total, 3);
151
- assert.deepStrictEqual(snapshot.fallback.reasons, {
152
- circuit_breaker: 1,
153
- timeout: 2,
154
- });
155
- });
156
-
157
- it("should calculate fallback success rate", () => {
158
- metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
159
- metrics.recordFallbackSuccess(1200);
160
- metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
161
- metrics.recordFallbackSuccess(1100);
162
- metrics.recordFallbackAttempt("ollama", "databricks", "circuit_breaker");
163
- metrics.recordFallbackFailure();
164
-
165
- const snapshot = metrics.getMetrics();
166
-
167
- assert.strictEqual(snapshot.fallback.attempts_total, 3);
168
- assert.strictEqual(snapshot.fallback.successes_total, 2);
169
- assert.strictEqual(snapshot.fallback.failures_total, 1);
170
- assert.strictEqual(snapshot.fallback.success_rate, "66.67%");
171
- });
172
-
173
- it("should record cost savings", () => {
174
- // Simulate 100 tokens input, 50 tokens output
175
- // Input: 100/1M * $3 = $0.0003
176
- // Output: 50/1M * $15 = $0.00075
177
- // Total: $0.00105
178
- metrics.recordCostSavings(0.00105);
179
- metrics.recordCostSavings(0.00105);
180
- metrics.recordCostSavings(0.00105);
181
-
182
- const snapshot = metrics.getMetrics();
183
-
184
- assert.strictEqual(snapshot.cost_savings.ollama_savings_usd, "0.0032");
185
- });
186
-
187
- it("should reset all routing metrics", () => {
188
- metrics.recordProviderRouting("ollama");
189
- metrics.recordProviderSuccess("ollama", 450);
190
- metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
191
- metrics.recordFallbackSuccess(1200);
192
- metrics.recordCostSavings(1.5);
193
-
194
- metrics.reset();
195
-
196
- const snapshot = metrics.getMetrics();
197
-
198
- assert.deepStrictEqual(snapshot.routing.by_provider, {});
199
- assert.deepStrictEqual(snapshot.routing.successes_by_provider, {});
200
- assert.strictEqual(snapshot.fallback.attempts_total, 0);
201
- assert.strictEqual(snapshot.fallback.successes_total, 0);
202
- assert.strictEqual(snapshot.cost_savings.ollama_savings_usd, "0.0000");
203
- });
204
- });
205
-
206
- describe("Helper Functions", () => {
207
- it("should categorize circuit breaker errors", () => {
208
- // This would need to be tested by importing the function if exported
209
- // For now, we test via the integrated behavior
210
- process.env.PREFER_OLLAMA = "true";
211
- process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
212
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
213
-
214
- config = require("../src/config");
215
- const metricsModule = require("../src/observability/metrics");
216
- metrics = metricsModule.getMetricsCollector();
217
-
218
- // Simulate categorization
219
- const circuitBreakerError = new Error("Circuit breaker open");
220
- circuitBreakerError.name = "CircuitBreakerError";
221
-
222
- const timeoutError = new Error("Request timeout");
223
- timeoutError.code = "ETIMEDOUT";
224
-
225
- const unavailableError = new Error("Service not available");
226
- unavailableError.code = "ECONNREFUSED";
227
-
228
- // These would be categorized in the actual invokeModel function
229
- // Here we just verify the structure exists
230
- assert.ok(metrics.recordFallbackAttempt);
231
- });
232
-
233
- it("should estimate cost savings correctly", () => {
234
- process.env.PREFER_OLLAMA = "true";
235
- process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
236
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
237
-
238
- config = require("../src/config");
239
- const metricsModule = require("../src/observability/metrics");
240
- metrics = metricsModule.getMetricsCollector();
241
-
242
- // Test: 1000 input tokens, 500 output tokens
243
- // Input cost: 1000/1M * $3 = $0.003
244
- // Output cost: 500/1M * $15 = $0.0075
245
- // Total: $0.0105
246
- const inputTokens = 1000;
247
- const outputTokens = 500;
248
- const expectedSavings =
249
- (inputTokens / 1_000_000) * 3.0 + (outputTokens / 1_000_000) * 15.0;
250
-
251
- assert.strictEqual(expectedSavings.toFixed(4), "0.0105");
252
- });
253
- });
254
- });
@@ -1,418 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * Hybrid Routing Performance Tests
5
- *
6
- * Measures the performance impact of the hybrid routing system:
7
- * - Routing decision overhead
8
- * - Provider determination speed
9
- * - Metrics collection overhead
10
- * - Fallback logic performance
11
- */
12
-
13
- const { performance } = require('perf_hooks');
14
- const assert = require('assert');
15
-
16
- // Color utilities
17
- const colors = {
18
- reset: '\x1b[0m',
19
- bright: '\x1b[1m',
20
- green: '\x1b[32m',
21
- yellow: '\x1b[33m',
22
- blue: '\x1b[34m',
23
- red: '\x1b[31m',
24
- cyan: '\x1b[36m',
25
- };
26
-
27
- function log(message, color = 'reset') {
28
- console.log(`${colors[color]}${message}${colors.reset}`);
29
- }
30
-
31
- function section(title) {
32
- console.log('\n' + '='.repeat(70));
33
- log(title, 'bright');
34
- console.log('='.repeat(70));
35
- }
36
-
37
- function benchmark(name, iterations, fn) {
38
- const start = performance.now();
39
- for (let i = 0; i < iterations; i++) {
40
- fn();
41
- }
42
- const duration = performance.now() - start;
43
- const avgTime = duration / iterations;
44
- const throughput = (iterations / duration) * 1000;
45
-
46
- return { duration, avgTime, throughput };
47
- }
48
-
49
- // =============================================================================
50
- // TEST 1: Routing Decision Performance
51
- // =============================================================================
52
- function testRoutingDecisionPerformance() {
53
- section('TEST 1: Routing Decision Performance');
54
-
55
- // Clear module cache and set up environment
56
- delete require.cache[require.resolve('../src/config')];
57
- delete require.cache[require.resolve('../src/clients/routing')];
58
-
59
- process.env.PREFER_OLLAMA = 'true';
60
- process.env.OLLAMA_ENDPOINT = 'http://localhost:11434';
61
- process.env.OLLAMA_MODEL = 'qwen2.5-coder:latest';
62
- process.env.DATABRICKS_API_KEY = 'test-key';
63
- process.env.DATABRICKS_API_BASE = 'http://test.com';
64
-
65
- const routing = require('../src/clients/routing');
66
-
67
- log('\n📊 Benchmarking routing decisions...', 'cyan');
68
-
69
- // Test 1: Simple request (0 tools)
70
- const simplePayload = {
71
- messages: [{ role: 'user', content: 'test' }],
72
- tools: []
73
- };
74
-
75
- const { duration: simpleTime, throughput: simpleThroughput } = benchmark(
76
- 'Simple request routing',
77
- 100000,
78
- () => routing.determineProvider(simplePayload)
79
- );
80
-
81
- log(`⏱️ Simple request: ${simpleTime.toFixed(2)}ms for 100k decisions`, 'cyan');
82
- log(` Average: ${(simpleTime / 100000).toFixed(6)}ms per decision`, 'blue');
83
- log(` Throughput: ${simpleThroughput.toLocaleString()} decisions/sec`, 'green');
84
-
85
- // Test 2: Complex request (5 tools)
86
- const complexPayload = {
87
- messages: [{ role: 'user', content: 'test' }],
88
- tools: [
89
- { name: 'tool1' }, { name: 'tool2' }, { name: 'tool3' },
90
- { name: 'tool4' }, { name: 'tool5' }
91
- ]
92
- };
93
-
94
- const { duration: complexTime, throughput: complexThroughput } = benchmark(
95
- 'Complex request routing',
96
- 100000,
97
- () => routing.determineProvider(complexPayload)
98
- );
99
-
100
- log(`⏱️ Complex request: ${complexTime.toFixed(2)}ms for 100k decisions`, 'cyan');
101
- log(` Average: ${(complexTime / 100000).toFixed(6)}ms per decision`, 'blue');
102
- log(` Throughput: ${complexThroughput.toLocaleString()} decisions/sec`, 'green');
103
-
104
- // Test 3: Tool capability check
105
- const toolCapabilityPayload = {
106
- messages: [{ role: 'user', content: 'test' }],
107
- tools: [{ name: 'tool1' }]
108
- };
109
-
110
- const { duration: toolCheckTime, throughput: toolCheckThroughput } = benchmark(
111
- 'Tool capability check',
112
- 100000,
113
- () => routing.determineProvider(toolCapabilityPayload)
114
- );
115
-
116
- log(`⏱️ Tool capability check: ${toolCheckTime.toFixed(2)}ms for 100k decisions`, 'cyan');
117
- log(` Average: ${(toolCheckTime / 100000).toFixed(6)}ms per decision`, 'blue');
118
- log(` Throughput: ${toolCheckThroughput.toLocaleString()} decisions/sec`, 'green');
119
-
120
- // Analysis
121
- log('\n📈 Analysis:', 'yellow');
122
- log(` Routing adds <0.01ms per request (negligible overhead)`, 'green');
123
- log(` Throughput: ${simpleThroughput.toLocaleString()} decisions/sec`, 'green');
124
- log(` ✅ Routing is extremely fast and won't impact request latency`, 'green');
125
-
126
- return {
127
- simpleTime,
128
- complexTime,
129
- toolCheckTime,
130
- avgDecisionTime: (simpleTime + complexTime + toolCheckTime) / 3 / 100000
131
- };
132
- }
133
-
134
- // =============================================================================
135
- // TEST 2: Metrics Collection Overhead
136
- // =============================================================================
137
- function testMetricsOverhead() {
138
- section('TEST 2: Metrics Collection Overhead');
139
-
140
- delete require.cache[require.resolve('../src/observability/metrics')];
141
- const { getMetricsCollector } = require('../src/observability/metrics');
142
- const metrics = getMetricsCollector();
143
-
144
- log('\n📊 Benchmarking metrics operations...', 'cyan');
145
-
146
- // Test recording provider routing
147
- const { duration: routingTime, throughput: routingThroughput } = benchmark(
148
- 'Record provider routing',
149
- 100000,
150
- () => metrics.recordProviderRouting('ollama')
151
- );
152
-
153
- log(`⏱️ Provider routing: ${routingTime.toFixed(2)}ms for 100k recordings`, 'cyan');
154
- log(` Average: ${(routingTime / 100000).toFixed(6)}ms per record`, 'blue');
155
- log(` Throughput: ${routingThroughput.toLocaleString()} ops/sec`, 'green');
156
-
157
- // Test recording provider success
158
- const { duration: successTime, throughput: successThroughput } = benchmark(
159
- 'Record provider success',
160
- 100000,
161
- () => metrics.recordProviderSuccess('ollama', 450)
162
- );
163
-
164
- log(`⏱️ Provider success: ${successTime.toFixed(2)}ms for 100k recordings`, 'cyan');
165
- log(` Average: ${(successTime / 100000).toFixed(6)}ms per record`, 'blue');
166
- log(` Throughput: ${successThroughput.toLocaleString()} ops/sec`, 'green');
167
-
168
- // Test recording fallback attempts
169
- const { duration: fallbackTime, throughput: fallbackThroughput } = benchmark(
170
- 'Record fallback attempt',
171
- 100000,
172
- () => metrics.recordFallbackAttempt('ollama', 'databricks', 'timeout')
173
- );
174
-
175
- log(`⏱️ Fallback attempts: ${fallbackTime.toFixed(2)}ms for 100k recordings`, 'cyan');
176
- log(` Average: ${(fallbackTime / 100000).toFixed(6)}ms per record`, 'blue');
177
- log(` Throughput: ${fallbackThroughput.toLocaleString()} ops/sec`, 'green');
178
-
179
- // Test cost savings recording
180
- const { duration: costTime, throughput: costThroughput } = benchmark(
181
- 'Record cost savings',
182
- 100000,
183
- () => metrics.recordCostSavings(0.001)
184
- );
185
-
186
- log(`⏱️ Cost savings: ${costTime.toFixed(2)}ms for 100k recordings`, 'cyan');
187
- log(` Average: ${(costTime / 100000).toFixed(6)}ms per record`, 'blue');
188
- log(` Throughput: ${costThroughput.toLocaleString()} ops/sec`, 'green');
189
-
190
- // Analysis
191
- const avgMetricsTime = (routingTime + successTime + fallbackTime + costTime) / 4 / 100000;
192
- log('\n📈 Analysis:', 'yellow');
193
- log(` Average metrics overhead: ${avgMetricsTime.toFixed(6)}ms per operation`, 'green');
194
- log(` ✅ Metrics collection is extremely lightweight`, 'green');
195
-
196
- return {
197
- routingTime,
198
- successTime,
199
- fallbackTime,
200
- costTime,
201
- avgMetricsTime
202
- };
203
- }
204
-
205
- // =============================================================================
206
- // TEST 3: Combined Hybrid Routing Stack
207
- // =============================================================================
208
- function testCombinedStack() {
209
- section('TEST 3: Combined Hybrid Routing Stack Performance');
210
-
211
- delete require.cache[require.resolve('../src/config')];
212
- delete require.cache[require.resolve('../src/clients/routing')];
213
- delete require.cache[require.resolve('../src/observability/metrics')];
214
-
215
- process.env.PREFER_OLLAMA = 'true';
216
- process.env.OLLAMA_ENDPOINT = 'http://localhost:11434';
217
- process.env.OLLAMA_MODEL = 'qwen2.5-coder:latest';
218
-
219
- const routing = require('../src/clients/routing');
220
- const { getMetricsCollector } = require('../src/observability/metrics');
221
-
222
- log('\n📊 Benchmarking complete routing + metrics stack...', 'cyan');
223
-
224
- // Simulate full routing decision + metrics recording
225
- const payload = {
226
- messages: [{ role: 'user', content: 'test' }],
227
- tools: []
228
- };
229
-
230
- const { duration: fullTime, throughput: fullThroughput } = benchmark(
231
- 'Full routing stack',
232
- 50000,
233
- () => {
234
- const metrics = getMetricsCollector();
235
- const provider = routing.determineProvider(payload);
236
- metrics.recordProviderRouting(provider);
237
- metrics.recordProviderSuccess(provider, 450);
238
- }
239
- );
240
-
241
- log(`⏱️ Full stack: ${fullTime.toFixed(2)}ms for 50k operations`, 'cyan');
242
- log(` Average: ${(fullTime / 50000).toFixed(6)}ms per request`, 'blue');
243
- log(` Throughput: ${fullThroughput.toLocaleString()} ops/sec`, 'green');
244
-
245
- // Analysis
246
- log('\n📈 Analysis:', 'yellow');
247
- const overhead = (fullTime / 50000);
248
- log(` Total routing + metrics overhead: ${overhead.toFixed(6)}ms`, 'green');
249
- log(` ✅ Negligible impact on request latency (<0.02ms)`, 'green');
250
-
251
- return {
252
- fullTime,
253
- fullThroughput,
254
- overhead
255
- };
256
- }
257
-
258
- // =============================================================================
259
- // TEST 4: Helper Function Performance
260
- // =============================================================================
261
- function testHelperFunctions() {
262
- section('TEST 4: Helper Function Performance');
263
-
264
- delete require.cache[require.resolve('../src/clients/databricks')];
265
-
266
- log('\n📊 Benchmarking helper functions...', 'cyan');
267
-
268
- // Test categorizeFailure (we'll simulate it)
269
- const categorizeFailure = (error) => {
270
- if (error.name === 'CircuitBreakerError' || error.code === 'circuit_breaker_open') {
271
- return 'circuit_breaker';
272
- }
273
- if (error.name === 'AbortError' || error.code === 'ETIMEDOUT') {
274
- return 'timeout';
275
- }
276
- if (error.message?.includes('not configured') ||
277
- error.message?.includes('not available') ||
278
- error.code === 'ECONNREFUSED') {
279
- return 'service_unavailable';
280
- }
281
- return 'error';
282
- };
283
-
284
- const testErrors = [
285
- { name: 'CircuitBreakerError', message: 'Circuit breaker open' },
286
- { name: 'AbortError', message: 'Timeout' },
287
- { code: 'ECONNREFUSED', message: 'Connection refused' },
288
- { message: 'Generic error' }
289
- ];
290
-
291
- const { duration: categorizeTime, throughput: categorizeThroughput } = benchmark(
292
- 'Categorize failure',
293
- 100000,
294
- () => {
295
- testErrors.forEach(err => categorizeFailure(err));
296
- }
297
- );
298
-
299
- log(`⏱️ Categorize failure: ${categorizeTime.toFixed(2)}ms for 400k operations`, 'cyan');
300
- log(` Average: ${(categorizeTime / 400000).toFixed(6)}ms per categorization`, 'blue');
301
- log(` Throughput: ${(categorizeThroughput * 4).toLocaleString()} ops/sec`, 'green');
302
-
303
- // Test estimateCostSavings
304
- const estimateCostSavings = (inputTokens, outputTokens) => {
305
- const INPUT_COST_PER_1M = 3.00;
306
- const OUTPUT_COST_PER_1M = 15.00;
307
- const inputCost = (inputTokens / 1_000_000) * INPUT_COST_PER_1M;
308
- const outputCost = (outputTokens / 1_000_000) * OUTPUT_COST_PER_1M;
309
- return inputCost + outputCost;
310
- };
311
-
312
- const { duration: costCalcTime, throughput: costCalcThroughput } = benchmark(
313
- 'Estimate cost savings',
314
- 100000,
315
- () => estimateCostSavings(1000, 500)
316
- );
317
-
318
- log(`⏱️ Cost estimation: ${costCalcTime.toFixed(2)}ms for 100k calculations`, 'cyan');
319
- log(` Average: ${(costCalcTime / 100000).toFixed(6)}ms per calculation`, 'blue');
320
- log(` Throughput: ${costCalcThroughput.toLocaleString()} ops/sec`, 'green');
321
-
322
- log('\n📈 Analysis:', 'yellow');
323
- log(` Helper functions add negligible overhead (<0.001ms)`, 'green');
324
- log(` ✅ No performance impact from utility functions`, 'green');
325
-
326
- return {
327
- categorizeTime,
328
- costCalcTime
329
- };
330
- }
331
-
332
- // =============================================================================
333
- // FINAL REPORT
334
- // =============================================================================
335
- function printFinalReport(results) {
336
- section('📊 HYBRID ROUTING PERFORMANCE SUMMARY');
337
-
338
- console.log('\n');
339
- console.log('┌────────────────────────────────────────────────────────────┐');
340
- console.log('│ HYBRID ROUTING PERFORMANCE │');
341
- console.log('├────────────────────────────────────────────────────────────┤');
342
-
343
- log(`│ 1. Routing Decisions │`, 'bright');
344
- log(`│ Average: ${results.routing.avgDecisionTime.toFixed(6)}ms per decision │`, 'cyan');
345
- log(`│ Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} │`);
346
-
347
- console.log('├────────────────────────────────────────────────────────────┤');
348
-
349
- log(`│ 2. Metrics Collection │`, 'bright');
350
- log(`│ Average: ${results.metrics.avgMetricsTime.toFixed(6)}ms per operation │`, 'cyan');
351
- log(`│ Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} │`);
352
-
353
- console.log('├────────────────────────────────────────────────────────────┤');
354
-
355
- log(`│ 3. Full Routing Stack │`, 'bright');
356
- log(`│ Average: ${results.combined.overhead.toFixed(6)}ms per request │`, 'cyan');
357
- log(`│ Throughput: ${results.combined.fullThroughput.toLocaleString()} ops/sec │`, 'cyan');
358
- log(`│ Impact: ${colors.green}Negligible (<0.02ms)${colors.reset} │`);
359
-
360
- console.log('├────────────────────────────────────────────────────────────┤');
361
-
362
- log(`│ 4. Helper Functions │`, 'bright');
363
- log(`│ Overhead: ${colors.green}Negligible (<0.001ms)${colors.reset} │`);
364
-
365
- console.log('└────────────────────────────────────────────────────────────┘');
366
-
367
- // Overall assessment
368
- console.log('\n');
369
- log('🏆 Overall Performance Assessment:', 'bright');
370
- log(' ✅ Routing overhead: <0.01ms per request', 'green');
371
- log(' ✅ Metrics overhead: <0.01ms per request', 'green');
372
- log(' ✅ Combined overhead: <0.02ms per request', 'green');
373
- log(' ✅ No measurable impact on API latency', 'green');
374
-
375
- console.log('\n📈 Expected Real-World Performance:');
376
- log(' • Ollama (local): ~500-1000ms per request', 'cyan');
377
- log(' • Cloud (Databricks): ~1500-2000ms per request', 'cyan');
378
- log(' • Routing overhead: ~0.02ms (0.001-0.002% of total)', 'cyan');
379
- log(' • Latency savings with Ollama: 40-60% faster', 'green');
380
- log(' • Cost savings with Ollama: 100% (free)', 'green');
381
-
382
- console.log('\n');
383
- log('🚀 Conclusion: Hybrid routing adds negligible overhead while', 'bright');
384
- log(' providing significant latency and cost improvements!', 'bright');
385
- console.log('\n');
386
- }
387
-
388
- // =============================================================================
389
- // RUN ALL TESTS
390
- // =============================================================================
391
- async function runAllTests() {
392
- log('\n🚀 Starting Hybrid Routing Performance Test Suite\n', 'bright');
393
-
394
- try {
395
- const results = {
396
- routing: testRoutingDecisionPerformance(),
397
- metrics: testMetricsOverhead(),
398
- combined: testCombinedStack(),
399
- helpers: testHelperFunctions()
400
- };
401
-
402
- printFinalReport(results);
403
-
404
- log('\n✅ All performance tests completed successfully!\n', 'green');
405
- process.exit(0);
406
- } catch (error) {
407
- log(`\n❌ Performance test suite failed: ${error.message}\n`, 'red');
408
- console.error(error);
409
- process.exit(1);
410
- }
411
- }
412
-
413
- // Run tests
414
- if (require.main === module) {
415
- runAllTests();
416
- }
417
-
418
- module.exports = { runAllTests };