lynkr 8.0.0 → 9.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/.lynkr/telemetry.db +0 -0
  2. package/.lynkr/telemetry.db-shm +0 -0
  3. package/.lynkr/telemetry.db-wal +0 -0
  4. package/README.md +196 -322
  5. package/lynkr-skill.tar.gz +0 -0
  6. package/package.json +4 -3
  7. package/src/api/openai-router.js +64 -13
  8. package/src/api/providers-handler.js +171 -3
  9. package/src/api/router.js +9 -2
  10. package/src/clients/circuit-breaker.js +10 -247
  11. package/src/clients/codex-process.js +342 -0
  12. package/src/clients/codex-utils.js +143 -0
  13. package/src/clients/databricks.js +210 -63
  14. package/src/clients/resilience.js +540 -0
  15. package/src/clients/retry.js +22 -167
  16. package/src/clients/standard-tools.js +23 -0
  17. package/src/config/index.js +77 -0
  18. package/src/context/compression.js +42 -9
  19. package/src/context/distill.js +492 -0
  20. package/src/orchestrator/index.js +48 -8
  21. package/src/routing/complexity-analyzer.js +258 -5
  22. package/src/routing/index.js +12 -2
  23. package/src/routing/latency-tracker.js +148 -0
  24. package/src/routing/model-tiers.js +2 -0
  25. package/src/routing/quality-scorer.js +113 -0
  26. package/src/routing/telemetry.js +464 -0
  27. package/src/server.js +13 -12
  28. package/src/tools/code-graph.js +538 -0
  29. package/src/tools/code-mode.js +304 -0
  30. package/src/tools/index.js +4 -0
  31. package/src/tools/lazy-loader.js +18 -0
  32. package/src/tools/mcp-remote.js +7 -0
  33. package/src/tools/smart-selection.js +11 -0
  34. package/src/tools/tinyfish.js +358 -0
  35. package/src/tools/truncate.js +1 -0
  36. package/src/utils/payload.js +206 -0
  37. package/src/utils/perf-timer.js +80 -0
  38. package/.github/FUNDING.yml +0 -15
  39. package/.github/workflows/README.md +0 -215
  40. package/.github/workflows/ci.yml +0 -69
  41. package/.github/workflows/index.yml +0 -62
  42. package/.github/workflows/web-tools-tests.yml +0 -56
  43. package/CITATIONS.bib +0 -6
  44. package/DEPLOYMENT.md +0 -1001
  45. package/LYNKR-TUI-PLAN.md +0 -984
  46. package/PERFORMANCE-REPORT.md +0 -866
  47. package/PLAN-per-client-model-routing.md +0 -252
  48. package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
  49. package/docs/BingSiteAuth.xml +0 -4
  50. package/docs/docs-style.css +0 -478
  51. package/docs/docs.html +0 -198
  52. package/docs/google5be250e608e6da39.html +0 -1
  53. package/docs/index.html +0 -577
  54. package/docs/index.md +0 -584
  55. package/docs/robots.txt +0 -4
  56. package/docs/sitemap.xml +0 -44
  57. package/docs/style.css +0 -1223
  58. package/docs/toon-integration-spec.md +0 -130
  59. package/documentation/README.md +0 -101
  60. package/documentation/api.md +0 -806
  61. package/documentation/claude-code-cli.md +0 -679
  62. package/documentation/codex-cli.md +0 -397
  63. package/documentation/contributing.md +0 -571
  64. package/documentation/cursor-integration.md +0 -734
  65. package/documentation/docker.md +0 -874
  66. package/documentation/embeddings.md +0 -762
  67. package/documentation/faq.md +0 -713
  68. package/documentation/features.md +0 -403
  69. package/documentation/headroom.md +0 -519
  70. package/documentation/installation.md +0 -758
  71. package/documentation/memory-system.md +0 -476
  72. package/documentation/production.md +0 -636
  73. package/documentation/providers.md +0 -1009
  74. package/documentation/routing.md +0 -476
  75. package/documentation/testing.md +0 -629
  76. package/documentation/token-optimization.md +0 -325
  77. package/documentation/tools.md +0 -697
  78. package/documentation/troubleshooting.md +0 -969
  79. package/final-test.js +0 -33
  80. package/headroom-sidecar/config.py +0 -93
  81. package/headroom-sidecar/requirements.txt +0 -14
  82. package/headroom-sidecar/server.py +0 -451
  83. package/monitor-agents.sh +0 -31
  84. package/scripts/audit-log-reader.js +0 -399
  85. package/scripts/compact-dictionary.js +0 -204
  86. package/scripts/test-deduplication.js +0 -448
  87. package/src/db/database.sqlite +0 -0
  88. package/te +0 -11622
  89. package/test/README.md +0 -212
  90. package/test/azure-openai-config.test.js +0 -213
  91. package/test/azure-openai-error-resilience.test.js +0 -238
  92. package/test/azure-openai-format-conversion.test.js +0 -354
  93. package/test/azure-openai-integration.test.js +0 -287
  94. package/test/azure-openai-routing.test.js +0 -175
  95. package/test/azure-openai-streaming.test.js +0 -171
  96. package/test/bedrock-integration.test.js +0 -457
  97. package/test/comprehensive-test-suite.js +0 -928
  98. package/test/config-validation.test.js +0 -207
  99. package/test/cursor-integration.test.js +0 -484
  100. package/test/format-conversion.test.js +0 -578
  101. package/test/hybrid-routing-integration.test.js +0 -269
  102. package/test/hybrid-routing-performance.test.js +0 -428
  103. package/test/llamacpp-integration.test.js +0 -882
  104. package/test/lmstudio-integration.test.js +0 -347
  105. package/test/memory/extractor.test.js +0 -398
  106. package/test/memory/retriever.test.js +0 -613
  107. package/test/memory/retriever.test.js.bak +0 -585
  108. package/test/memory/search.test.js +0 -537
  109. package/test/memory/search.test.js.bak +0 -389
  110. package/test/memory/store.test.js +0 -344
  111. package/test/memory/store.test.js.bak +0 -312
  112. package/test/memory/surprise.test.js +0 -300
  113. package/test/memory-performance.test.js +0 -472
  114. package/test/openai-integration.test.js +0 -683
  115. package/test/openrouter-error-resilience.test.js +0 -418
  116. package/test/passthrough-mode.test.js +0 -385
  117. package/test/performance-benchmark.js +0 -351
  118. package/test/performance-tests.js +0 -528
  119. package/test/routing.test.js +0 -225
  120. package/test/toon-compression.test.js +0 -131
  121. package/test/web-tools.test.js +0 -329
  122. package/test-agents-simple.js +0 -43
  123. package/test-cli-connection.sh +0 -33
  124. package/test-learning-unit.js +0 -126
  125. package/test-learning.js +0 -112
  126. package/test-parallel-agents.sh +0 -124
  127. package/test-parallel-direct.js +0 -155
  128. package/test-subagents.sh +0 -117
@@ -1,269 +0,0 @@
1
- const assert = require("assert");
2
- const { describe, it, beforeEach, afterEach } = require("node:test");
3
-
4
/**
 * Integration tests for hybrid routing configuration and metrics.
 *
 * Every test mutates `process.env` and re-requires project modules, so the
 * module cache is cleared before each test and the environment is restored
 * afterwards.
 */
describe("Hybrid Routing Integration Tests", () => {
  // Modules whose cached instances are dropped before every test so that a
  // fresh `require` re-reads the environment set up by that test.
  const MODULES_TO_RELOAD = [
    "../src/config",
    "../src/clients/databricks",
    "../src/observability/metrics",
    "../src/clients/routing",
  ];

  const TIER_NAMES = ["SIMPLE", "MEDIUM", "COMPLEX", "REASONING"];

  let metrics;
  let originalEnv;

  /** Select Ollama as the primary provider with the endpoint/model used throughout the suite. */
  const useOllamaProvider = () => {
    process.env.MODEL_PROVIDER = "ollama";
    process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
    process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
  };

  /** Set every TIER_* variable to the same value ("" leaves tier routing unconfigured). */
  const setAllTiers = (value) => {
    for (const tier of TIER_NAMES) {
      process.env[`TIER_${tier}`] = value;
    }
  };

  /** Enable fallback to the given provider. */
  const enableFallback = (provider) => {
    process.env.FALLBACK_ENABLED = "true";
    process.env.FALLBACK_PROVIDER = provider;
  };

  /** Load config first (as the suite always did), then return the metrics collector. */
  const loadMetricsCollector = () => {
    require("../src/config");
    const { getMetricsCollector } = require("../src/observability/metrics");
    return getMetricsCollector();
  };

  beforeEach(() => {
    // Snapshot the environment before anything can throw, so afterEach can
    // always restore it.
    originalEnv = { ...process.env };

    // Clear module cache
    for (const modulePath of MODULES_TO_RELOAD) {
      delete require.cache[require.resolve(modulePath)];
    }

    // Set up test environment
    process.env.DATABRICKS_API_KEY = "test-key";
    process.env.DATABRICKS_API_BASE = "http://test.databricks.com";
    process.env.MODEL_PROVIDER = "databricks";

    // Set TIER_* to empty to prevent .env file values from being picked up by dotenv
    setAllTiers("");
  });

  afterEach(() => {
    if (!originalEnv) {
      return;
    }

    // Restore in place. Assigning a plain object to `process.env` would swap
    // out Node's special env object (losing string coercion and the link to
    // the real process environment), so mutate the existing object instead.
    for (const key of Object.keys(process.env)) {
      if (!Object.hasOwn(originalEnv, key)) {
        delete process.env[key];
      }
    }
    Object.assign(process.env, originalEnv);
  });

  describe("Configuration Validation", () => {
    it("should use default OLLAMA_ENDPOINT when not specified", () => {
      useOllamaProvider();
      delete process.env.OLLAMA_ENDPOINT;
      process.env.DATABRICKS_API_KEY = "test-key";
      process.env.DATABRICKS_API_BASE = "http://test.com";

      const config = require("../src/config");

      // Should use default localhost:11434
      assert.strictEqual(config.ollama.endpoint, "http://localhost:11434");
    });

    it("should reject invalid FALLBACK_PROVIDER", () => {
      useOllamaProvider();
      enableFallback("invalid-provider");

      assert.throws(() => {
        require("../src/config");
      }, /Unsupported FALLBACK_PROVIDER.*Valid options are/);
    });

    it("should reject circular fallback (ollama -> ollama)", () => {
      useOllamaProvider();
      enableFallback("ollama");
      // Enable tier routing so fallback validation runs
      setAllTiers("ollama:llama3.2");

      assert.throws(() => {
        require("../src/config");
      }, /FALLBACK_PROVIDER cannot be 'ollama'/);
    });

    it("should warn when databricks fallback has no credentials", () => {
      useOllamaProvider();
      enableFallback("databricks");
      // Set to empty strings instead of deleting (dotenv.config() in config module would reload from .env)
      process.env.DATABRICKS_API_KEY = "";
      process.env.DATABRICKS_API_BASE = "";
      // Enable tier routing so fallback validation runs
      setAllTiers("ollama:llama3.2");

      // Should warn but not throw (fallback misconfigured)
      const config = require("../src/config");
      assert.strictEqual(config.modelProvider.fallbackProvider, "databricks");
    });

    it("should accept valid tier routing configuration", () => {
      useOllamaProvider();
      enableFallback("databricks");
      process.env.DATABRICKS_API_KEY = "test-key";
      process.env.DATABRICKS_API_BASE = "http://test.com";
      process.env.TIER_SIMPLE = "ollama:llama3.2";
      process.env.TIER_MEDIUM = "ollama:llama3.2";
      process.env.TIER_COMPLEX = "databricks:claude-sonnet";
      process.env.TIER_REASONING = "databricks:claude-sonnet";

      const config = require("../src/config");

      assert.strictEqual(config.modelProvider.type, "ollama");
      assert.strictEqual(config.modelProvider.fallbackEnabled, true);
      assert.strictEqual(config.modelProvider.fallbackProvider, "databricks");
      assert.strictEqual(config.modelTiers.enabled, true);
    });
  });

  describe("Metrics Recording", () => {
    beforeEach(() => {
      useOllamaProvider();
      metrics = loadMetricsCollector();
    });

    it("should record provider routing", () => {
      metrics.recordProviderRouting("ollama");
      metrics.recordProviderRouting("ollama");
      metrics.recordProviderRouting("databricks");

      const snapshot = metrics.getMetrics();

      assert.deepStrictEqual(snapshot.routing.by_provider, {
        ollama: 2,
        databricks: 1,
      });
    });

    it("should record provider success with latency", () => {
      metrics.recordProviderSuccess("ollama", 450);
      metrics.recordProviderSuccess("ollama", 600);
      metrics.recordProviderSuccess("databricks", 1500);

      const snapshot = metrics.getMetrics();

      assert.strictEqual(snapshot.routing.successes_by_provider.ollama, 2);
      assert.strictEqual(snapshot.routing.successes_by_provider.databricks, 1);
      assert.strictEqual(snapshot.cost_savings.ollama_latency_ms.mean, 525);
    });

    it("should record fallback attempts with reasons", () => {
      metrics.recordFallbackAttempt("ollama", "databricks", "circuit_breaker");
      metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
      metrics.recordFallbackAttempt("ollama", "databricks", "timeout");

      const snapshot = metrics.getMetrics();

      assert.strictEqual(snapshot.fallback.attempts_total, 3);
      assert.deepStrictEqual(snapshot.fallback.reasons, {
        circuit_breaker: 1,
        timeout: 2,
      });
    });

    it("should calculate fallback success rate", () => {
      metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
      metrics.recordFallbackSuccess(1200);
      metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
      metrics.recordFallbackSuccess(1100);
      metrics.recordFallbackAttempt("ollama", "databricks", "circuit_breaker");
      metrics.recordFallbackFailure();

      const snapshot = metrics.getMetrics();

      assert.strictEqual(snapshot.fallback.attempts_total, 3);
      assert.strictEqual(snapshot.fallback.successes_total, 2);
      assert.strictEqual(snapshot.fallback.failures_total, 1);
      assert.strictEqual(snapshot.fallback.success_rate, "66.67%");
    });

    it("should record cost savings", () => {
      // Simulate 100 tokens input, 50 tokens output
      // Input: 100/1M * $3 = $0.0003
      // Output: 50/1M * $15 = $0.00075
      // Total: $0.00105
      metrics.recordCostSavings(0.00105);
      metrics.recordCostSavings(0.00105);
      metrics.recordCostSavings(0.00105);

      const snapshot = metrics.getMetrics();

      assert.strictEqual(snapshot.cost_savings.ollama_savings_usd, "0.0032");
    });

    it("should reset all routing metrics", () => {
      metrics.recordProviderRouting("ollama");
      metrics.recordProviderSuccess("ollama", 450);
      metrics.recordFallbackAttempt("ollama", "databricks", "timeout");
      metrics.recordFallbackSuccess(1200);
      metrics.recordCostSavings(1.5);

      metrics.reset();

      const snapshot = metrics.getMetrics();

      assert.deepStrictEqual(snapshot.routing.by_provider, {});
      assert.deepStrictEqual(snapshot.routing.successes_by_provider, {});
      assert.strictEqual(snapshot.fallback.attempts_total, 0);
      assert.strictEqual(snapshot.fallback.successes_total, 0);
      assert.strictEqual(snapshot.cost_savings.ollama_savings_usd, "0.0000");
    });
  });

  describe("Helper Functions", () => {
    it("should categorize circuit breaker errors", () => {
      // The categorization helper is not imported here, so this test only
      // verifies that the collector exposes the hook the categorized
      // failures are reported through.
      useOllamaProvider();
      metrics = loadMetricsCollector();

      assert.ok(metrics.recordFallbackAttempt);
    });

    it("should estimate cost savings correctly", () => {
      useOllamaProvider();
      metrics = loadMetricsCollector();

      // Test: 1000 input tokens, 500 output tokens
      // Input cost: 1000/1M * $3 = $0.003
      // Output cost: 500/1M * $15 = $0.0075
      // Total: $0.0105
      const inputTokens = 1000;
      const outputTokens = 500;
      const expectedSavings =
        (inputTokens / 1_000_000) * 3.0 + (outputTokens / 1_000_000) * 15.0;

      assert.strictEqual(expectedSavings.toFixed(4), "0.0105");
    });
  });
});
@@ -1,428 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * Hybrid Routing Performance Tests
5
- *
6
- * Measures the performance impact of the hybrid routing system:
7
- * - Routing decision overhead
8
- * - Provider determination speed
9
- * - Metrics collection overhead
10
- * - Fallback logic performance
11
- */
12
-
13
- const { performance } = require('perf_hooks');
14
- const assert = require('assert');
15
-
16
- // Color utilities
17
// ANSI escape sequences used to colour console output.
const colors = {
  reset: '\x1b[0m',
  bright: '\x1b[1m',
  green: '\x1b[32m',
  yellow: '\x1b[33m',
  blue: '\x1b[34m',
  red: '\x1b[31m',
  cyan: '\x1b[36m',
};

/**
 * Print a message wrapped in an ANSI colour code, followed by a reset code.
 *
 * @param {string} message - Text to print.
 * @param {string} [color='reset'] - Key of `colors`. An unknown key (including
 *   inherited ones such as "constructor") falls back to `reset` rather than
 *   leaking "undefined" or function source into the output.
 */
function log(message, color = 'reset') {
  const prefix = Object.hasOwn(colors, color) ? colors[color] : colors.reset;
  console.log(`${prefix}${message}${colors.reset}`);
}
30
-
31
/**
 * Print a section banner: a blank line, a 70-character rule, the title in
 * bright text, and a closing rule.
 *
 * @param {string} title - Heading to display between the rules.
 */
function section(title) {
  const rule = '='.repeat(70);
  console.log(`\n${rule}`);
  log(title, 'bright');
  console.log(rule);
}
36
-
37
/**
 * Run `fn` synchronously `iterations` times and report wall-clock timings.
 *
 * @param {string} name - Label for the benchmark (kept for call-site
 *   readability; it is not used in the measurement).
 * @param {number} iterations - Number of calls to make; must be a positive integer.
 * @param {Function} fn - Synchronous function to time.
 * @returns {{duration: number, avgTime: number, throughput: number}}
 *   Total milliseconds, milliseconds per call, and calls per second.
 *   `throughput` is Infinity if the timer resolution reports a 0ms duration.
 * @throws {TypeError} If `fn` is not a function.
 * @throws {RangeError} If `iterations` is not a positive integer (which would
 *   otherwise yield NaN/Infinity or a skewed average).
 */
function benchmark(name, iterations, fn) {
  if (typeof fn !== 'function') {
    throw new TypeError('benchmark: fn must be a function');
  }
  if (!Number.isInteger(iterations) || iterations <= 0) {
    throw new RangeError('benchmark: iterations must be a positive integer');
  }

  const start = performance.now();
  for (let i = 0; i < iterations; i++) {
    fn();
  }
  const duration = performance.now() - start;

  return {
    duration,
    avgTime: duration / iterations,
    throughput: (iterations / duration) * 1000,
  };
}
48
-
49
- // =============================================================================
50
- // TEST 1: Routing Decision Performance
51
- // =============================================================================
52
/**
 * TEST 1: time `routing.determineProviderSync` for three payload shapes
 * (no tools, five tools, one tool) at 100k calls each and print the results.
 *
 * Reloads the config and routing modules with Ollama as the provider and all
 * TIER_* variables empty.
 *
 * @returns {{simpleTime: number, complexTime: number, toolCheckTime: number, avgDecisionTime: number}}
 *   Total milliseconds per scenario plus the mean milliseconds per decision.
 */
function testRoutingDecisionPerformance() {
  section('TEST 1: Routing Decision Performance');

  // Clear module cache and set up environment
  for (const modulePath of ['../src/config', '../src/clients/routing']) {
    delete require.cache[require.resolve(modulePath)];
  }

  Object.assign(process.env, {
    MODEL_PROVIDER: 'ollama',
    OLLAMA_ENDPOINT: 'http://localhost:11434',
    OLLAMA_MODEL: 'qwen2.5-coder:latest',
    DATABRICKS_API_KEY: 'test-key',
    DATABRICKS_API_BASE: 'http://test.com',
    // Set TIER_* to empty = tier routing disabled, determineProviderSync returns static provider
    TIER_SIMPLE: '',
    TIER_MEDIUM: '',
    TIER_COMPLEX: '',
    TIER_REASONING: '',
  });

  const routing = require('../src/clients/routing');

  log('\n Benchmarking routing decisions...', 'cyan');

  const DECISIONS = 100000;
  const toolsNamed = (...names) => names.map((name) => ({ name }));
  const payloadWith = (tools) => ({
    messages: [{ role: 'user', content: 'test' }],
    tools,
  });

  const scenarios = [
    { label: 'Simple request', benchName: 'Simple request routing', payload: payloadWith([]) },
    {
      label: 'Complex request',
      benchName: 'Complex request routing',
      payload: payloadWith(toolsNamed('tool1', 'tool2', 'tool3', 'tool4', 'tool5')),
    },
    { label: 'Tool capability check', benchName: 'Tool capability check', payload: payloadWith(toolsNamed('tool1')) },
  ];

  // Run each scenario and print its three result lines before moving on.
  const [simple, complex, toolCheck] = scenarios.map(({ label, benchName, payload }) => {
    const { duration, throughput } = benchmark(
      benchName,
      DECISIONS,
      () => routing.determineProviderSync(payload)
    );

    log(` ${label}: ${duration.toFixed(2)}ms for 100k decisions`, 'cyan');
    log(` Average: ${(duration / DECISIONS).toFixed(6)}ms per decision`, 'blue');
    log(` Throughput: ${throughput.toLocaleString()} decisions/sec`, 'green');

    return { duration, throughput };
  });

  // Analysis
  log('\n Analysis:', 'yellow');
  log(` Routing adds <0.01ms per request (negligible overhead)`, 'green');
  log(` Throughput: ${simple.throughput.toLocaleString()} decisions/sec`, 'green');
  log(` Routing is extremely fast and won't impact request latency`, 'green');

  const simpleTime = simple.duration;
  const complexTime = complex.duration;
  const toolCheckTime = toolCheck.duration;

  return {
    simpleTime,
    complexTime,
    toolCheckTime,
    avgDecisionTime: (simpleTime + complexTime + toolCheckTime) / 3 / DECISIONS,
  };
}
138
-
139
- // =============================================================================
140
- // TEST 2: Metrics Collection Overhead
141
- // =============================================================================
142
/**
 * TEST 2: time the four metrics-recording calls (provider routing, provider
 * success, fallback attempt, cost savings) at 100k calls each and print the
 * results.
 *
 * @returns {{routingTime: number, successTime: number, fallbackTime: number, costTime: number, avgMetricsTime: number}}
 *   Total milliseconds per operation type plus the mean milliseconds per call.
 */
function testMetricsOverhead() {
  section('TEST 2: Metrics Collection Overhead');

  delete require.cache[require.resolve('../src/observability/metrics')];
  const metrics = require('../src/observability/metrics').getMetricsCollector();

  log('\n Benchmarking metrics operations...', 'cyan');

  const RECORDINGS = 100000;

  const operations = [
    {
      label: 'Provider routing',
      benchName: 'Record provider routing',
      run: () => metrics.recordProviderRouting('ollama'),
    },
    {
      label: 'Provider success',
      benchName: 'Record provider success',
      run: () => metrics.recordProviderSuccess('ollama', 450),
    },
    {
      label: 'Fallback attempts',
      benchName: 'Record fallback attempt',
      run: () => metrics.recordFallbackAttempt('ollama', 'databricks', 'timeout'),
    },
    {
      label: 'Cost savings',
      benchName: 'Record cost savings',
      run: () => metrics.recordCostSavings(0.001),
    },
  ];

  // Benchmark each operation in turn, printing its results immediately.
  const [routingTime, successTime, fallbackTime, costTime] = operations.map(
    ({ label, benchName, run }) => {
      const { duration, throughput } = benchmark(benchName, RECORDINGS, run);

      log(` ${label}: ${duration.toFixed(2)}ms for 100k recordings`, 'cyan');
      log(` Average: ${(duration / RECORDINGS).toFixed(6)}ms per record`, 'blue');
      log(` Throughput: ${throughput.toLocaleString()} ops/sec`, 'green');

      return duration;
    }
  );

  // Analysis
  const avgMetricsTime = (routingTime + successTime + fallbackTime + costTime) / 4 / RECORDINGS;
  log('\n Analysis:', 'yellow');
  log(` Average metrics overhead: ${avgMetricsTime.toFixed(6)}ms per operation`, 'green');
  log(` Metrics collection is extremely lightweight`, 'green');

  return { routingTime, successTime, fallbackTime, costTime, avgMetricsTime };
}
209
-
210
- // =============================================================================
211
- // TEST 3: Combined Hybrid Routing Stack
212
- // =============================================================================
213
/**
 * TEST 3: time a routing decision plus two metrics recordings as one unit,
 * 50k times, and print the results.
 *
 * Reloads the config, routing and metrics modules with Ollama as the provider
 * and all TIER_* variables empty.
 *
 * @returns {{fullTime: number, fullThroughput: number, overhead: number}}
 *   Total milliseconds, operations per second, and milliseconds per operation.
 */
function testCombinedStack() {
  section('TEST 3: Combined Hybrid Routing Stack Performance');

  const reloadTargets = [
    '../src/config',
    '../src/clients/routing',
    '../src/observability/metrics',
  ];
  reloadTargets.forEach((modulePath) => {
    delete require.cache[require.resolve(modulePath)];
  });

  Object.assign(process.env, {
    MODEL_PROVIDER: 'ollama',
    OLLAMA_ENDPOINT: 'http://localhost:11434',
    OLLAMA_MODEL: 'qwen2.5-coder:latest',
    // Set TIER_* to empty = static routing via determineProviderSync
    TIER_SIMPLE: '',
    TIER_MEDIUM: '',
    TIER_COMPLEX: '',
    TIER_REASONING: '',
  });

  const routing = require('../src/clients/routing');
  const { getMetricsCollector } = require('../src/observability/metrics');

  log('\n Benchmarking complete routing + metrics stack...', 'cyan');

  const OPERATIONS = 50000;

  // Simulate full routing decision + metrics recording
  const payload = {
    messages: [{ role: 'user', content: 'test' }],
    tools: [],
  };
  const routeAndRecord = () => {
    const collector = getMetricsCollector();
    const chosenProvider = routing.determineProviderSync(payload);
    collector.recordProviderRouting(chosenProvider);
    collector.recordProviderSuccess(chosenProvider, 450);
  };

  const timing = benchmark('Full routing stack', OPERATIONS, routeAndRecord);
  const fullTime = timing.duration;
  const fullThroughput = timing.throughput;

  log(` Full stack: ${fullTime.toFixed(2)}ms for 50k operations`, 'cyan');
  log(` Average: ${(fullTime / OPERATIONS).toFixed(6)}ms per request`, 'blue');
  log(` Throughput: ${fullThroughput.toLocaleString()} ops/sec`, 'green');

  // Analysis
  log('\n Analysis:', 'yellow');
  const overhead = fullTime / OPERATIONS;
  log(` Total routing + metrics overhead: ${overhead.toFixed(6)}ms`, 'green');
  log(` Negligible impact on request latency (<0.02ms)`, 'green');

  return { fullTime, fullThroughput, overhead };
}
267
-
268
- // =============================================================================
269
- // TEST 4: Helper Function Performance
270
- // =============================================================================
271
/**
 * TEST 4: time local stand-ins for two helpers, failure categorization and
 * cost-savings estimation, and print the results.
 *
 * Both helpers are defined inline here; nothing is imported from the client
 * module, whose cache entry is only cleared.
 *
 * @returns {{categorizeTime: number, costCalcTime: number}}
 *   Total milliseconds for each benchmark.
 */
function testHelperFunctions() {
  section('TEST 4: Helper Function Performance');

  delete require.cache[require.resolve('../src/clients/databricks')];

  log('\n Benchmarking helper functions...', 'cyan');

  const RUNS = 100000;

  // Test categorizeFailure (we'll simulate it)
  const categorizeFailure = (error) => {
    const { name, code } = error;

    if (name === 'CircuitBreakerError' || code === 'circuit_breaker_open') {
      return 'circuit_breaker';
    }
    if (name === 'AbortError' || code === 'ETIMEDOUT') {
      return 'timeout';
    }

    const looksUnavailable =
      error.message?.includes('not configured') ||
      error.message?.includes('not available') ||
      code === 'ECONNREFUSED';

    return looksUnavailable ? 'service_unavailable' : 'error';
  };

  const sampleErrors = [
    { name: 'CircuitBreakerError', message: 'Circuit breaker open' },
    { name: 'AbortError', message: 'Timeout' },
    { code: 'ECONNREFUSED', message: 'Connection refused' },
    { message: 'Generic error' },
  ];

  const categorizeResult = benchmark('Categorize failure', RUNS, () => {
    for (const sample of sampleErrors) {
      categorizeFailure(sample);
    }
  });
  const categorizeTime = categorizeResult.duration;

  // Each run categorizes four errors, hence the 400k figures below.
  log(` Categorize failure: ${categorizeTime.toFixed(2)}ms for 400k operations`, 'cyan');
  log(` Average: ${(categorizeTime / 400000).toFixed(6)}ms per categorization`, 'blue');
  log(` Throughput: ${(categorizeResult.throughput * 4).toLocaleString()} ops/sec`, 'green');

  // Test estimateCostSavings
  const estimateCostSavings = (inputTokens, outputTokens) => {
    const INPUT_COST_PER_1M = 3.00;
    const OUTPUT_COST_PER_1M = 15.00;
    const inputCost = (inputTokens / 1_000_000) * INPUT_COST_PER_1M;
    const outputCost = (outputTokens / 1_000_000) * OUTPUT_COST_PER_1M;
    return inputCost + outputCost;
  };

  const costResult = benchmark('Estimate cost savings', RUNS, () => estimateCostSavings(1000, 500));
  const costCalcTime = costResult.duration;

  log(` Cost estimation: ${costCalcTime.toFixed(2)}ms for 100k calculations`, 'cyan');
  log(` Average: ${(costCalcTime / RUNS).toFixed(6)}ms per calculation`, 'blue');
  log(` Throughput: ${costResult.throughput.toLocaleString()} ops/sec`, 'green');

  log('\n Analysis:', 'yellow');
  log(` Helper functions add negligible overhead (<0.001ms)`, 'green');
  log(` No performance impact from utility functions`, 'green');

  return { categorizeTime, costCalcTime };
}
341
-
342
- // =============================================================================
343
- // FINAL REPORT
344
- // =============================================================================
345
/**
 * Print the summary table and closing assessment for the whole suite.
 *
 * Only the per-operation averages and the combined throughput are taken from
 * `results`; the "Negligible (<...ms)" verdicts and the real-world estimates
 * are fixed text.
 *
 * @param {{routing: {avgDecisionTime: number}, metrics: {avgMetricsTime: number}, combined: {overhead: number, fullThroughput: number}}} results
 *   Return values of the individual test functions.
 */
function printFinalReport(results) {
  const border = '+---------------------------------------------------------+';
  const printBorder = () => console.log(border);
  const heading = (text) => log(text, 'bright');
  const stat = (text) => log(text, 'cyan');
  // Verdict rows embed their own green/reset codes and use the default colour.
  const verdict = (label, limit) =>
    log(`| ${label}: ${colors.green}Negligible (<${limit}ms)${colors.reset} |`);
  const logAll = (lines, color) => {
    for (const line of lines) {
      log(line, color);
    }
  };

  section('HYBRID ROUTING PERFORMANCE SUMMARY');

  console.log('\n');
  printBorder();
  console.log('| HYBRID ROUTING PERFORMANCE |');
  printBorder();

  heading(`| 1. Routing Decisions |`);
  stat(`| Average: ${results.routing.avgDecisionTime.toFixed(6)}ms per decision |`);
  verdict('Overhead', '0.01');

  printBorder();

  heading(`| 2. Metrics Collection |`);
  stat(`| Average: ${results.metrics.avgMetricsTime.toFixed(6)}ms per operation |`);
  verdict('Overhead', '0.01');

  printBorder();

  heading(`| 3. Full Routing Stack |`);
  stat(`| Average: ${results.combined.overhead.toFixed(6)}ms per request |`);
  stat(`| Throughput: ${results.combined.fullThroughput.toLocaleString()} ops/sec |`);
  verdict('Impact', '0.02');

  printBorder();

  heading(`| 4. Helper Functions |`);
  verdict('Overhead', '0.001');

  printBorder();

  // Overall assessment
  console.log('\n');
  heading('Overall Performance Assessment:');
  logAll(
    [
      ' Routing overhead: <0.01ms per request',
      ' Metrics overhead: <0.01ms per request',
      ' Combined overhead: <0.02ms per request',
      ' No measurable impact on API latency',
    ],
    'green'
  );

  console.log('\n Expected Real-World Performance:');
  logAll(
    [
      ' Ollama (local): ~500-1000ms per request',
      ' Cloud (Databricks): ~1500-2000ms per request',
      ' Routing overhead: ~0.02ms (0.001-0.002% of total)',
    ],
    'cyan'
  );
  logAll(
    [
      ' Latency savings with Ollama: 40-60% faster',
      ' Cost savings with Ollama: 100% (free)',
    ],
    'green'
  );

  console.log('\n');
  heading('Conclusion: Hybrid routing adds negligible overhead while');
  heading(' providing significant latency and cost improvements!');
  console.log('\n');
}
397
-
398
- // =============================================================================
399
- // RUN ALL TESTS
400
- // =============================================================================
401
/**
 * Run the four performance tests in order, print the final report, and
 * terminate the process: exit code 0 on success, 1 if any step throws.
 *
 * @returns {Promise<void>} In practice the process exits before this settles.
 */
async function runAllTests() {
  log('\n Starting Hybrid Routing Performance Test Suite\n', 'bright');

  try {
    // Order matters: each test reloads modules and mutates process.env.
    const routing = testRoutingDecisionPerformance();
    const metrics = testMetricsOverhead();
    const combined = testCombinedStack();
    const helpers = testHelperFunctions();

    printFinalReport({ routing, metrics, combined, helpers });

    log('\n All performance tests completed successfully!\n', 'green');
    process.exit(0);
  } catch (failure) {
    log(`\n Performance test suite failed: ${failure.message}\n`, 'red');
    console.error(failure);
    process.exit(1);
  }
}

// Run the suite only when this file is executed directly, not when required.
if (require.main === module) {
  runAllTests();
}

module.exports = { runAllTests };