@duckmind/deepquark-darwin-arm64 0.9.83 → 0.9.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/.deepquark/skills/bundled/knowledge-graph/SKILL.md +385 -0
  2. package/.deepquark/skills/bundled/knowledge-graph/STANDARDS.md +461 -0
  3. package/.deepquark/skills/bundled/knowledge-graph/lib/cli.ts +588 -0
  4. package/.deepquark/skills/bundled/knowledge-graph/lib/config.ts +630 -0
  5. package/.deepquark/skills/bundled/knowledge-graph/lib/connection-profile.ts +629 -0
  6. package/.deepquark/skills/bundled/knowledge-graph/lib/container.ts +756 -0
  7. package/.deepquark/skills/bundled/knowledge-graph/lib/mcp-client.ts +1310 -0
  8. package/.deepquark/skills/bundled/knowledge-graph/lib/output-formatter.ts +997 -0
  9. package/.deepquark/skills/bundled/knowledge-graph/lib/token-metrics.ts +335 -0
  10. package/.deepquark/skills/bundled/knowledge-graph/lib/transformation-log.ts +137 -0
  11. package/.deepquark/skills/bundled/knowledge-graph/lib/wrapper-config.ts +113 -0
  12. package/.deepquark/skills/bundled/knowledge-graph/server/.env.example +129 -0
  13. package/.deepquark/skills/bundled/knowledge-graph/server/compare-embeddings.ts +175 -0
  14. package/.deepquark/skills/bundled/knowledge-graph/server/config-falkordb.yaml +108 -0
  15. package/.deepquark/skills/bundled/knowledge-graph/server/config-neo4j.yaml +111 -0
  16. package/.deepquark/skills/bundled/knowledge-graph/server/diagnose.ts +483 -0
  17. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-falkordb-dev.yml +146 -0
  18. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-falkordb.yml +151 -0
  19. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-neo4j-dev-local.yml +161 -0
  20. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-neo4j-dev.yml +161 -0
  21. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-neo4j.yml +169 -0
  22. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-production.yml +128 -0
  23. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-test.yml +10 -0
  24. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose.yml +84 -0
  25. package/.deepquark/skills/bundled/knowledge-graph/server/entrypoint.sh +40 -0
  26. package/.deepquark/skills/bundled/knowledge-graph/server/install.ts +2054 -0
  27. package/.deepquark/skills/bundled/knowledge-graph/server/podman-compose-falkordb.yml +78 -0
  28. package/.deepquark/skills/bundled/knowledge-graph/server/podman-compose-neo4j.yml +88 -0
  29. package/.deepquark/skills/bundled/knowledge-graph/server/podman-compose.yml +83 -0
  30. package/.deepquark/skills/bundled/knowledge-graph/server/test-all-llms-mcp.ts +387 -0
  31. package/.deepquark/skills/bundled/knowledge-graph/server/test-embedding-models.ts +201 -0
  32. package/.deepquark/skills/bundled/knowledge-graph/server/test-embedding-providers.ts +641 -0
  33. package/.deepquark/skills/bundled/knowledge-graph/server/test-graphiti-model.ts +217 -0
  34. package/.deepquark/skills/bundled/knowledge-graph/server/test-grok-correct.ts +141 -0
  35. package/.deepquark/skills/bundled/knowledge-graph/server/test-grok-llms-mcp.ts +386 -0
  36. package/.deepquark/skills/bundled/knowledge-graph/server/test-grok-models.ts +173 -0
  37. package/.deepquark/skills/bundled/knowledge-graph/server/test-llama-extraction.ts +188 -0
  38. package/.deepquark/skills/bundled/knowledge-graph/server/test-mcp-final.ts +240 -0
  39. package/.deepquark/skills/bundled/knowledge-graph/server/test-mcp-live.ts +187 -0
  40. package/.deepquark/skills/bundled/knowledge-graph/server/test-mcp-session.ts +127 -0
  41. package/.deepquark/skills/bundled/knowledge-graph/server/test-model-combinations.ts +316 -0
  42. package/.deepquark/skills/bundled/knowledge-graph/server/test-ollama-models.ts +228 -0
  43. package/.deepquark/skills/bundled/knowledge-graph/server/test-openrouter-models.ts +460 -0
  44. package/.deepquark/skills/bundled/knowledge-graph/server/test-real-life-mcp.ts +311 -0
  45. package/.deepquark/skills/bundled/knowledge-graph/server/test-search-debug.ts +199 -0
  46. package/.deepquark/skills/bundled/knowledge-graph/tools/Install.md +104 -0
  47. package/.deepquark/skills/bundled/knowledge-graph/tools/README.md +120 -0
  48. package/.deepquark/skills/bundled/knowledge-graph/tools/knowledge-cli.ts +996 -0
  49. package/.deepquark/skills/bundled/knowledge-graph/tools/server-cli.ts +531 -0
  50. package/.deepquark/skills/bundled/knowledge-graph/workflows/BulkImport.md +514 -0
  51. package/.deepquark/skills/bundled/knowledge-graph/workflows/CaptureEpisode.md +242 -0
  52. package/.deepquark/skills/bundled/knowledge-graph/workflows/ClearGraph.md +392 -0
  53. package/.deepquark/skills/bundled/knowledge-graph/workflows/GetRecent.md +352 -0
  54. package/.deepquark/skills/bundled/knowledge-graph/workflows/GetStatus.md +373 -0
  55. package/.deepquark/skills/bundled/knowledge-graph/workflows/HealthReport.md +212 -0
  56. package/.deepquark/skills/bundled/knowledge-graph/workflows/InvestigateEntity.md +142 -0
  57. package/.deepquark/skills/bundled/knowledge-graph/workflows/OntologyManagement.md +201 -0
  58. package/.deepquark/skills/bundled/knowledge-graph/workflows/RunMaintenance.md +302 -0
  59. package/.deepquark/skills/bundled/knowledge-graph/workflows/SearchByDate.md +255 -0
  60. package/.deepquark/skills/bundled/knowledge-graph/workflows/SearchFacts.md +382 -0
  61. package/.deepquark/skills/bundled/knowledge-graph/workflows/SearchKnowledge.md +374 -0
  62. package/.deepquark/skills/bundled/knowledge-graph/workflows/StixImport.md +212 -0
  63. package/bin/deepquark +0 -0
  64. package/package.json +1 -1
  65. package/.deepquark/skills/bundled/ge-payroll/SKILL.md +0 -153
  66. package/.deepquark/skills/bundled/ge-payroll/evals/evals.json +0 -23
  67. package/.deepquark/skills/bundled/ge-payroll/references/pain-points-improvements.md +0 -106
  68. package/.deepquark/skills/bundled/ge-payroll/references/process-detail.md +0 -217
  69. package/.deepquark/skills/bundled/ge-payroll/references/raci-stakeholders.md +0 -85
  70. package/.deepquark/skills/bundled/ge-payroll/references/timeline-mandays.md +0 -64
@@ -0,0 +1,460 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Test OpenRouter LLM models for cost vs performance
4
+ * Evaluates: entity extraction quality, response time, and cost
5
+ *
6
+ * Usage: OPENROUTER_API_KEY=sk-or-... bun run test-openrouter-models.ts
7
+ */
8
+
9
// OpenRouter credentials and endpoint. The key is read from the environment only.
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1';

// Without a key no request can succeed, so print setup help and stop immediately.
if (!OPENROUTER_API_KEY) {
  const setupHelp = [
    '❌ OPENROUTER_API_KEY environment variable required',
    ' Get your key at: https://openrouter.ai/keys',
    ' Usage: OPENROUTER_API_KEY=sk-or-... bun run test-openrouter-models.ts',
  ];
  for (const line of setupHelp) {
    console.error(line);
  }
  process.exit(1);
}
18
+
19
// Models to test with their pricing (per 1M tokens as of Jan 2026)
// Prices from https://openrouter.ai/models
// Each row is [model id, display name, input price, output price]; the rows are
// expanded into { id, name, inputPrice, outputPrice } objects below.
const LLM_MODELS = (
  [
    // Top tier - highest quality
    ['anthropic/claude-sonnet-4', 'Claude Sonnet 4', 3.0, 15.0],
    ['openai/gpt-4o', 'GPT-4o', 2.5, 10.0],
    ['google/gemini-2.0-flash-001', 'Gemini 2.0 Flash', 0.1, 0.4],

    // Mid tier - good balance
    ['openai/gpt-4o-mini', 'GPT-4o Mini', 0.15, 0.6],
    ['anthropic/claude-3.5-haiku', 'Claude 3.5 Haiku', 0.8, 4.0],
    ['google/gemini-flash-1.5', 'Gemini 1.5 Flash', 0.075, 0.3],

    // Budget tier - cost effective
    ['meta-llama/llama-3.3-70b-instruct', 'Llama 3.3 70B', 0.4, 0.4],
    ['meta-llama/llama-3.1-8b-instruct', 'Llama 3.1 8B', 0.055, 0.055],
    ['mistralai/mistral-7b-instruct', 'Mistral 7B', 0.055, 0.055],
    ['qwen/qwen-2.5-72b-instruct', 'Qwen 2.5 72B', 0.35, 0.4],

    // Deep reasoning (may be slower)
    ['deepseek/deepseek-r1', 'DeepSeek R1', 0.55, 2.19],
    ['deepseek/deepseek-chat', 'DeepSeek V3', 0.14, 0.28],
  ] as [string, string, number, number][]
).map(([id, name, inputPrice, outputPrice]) => ({ id, name, inputPrice, outputPrice }));
62
+
63
// Test prompts for entity extraction (similar to Graphiti requirements).
// Every case pairs an input text with the entity names and relationship types
// that a good extraction is expected to contain.
const TEST_CASES = [
  {
    name: 'Basic Entity Extraction',
    text: [
      'John Smith works at Acme Corp in New York.',
      'He met Sarah Jones yesterday to discuss the Q4 budget.',
    ].join(' '),
    expectedEntities: ['John Smith', 'Acme Corp', 'New York', 'Sarah Jones'],
    expectedRelationships: ['works_at', 'located_in', 'met'],
  },
  {
    name: 'Technical Content',
    text: [
      'The PAI system uses Neo4j for graph storage and OpenAI for embeddings.',
      'It was created by Daniel Miessler to help people build personalized AI infrastructure.',
    ].join(' '),
    expectedEntities: ['PAI', 'Neo4j', 'OpenAI', 'Daniel Miessler'],
    expectedRelationships: ['uses', 'created_by'],
  },
  {
    name: 'Complex Relationships',
    text: [
      'Alice, the CTO of TechStart, acquired DataFlow Inc last month.',
      "Bob, who was DataFlow's CEO, now reports to Alice.",
      'The deal was worth $50M.',
    ].join(' '),
    expectedEntities: ['Alice', 'TechStart', 'DataFlow Inc', 'Bob'],
    expectedRelationships: ['cto_of', 'acquired', 'reports_to'],
  },
];
84
+
85
/**
 * Builds the user prompt that asks a model to extract entities and
 * relationships from `text` and to answer with JSON only.
 *
 * @param text - Source passage; it is embedded verbatim between double quotes.
 * @returns The complete prompt, including the JSON shape the model must return.
 */
const EXTRACTION_PROMPT = (text: string) => {
  const responseShape =
    '{"entities": [{"name": "string", "type": "PERSON|ORGANIZATION|LOCATION|CONCEPT"}], "relationships": [{"source": "string", "target": "string", "type": "string"}]}';

  const promptLines = [
    'Extract entities and relationships from this text. Return ONLY valid JSON, no other text.',
    '',
    `Text: "${text}"`,
    '',
    'Return this exact format:',
    responseShape,
  ];

  return promptLines.join('\n');
};
93
+
94
/** Aggregated outcome of running all test cases against a single model. */
interface ModelResult {
  /** OpenRouter model identifier, e.g. `openai/gpt-4o`. */
  model: string;
  /** Human-readable model name used in console output. */
  modelName: string;

  /** Input price per 1M tokens, copied from the model table. */
  inputPrice: number;
  /** Output price per 1M tokens, copied from the model table. */
  outputPrice: number;

  /** Mean response time across test cases, in milliseconds (rounded). */
  avgResponseMs: number;
  /** Mean prompt token count across test cases (rounded). */
  avgInputTokens: number;
  /** Mean completion token count across test cases (rounded). */
  avgOutputTokens: number;
  /** Estimated cost of 1000 calls, derived from the average token counts and prices. */
  costPer1000Calls: number;

  /** Average quality score over the passed test cases (0 when none passed). */
  qualityScore: number;
  /** Percentage of test cases that produced a parsable extraction. */
  passRate: number;

  /** Per-test-case details. */
  results: TestCaseResult[];
  /** Set when the model run itself failed before producing results. */
  error?: string;
}
108
+
109
/** Outcome of a single test case for a single model. */
interface TestCaseResult {
  /** Name of the test case that was run. */
  name: string;
  /** True when the response contained a parsable extraction. */
  passed: boolean;
  /** Failure description; absent for passed cases. */
  error?: string;

  /** Number of entities in the parsed extraction (0 on failure). */
  entities: number;
  /** Number of relationships in the parsed extraction (0 on failure). */
  relationships: number;

  /** Elapsed time for the request, in milliseconds. */
  responseMs: number;
  /** Prompt tokens reported by the API (0 when unavailable). */
  inputTokens: number;
  /** Completion tokens reported by the API (0 when unavailable). */
  outputTokens: number;
}
119
+
120
/**
 * Pulls the `{entities, relationships}` object out of a raw model reply.
 *
 * Markdown code fences and `<think>…</think>` sections are stripped first.
 * The cleaned text is parsed as a whole; if that fails, the span from the
 * first `{` to the last `}` is tried, so JSON surrounded by prose is still
 * found regardless of key order.
 *
 * @param text - Raw message content returned by the model.
 * @returns The parsed object when both `entities` and `relationships` are
 *   arrays, otherwise `null`.
 */
function extractJSON(text: string): { entities: any[]; relationships: any[] } | null {
  const clean = text
    // Remove markdown code blocks
    .replace(/```json\n?/g, '')
    .replace(/```\n?/g, '')
    // Remove thinking tags (DeepSeek uses these)
    .replace(/<think>[\s\S]*?<\/think>/g, '')
    .trim();

  // Candidate 1: the whole reply. Candidate 2: the outermost brace-delimited span.
  const candidates = [clean];
  const firstBrace = clean.indexOf('{');
  const lastBrace = clean.lastIndexOf('}');
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    candidates.push(clean.slice(firstBrace, lastBrace + 1));
  }

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not valid JSON on its own; fall through to the next candidate.
      continue;
    }

    // Both keys must be arrays: callers use .length/.map/.every on them.
    if (typeof parsed === 'object' && parsed !== null) {
      const shape = parsed as { entities?: unknown; relationships?: unknown };
      if (Array.isArray(shape.entities) && Array.isArray(shape.relationships)) {
        return { ...shape, entities: shape.entities, relationships: shape.relationships };
      }
    }
  }

  return null;
}
145
+
146
/**
 * Scores an extraction from 0 to 100.
 *
 * - Entity coverage (40 points): share of expected entities matched by a found
 *   entity name, using case-insensitive substring matching in either direction.
 * - Relationship presence (30 points): 15 for at least one relationship, 15 for
 *   a count between 1 and 10.
 * - Valid structure (30 points): 15 if every entity has a non-empty string
 *   `name`, 15 if every relationship has `source`, `target` and `type`.
 *
 * Malformed entities (missing or non-string `name`) only cost the structure
 * points; they never throw, and empty names are ignored for matching because an
 * empty string is a substring of everything.
 *
 * @param result - Parsed extraction with `entities` and `relationships` arrays.
 * @param expected - Test case; only `expectedEntities` is read.
 * @returns Score in the range 0–100.
 */
function calculateQuality(
  result: { entities: any[]; relationships: any[] },
  expected: { expectedEntities: readonly string[] }
): number {
  const maxScore = 100;
  let score = 0;

  // Entity coverage (40 points)
  const foundNames = result.entities
    .map((entity) => entity?.name)
    .filter((name): name is string => typeof name === 'string' && name.trim() !== '')
    .map((name) => name.trim().toLowerCase());
  const wantedNames = expected.expectedEntities.map((name) => name.toLowerCase());
  const matchedCount = wantedNames.filter((wanted) =>
    foundNames.some((found) => found.includes(wanted) || wanted.includes(found))
  ).length;
  // Skip the ratio when nothing is expected; 0/0 would turn the score into NaN.
  if (wantedNames.length > 0) {
    score += (matchedCount / wantedNames.length) * 40;
  }

  // Relationship presence (30 points)
  const relationshipCount = result.relationships.length;
  if (relationshipCount > 0) score += 15;
  if (relationshipCount >= 1 && relationshipCount <= 10) score += 15;

  // Valid structure (30 points)
  const validEntities = result.entities.every(
    (entity) => Boolean(entity) && typeof entity.name === 'string' && entity.name !== ''
  );
  const validRels = result.relationships.every(
    (rel) => Boolean(rel && rel.source && rel.target && rel.type)
  );
  if (validEntities) score += 15;
  if (validRels) score += 15;

  return Math.min(score, maxScore);
}
175
+
176
/**
 * Runs every entry of TEST_CASES against one OpenRouter model and aggregates
 * response time, token usage, estimated cost and extraction quality.
 *
 * A test case passes when the reply contains a parsable extraction (see
 * extractJSON). Request failures, HTTP errors and unparsable replies are
 * recorded as failed cases rather than thrown. The quality score is the mean of
 * the scores computed from each passed case's actual extraction.
 *
 * @param model - Entry of LLM_MODELS to evaluate.
 * @returns Aggregated metrics plus the per-case results.
 */
async function testModel(model: (typeof LLM_MODELS)[0]): Promise<ModelResult> {
  console.log(`\n🔄 Testing: ${model.name} (${model.id})`);
  console.log(` ${'─'.repeat(50)}`);

  const results: TestCaseResult[] = [];
  // Scores of passed cases, computed from the real model output. Kept beside
  // `results` because the per-case record only stores entity/relationship counts.
  const qualityScores: number[] = [];

  // Builds the record for a case that did not yield a usable extraction.
  const failedCase = (
    name: string,
    responseMs: number,
    error: string,
    inputTokens = 0,
    outputTokens = 0
  ): TestCaseResult => ({
    name,
    passed: false,
    entities: 0,
    relationships: 0,
    responseMs,
    inputTokens,
    outputTokens,
    error,
  });

  for (const testCase of TEST_CASES) {
    const start = Date.now();

    try {
      const response = await fetch(`${OPENROUTER_BASE_URL}/chat/completions`, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': 'https://pai.dev',
          'X-Title': 'PAI Knowledge System Test',
        },
        body: JSON.stringify({
          model: model.id,
          messages: [{ role: 'user', content: EXTRACTION_PROMPT(testCase.text) }],
          temperature: 0.1,
          max_tokens: 1000,
        }),
      });

      const responseMs = Date.now() - start;

      if (!response.ok) {
        const errorBody = await response.text();
        console.log(` ❌ ${testCase.name}: HTTP ${response.status}`);
        results.push(
          failedCase(
            testCase.name,
            responseMs,
            `HTTP ${response.status}: ${errorBody.slice(0, 100)}`
          )
        );
      } else {
        // The response body is untyped JSON; every field is read defensively.
        const data = (await response.json()) as any;
        const rawContent = data?.choices?.[0]?.message?.content;
        const content = typeof rawContent === 'string' ? rawContent : '';
        const inputTokens: number = data?.usage?.prompt_tokens ?? 0;
        const outputTokens: number = data?.usage?.completion_tokens ?? 0;

        const json = extractJSON(content);

        if (json) {
          const quality = calculateQuality(json, testCase);
          qualityScores.push(quality);
          console.log(
            ` ✅ ${testCase.name}: ${json.entities.length} entities, ${json.relationships.length} rels (${responseMs}ms, quality: ${quality.toFixed(0)}%)`
          );
          results.push({
            name: testCase.name,
            passed: true,
            entities: json.entities.length,
            relationships: json.relationships.length,
            responseMs,
            inputTokens,
            outputTokens,
          });
        } else {
          console.log(` ❌ ${testCase.name}: Invalid JSON (${responseMs}ms)`);
          results.push(
            failedCase(
              testCase.name,
              responseMs,
              'Invalid JSON structure',
              inputTokens,
              outputTokens
            )
          );
        }
      }
    } catch (err: unknown) {
      const responseMs = Date.now() - start;
      const message = err instanceof Error ? err.message : String(err);
      console.log(` ❌ ${testCase.name}: ${message}`);
      results.push(failedCase(testCase.name, responseMs, message));
    }

    // Rate limiting - wait between calls. This runs after every outcome,
    // including HTTP errors such as 429, where backing off matters most.
    await new Promise((r) => setTimeout(r, 500));
  }

  // Calculate aggregates
  const roundedMean = (pick: (r: TestCaseResult) => number): number =>
    results.length > 0
      ? Math.round(results.reduce((sum, r) => sum + pick(r), 0) / results.length)
      : 0;

  const passedCount = results.filter((r) => r.passed).length;
  const avgResponseMs = roundedMean((r) => r.responseMs);
  const avgInputTokens = roundedMean((r) => r.inputTokens);
  const avgOutputTokens = roundedMean((r) => r.outputTokens);

  // Cost per 1000 API calls (prices are per 1M tokens)
  const costPer1000 =
    (avgInputTokens / 1_000_000) * model.inputPrice * 1000 +
    (avgOutputTokens / 1_000_000) * model.outputPrice * 1000;

  // Quality score (average across passed tests)
  const qualityScore =
    qualityScores.length > 0
      ? qualityScores.reduce((sum, q) => sum + q, 0) / qualityScores.length
      : 0;

  return {
    model: model.id,
    modelName: model.name,
    inputPrice: model.inputPrice,
    outputPrice: model.outputPrice,
    avgResponseMs,
    avgInputTokens,
    avgOutputTokens,
    costPer1000Calls: costPer1000,
    qualityScore,
    // Guard the division so an empty TEST_CASES list yields 0 instead of NaN.
    passRate: results.length > 0 ? (passedCount / results.length) * 100 : 0,
    results,
  };
}
329
+
330
/**
 * Prints the cost-vs-performance table, the list of failed models and the
 * recommendations to the console.
 *
 * Models with a pass rate above 0 are ranked by value score (quality divided by
 * cost per 1000 calls). Models with a pass rate of 0 are listed separately with
 * their error. When no model passed, a notice is printed instead of
 * recommendations.
 *
 * @param results - One aggregated result per tested model.
 */
function printComparisonTable(results: ModelResult[]) {
  console.log(`\n${'═'.repeat(120)}`);
  console.log('📊 COST VS PERFORMANCE COMPARISON TABLE');
  console.log(`${'═'.repeat(120)}\n`);

  // Sort by value score (quality / cost).
  // NOTE(review): a zero cost sorts as value 0 but is displayed as '∞' below;
  // zero cost only occurs when no token usage was reported — confirm intent.
  const valueOf = (r: ModelResult): number =>
    r.costPer1000Calls > 0 ? r.qualityScore / r.costPer1000Calls : 0;
  const sortedResults = results
    .filter((r) => r.passRate > 0)
    .sort((a, b) => valueOf(b) - valueOf(a));

  // Header
  console.log(
    '| Rank | Model | Pass Rate | Quality | Avg Time | Input$/M | Output$/M | Cost/1K Calls | Value Score |'
  );
  console.log(
    '|------|--------------------------|-----------|---------|----------|----------|-----------|---------------|-------------|'
  );

  sortedResults.forEach((r, i) => {
    const valueScore =
      r.costPer1000Calls > 0 ? (r.qualityScore / r.costPer1000Calls).toFixed(1) : '∞';
    console.log(
      `| ${(i + 1).toString().padStart(4)} | ${r.modelName.padEnd(24)} | ${r.passRate.toFixed(0).padStart(7)}% | ${r.qualityScore.toFixed(0).padStart(5)}% | ${(`${r.avgResponseMs}ms`).padStart(8)} | $${r.inputPrice.toFixed(2).padStart(6)} | $${r.outputPrice.toFixed(2).padStart(9)} | $${r.costPer1000Calls.toFixed(4).padStart(11)} | ${valueScore.toString().padStart(11)} |`
    );
  });

  // Failed models
  const failed = results.filter((r) => r.passRate === 0);
  if (failed.length > 0) {
    console.log('\n❌ Failed Models:');
    failed.forEach((r) => {
      // A model whose run threw has no per-case results; its reason is on `error`.
      const reason = r.error || r.results[0]?.error || 'Unknown error';
      console.log(` - ${r.modelName}: ${reason}`);
    });
  }

  // Recommendations
  console.log(`\n${'─'.repeat(120)}`);
  console.log('📌 RECOMMENDATIONS');
  console.log('─'.repeat(120));

  const [bestValue] = sortedResults; // Already sorted by value
  if (!bestValue) {
    // Nothing passed, so there is nothing to recommend.
    console.log('\n⚠️ No model passed any test case; no recommendations available.');
    return;
  }

  const bestQuality = sortedResults.reduce(
    (a, b) => (a.qualityScore > b.qualityScore ? a : b),
    bestValue
  );
  const cheapest = sortedResults.reduce(
    (a, b) => (a.costPer1000Calls < b.costPer1000Calls ? a : b),
    bestValue
  );
  const fastest = sortedResults.reduce(
    (a, b) => (a.avgResponseMs < b.avgResponseMs ? a : b),
    bestValue
  );

  console.log(
    `\n🏆 Best Quality: ${bestQuality.modelName} (${bestQuality.qualityScore.toFixed(0)}% quality)`
  );
  console.log(
    `💰 Best Value: ${bestValue.modelName} (${bestValue.qualityScore.toFixed(0)}% quality at $${bestValue.costPer1000Calls.toFixed(4)}/1K calls)`
  );
  console.log(
    `🪙 Cheapest: ${cheapest.modelName} ($${cheapest.costPer1000Calls.toFixed(4)}/1K calls)`
  );
  console.log(`⚡ Fastest: ${fastest.modelName} (${fastest.avgResponseMs}ms avg)`);

  // Hybrid recommendation
  console.log('\n🎯 RECOMMENDED FOR KNOWLEDGE SYSTEM:');
  console.log(` Primary (best balance): ${bestValue.modelName}`);
  console.log(` Budget option: ${cheapest.modelName}`);
  console.log(` Premium option: ${bestQuality.modelName}`);
}
405
+
406
/**
 * Script entry point: tests every model in LLM_MODELS sequentially, prints the
 * comparison table and writes the raw results to
 * `openrouter-test-results.json` next to this script.
 *
 * A model whose run throws is recorded as a zeroed result carrying the error
 * message, so one failing model does not stop the remaining ones.
 */
async function main() {
  console.log('═'.repeat(60));
  console.log('🧪 OpenRouter LLM Model Comparison Test');
  console.log(` Testing ${LLM_MODELS.length} models for entity extraction`);
  console.log('═'.repeat(60));

  const results: ModelResult[] = [];

  for (const model of LLM_MODELS) {
    try {
      results.push(await testModel(model));
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      console.log(`\n❌ ${model.name}: ${message}`);
      results.push({
        model: model.id,
        modelName: model.name,
        inputPrice: model.inputPrice,
        outputPrice: model.outputPrice,
        avgResponseMs: 0,
        avgInputTokens: 0,
        avgOutputTokens: 0,
        costPer1000Calls: 0,
        qualityScore: 0,
        passRate: 0,
        results: [],
        error: message,
      });
    }

    // Rate limiting between models
    await new Promise((r) => setTimeout(r, 1000));
  }

  // Print comparison table
  printComparisonTable(results);

  // Save results to file. URL.pathname is percent-encoded (a space becomes
  // "%20"), so convert the file URL to a real filesystem path instead.
  const { fileURLToPath } = await import('node:url');
  const outputPath = fileURLToPath(new URL('./openrouter-test-results.json', import.meta.url));
  await Bun.write(
    outputPath,
    JSON.stringify(
      {
        results,
        timestamp: new Date().toISOString(),
        testCases: TEST_CASES.map((t) => t.name),
      },
      null,
      2
    )
  );
  console.log(`\n📁 Results saved to: ${outputPath}`);
}
459
+
460
// Run the script. Report any unexpected failure and exit non-zero so shells and
// CI can detect it; logging alone would leave the exit status at 0.
main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});