@duckmind/deepquark-darwin-arm64 0.9.83 → 0.9.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/.deepquark/skills/bundled/knowledge-graph/SKILL.md +385 -0
  2. package/.deepquark/skills/bundled/knowledge-graph/STANDARDS.md +461 -0
  3. package/.deepquark/skills/bundled/knowledge-graph/lib/cli.ts +588 -0
  4. package/.deepquark/skills/bundled/knowledge-graph/lib/config.ts +630 -0
  5. package/.deepquark/skills/bundled/knowledge-graph/lib/connection-profile.ts +629 -0
  6. package/.deepquark/skills/bundled/knowledge-graph/lib/container.ts +756 -0
  7. package/.deepquark/skills/bundled/knowledge-graph/lib/mcp-client.ts +1310 -0
  8. package/.deepquark/skills/bundled/knowledge-graph/lib/output-formatter.ts +997 -0
  9. package/.deepquark/skills/bundled/knowledge-graph/lib/token-metrics.ts +335 -0
  10. package/.deepquark/skills/bundled/knowledge-graph/lib/transformation-log.ts +137 -0
  11. package/.deepquark/skills/bundled/knowledge-graph/lib/wrapper-config.ts +113 -0
  12. package/.deepquark/skills/bundled/knowledge-graph/server/.env.example +129 -0
  13. package/.deepquark/skills/bundled/knowledge-graph/server/compare-embeddings.ts +175 -0
  14. package/.deepquark/skills/bundled/knowledge-graph/server/config-falkordb.yaml +108 -0
  15. package/.deepquark/skills/bundled/knowledge-graph/server/config-neo4j.yaml +111 -0
  16. package/.deepquark/skills/bundled/knowledge-graph/server/diagnose.ts +483 -0
  17. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-falkordb-dev.yml +146 -0
  18. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-falkordb.yml +151 -0
  19. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-neo4j-dev-local.yml +161 -0
  20. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-neo4j-dev.yml +161 -0
  21. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-neo4j.yml +169 -0
  22. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-production.yml +128 -0
  23. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose-test.yml +10 -0
  24. package/.deepquark/skills/bundled/knowledge-graph/server/docker-compose.yml +84 -0
  25. package/.deepquark/skills/bundled/knowledge-graph/server/entrypoint.sh +40 -0
  26. package/.deepquark/skills/bundled/knowledge-graph/server/install.ts +2054 -0
  27. package/.deepquark/skills/bundled/knowledge-graph/server/podman-compose-falkordb.yml +78 -0
  28. package/.deepquark/skills/bundled/knowledge-graph/server/podman-compose-neo4j.yml +88 -0
  29. package/.deepquark/skills/bundled/knowledge-graph/server/podman-compose.yml +83 -0
  30. package/.deepquark/skills/bundled/knowledge-graph/server/test-all-llms-mcp.ts +387 -0
  31. package/.deepquark/skills/bundled/knowledge-graph/server/test-embedding-models.ts +201 -0
  32. package/.deepquark/skills/bundled/knowledge-graph/server/test-embedding-providers.ts +641 -0
  33. package/.deepquark/skills/bundled/knowledge-graph/server/test-graphiti-model.ts +217 -0
  34. package/.deepquark/skills/bundled/knowledge-graph/server/test-grok-correct.ts +141 -0
  35. package/.deepquark/skills/bundled/knowledge-graph/server/test-grok-llms-mcp.ts +386 -0
  36. package/.deepquark/skills/bundled/knowledge-graph/server/test-grok-models.ts +173 -0
  37. package/.deepquark/skills/bundled/knowledge-graph/server/test-llama-extraction.ts +188 -0
  38. package/.deepquark/skills/bundled/knowledge-graph/server/test-mcp-final.ts +240 -0
  39. package/.deepquark/skills/bundled/knowledge-graph/server/test-mcp-live.ts +187 -0
  40. package/.deepquark/skills/bundled/knowledge-graph/server/test-mcp-session.ts +127 -0
  41. package/.deepquark/skills/bundled/knowledge-graph/server/test-model-combinations.ts +316 -0
  42. package/.deepquark/skills/bundled/knowledge-graph/server/test-ollama-models.ts +228 -0
  43. package/.deepquark/skills/bundled/knowledge-graph/server/test-openrouter-models.ts +460 -0
  44. package/.deepquark/skills/bundled/knowledge-graph/server/test-real-life-mcp.ts +311 -0
  45. package/.deepquark/skills/bundled/knowledge-graph/server/test-search-debug.ts +199 -0
  46. package/.deepquark/skills/bundled/knowledge-graph/tools/Install.md +104 -0
  47. package/.deepquark/skills/bundled/knowledge-graph/tools/README.md +120 -0
  48. package/.deepquark/skills/bundled/knowledge-graph/tools/knowledge-cli.ts +996 -0
  49. package/.deepquark/skills/bundled/knowledge-graph/tools/server-cli.ts +531 -0
  50. package/.deepquark/skills/bundled/knowledge-graph/workflows/BulkImport.md +514 -0
  51. package/.deepquark/skills/bundled/knowledge-graph/workflows/CaptureEpisode.md +242 -0
  52. package/.deepquark/skills/bundled/knowledge-graph/workflows/ClearGraph.md +392 -0
  53. package/.deepquark/skills/bundled/knowledge-graph/workflows/GetRecent.md +352 -0
  54. package/.deepquark/skills/bundled/knowledge-graph/workflows/GetStatus.md +373 -0
  55. package/.deepquark/skills/bundled/knowledge-graph/workflows/HealthReport.md +212 -0
  56. package/.deepquark/skills/bundled/knowledge-graph/workflows/InvestigateEntity.md +142 -0
  57. package/.deepquark/skills/bundled/knowledge-graph/workflows/OntologyManagement.md +201 -0
  58. package/.deepquark/skills/bundled/knowledge-graph/workflows/RunMaintenance.md +302 -0
  59. package/.deepquark/skills/bundled/knowledge-graph/workflows/SearchByDate.md +255 -0
  60. package/.deepquark/skills/bundled/knowledge-graph/workflows/SearchFacts.md +382 -0
  61. package/.deepquark/skills/bundled/knowledge-graph/workflows/SearchKnowledge.md +374 -0
  62. package/.deepquark/skills/bundled/knowledge-graph/workflows/StixImport.md +212 -0
  63. package/bin/deepquark +0 -0
  64. package/package.json +1 -1
  65. package/.deepquark/skills/bundled/ge-payroll/SKILL.md +0 -153
  66. package/.deepquark/skills/bundled/ge-payroll/evals/evals.json +0 -23
  67. package/.deepquark/skills/bundled/ge-payroll/references/pain-points-improvements.md +0 -106
  68. package/.deepquark/skills/bundled/ge-payroll/references/process-detail.md +0 -217
  69. package/.deepquark/skills/bundled/ge-payroll/references/raci-stakeholders.md +0 -85
  70. package/.deepquark/skills/bundled/ge-payroll/references/timeline-mandays.md +0 -64
@@ -0,0 +1,386 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Grok LLM Models MCP Test
4
+ * Tests all Grok model variants for real-life entity extraction via Graphiti
5
+ */
6
+
7
+ import { $ } from 'bun';
8
+
9
// Endpoint that mcpRequest() POSTs every JSON-RPC message to.
const MCP_URL = 'http://localhost:8000/mcp';
// Accept header value: mcpRequest() can parse either `data: ` event lines or a plain JSON body.
const ACCEPT = 'application/json, text/event-stream';
// Env file that updateEnvAndRestart() rewrites when switching models.
const ENV_FILE = '../../config/.env';
12
+
13
// All Grok models from benchmark (sorted by cost).
// `model` is the identifier written into the env file; `cost` is the figure
// printed by main() as "$<cost>/1K" and used to order the results table.
const GROK_MODELS = [
  { name: 'Grok 4 Fast', model: 'x-ai/grok-4-fast', cost: 0.28 },
  // Plain literal instead of `Math.LOG10E` (0.4342944819...): a math constant is
  // not a price and printed as a 16-digit number in the report.
  // NOTE(review): 0.434 is that value rounded to the precision of the other rows —
  // confirm against the benchmark figures.
  { name: 'Grok 4.1 Fast', model: 'x-ai/grok-4.1-fast', cost: 0.434 },
  { name: 'Grok 3 Mini', model: 'x-ai/grok-3-mini', cost: 0.56 },
  { name: 'Grok 3', model: 'x-ai/grok-3', cost: 2.163 },
  { name: 'Grok 4', model: 'x-ai/grok-4', cost: 11.842 },
];
21
+
22
// Abort before touching any container when the OpenRouter key is missing.
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
if (!OPENROUTER_API_KEY) {
  // Status glyph restored from its mis-encoded form.
  console.error('❌ OPENROUTER_API_KEY required');
  console.log('\nUsage: OPENROUTER_API_KEY=sk-or-v1-... bun test-grok-llms-mcp.ts');
  process.exit(1);
}
28
+
29
// Test episode - complex enough to test extraction quality.
// `name` and `body` are sent to the `add_memory` tool by testModel().
const TEST_EPISODE = {
  name: 'Grok Test Episode',
  body: "During the Q4 planning meeting at TechCorp headquarters in Austin, CEO Sarah Martinez announced a strategic partnership with CloudBase Inc. The $50 million deal, facilitated by Morgan Stanley, will integrate CloudBase's AI platform into TechCorp's enterprise suite by March 2026.",
};

// Expected entities to find; testModel() matches them case-insensitively by substring
// (in either direction) against the names read back from the graph.
const EXPECTED_ENTITIES = ['TechCorp', 'Sarah Martinez', 'CloudBase', 'Morgan Stanley', 'Austin'];

// Session id taken from the `mcp-session-id` response header by mcpRequest();
// testModel() resets it to null before each model.
let sessionId: string | null = null;
39
+
40
/**
 * Send one JSON-RPC 2.0 message to the MCP server and return its outcome.
 *
 * The current `sessionId` (if any) is sent in the `mcp-session-id` header, and
 * any session id returned by the server replaces it.
 *
 * @param method JSON-RPC method name, e.g. `initialize` or `tools/call`.
 * @param params JSON-RPC params object.
 * @returns `{ result, error }`. Never rejects: transport failures are returned
 *          as `{ error: { message } }`.
 */
async function mcpRequest(method: string, params: any = {}): Promise<any> {
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    Accept: ACCEPT,
  };
  if (sessionId) headers['mcp-session-id'] = sessionId;

  // JSON-RPC 2.0: a notification is a request WITHOUT an `id`. Sending one with
  // an id turns it into a call to an unknown method.
  const payload: Record<string, unknown> = { jsonrpc: '2.0', method, params };
  if (!method.startsWith('notifications/')) payload.id = Date.now();

  try {
    const res = await fetch(MCP_URL, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload),
    });

    const newSessionId = res.headers.get('mcp-session-id');
    if (newSessionId) sessionId = newSessionId;

    const text = await res.text();
    let result: any = null;
    let error: any = null;

    // First try the event-stream form: one JSON document per `data: ` line.
    for (const line of text.split('\n')) {
      if (!line.startsWith('data: ')) continue;
      try {
        const data = JSON.parse(line.slice(6));
        if (data.result) result = data.result;
        if (data.error) error = data.error;
      } catch {
        // Best effort: ignore data lines that are not complete JSON.
      }
    }

    // Otherwise the body may be a single plain JSON response.
    if (!result && !error) {
      try {
        const json = JSON.parse(text);
        result = json.result;
        error = json.error;
      } catch {
        // Body is neither SSE nor JSON (e.g. empty reply to a notification).
      }
    }

    // A failed HTTP exchange with no parseable JSON-RPC error used to come back
    // as "no error", which callers treat as success. Surface it explicitly.
    if (!res.ok && !error) {
      error = { message: `HTTP ${res.status} ${res.statusText}`.trim() };
    }

    return { result, error };
  } catch (e: any) {
    return { error: { message: e.message } };
  }
}
84
+
85
/**
 * Invoke an MCP tool through `tools/call` and time the round trip.
 *
 * @param name Tool name.
 * @param args Tool arguments, passed through as `arguments`.
 * @returns `success: false` with the JSON-RPC error as `data` when the call
 *          failed; otherwise `success: true` with the first content item's text
 *          parsed as JSON, or the raw result when that text is absent or not JSON.
 */
async function callTool(
  name: string,
  args: any
): Promise<{ success: boolean; data: any; duration: number }> {
  const startedAt = Date.now();
  const response = await mcpRequest('tools/call', { name, arguments: args });
  const duration = Date.now() - startedAt;

  if (response.error) {
    return { success: false, data: response.error, duration };
  }

  let data: any = response.result;
  const payloadText = response.result?.content?.[0]?.text;
  if (payloadText) {
    try {
      data = JSON.parse(payloadText);
    } catch {}
  }

  return { success: true, data, duration };
}
104
+
105
/**
 * Poll the server's /health endpoint until it answers with an OK status.
 *
 * Each probe is aborted after 2 s and probes are spaced 2 s apart.
 *
 * @param maxWait Polling budget in milliseconds.
 * @returns true as soon as a probe succeeds, false once the budget is spent.
 */
async function waitForHealth(maxWait = 60000): Promise<boolean> {
  const begunAt = Date.now();
  while (Date.now() - begunAt < maxWait) {
    const healthy = await fetch('http://localhost:8000/health', {
      signal: AbortSignal.timeout(2000),
    }).then(
      (res) => res.ok,
      () => false
    );
    if (healthy) return true;
    await Bun.sleep(2000);
  }
  return false;
}
118
+
119
/**
 * Point the server at another model and recreate its container.
 *
 * Rewrites the `MADEINOZ_KNOWLEDGE_MODEL_NAME=` and `MODEL_NAME=` entries in
 * ENV_FILE, force-recreates the `graphiti-mcp` service and waits for it to
 * report healthy.
 *
 * @param modelName Model identifier to write into the env file.
 * @returns true when the container came back healthy; false on any failure,
 *          including an unreadable or unwritable env file.
 */
async function updateEnvAndRestart(modelName: string): Promise<boolean> {
  // Reading/writing the env file used to sit outside any try block, so a missing
  // file rejected instead of producing the documented `false`.
  try {
    const current = await Bun.file(ENV_FILE).text();

    // Replacer functions insert the model name verbatim; a replacement *string*
    // would interpret `$&`, `$1`, ... sequences inside it.
    const updated = current
      .replace(
        /MADEINOZ_KNOWLEDGE_MODEL_NAME=.*/g,
        () => `MADEINOZ_KNOWLEDGE_MODEL_NAME=${modelName}`
      )
      .replace(/^MODEL_NAME=.*/gm, () => `MODEL_NAME=${modelName}`);

    await Bun.write(ENV_FILE, updated);
  } catch (e: any) {
    console.log(` ❌ Could not update ${ENV_FILE}: ${e.message}`);
    return false;
  }

  // Restart container
  try {
    await $`docker-compose -f docker-compose-neo4j.yml up -d --force-recreate graphiti-mcp`.quiet();
    await Bun.sleep(5000); // Initial wait
    return await waitForHealth();
  } catch (e: any) {
    console.log(` ❌ Restart failed: ${e.message}`);
    return false;
  }
}
144
+
145
/**
 * List the names of Entity nodes stored for one group.
 *
 * Runs cypher-shell inside the Neo4j container and parses its output.
 *
 * @param groupId Group whose entities should be listed.
 * @returns Entity names, or an empty array when the query could not be run.
 */
async function checkExtractedEntities(groupId: string): Promise<string[]> {
  // The id is embedded in a single-quoted Cypher string literal, so escape
  // backslashes and quotes to keep it from terminating the literal.
  const safeGroupId = groupId.replace(/\\/g, '\\\\').replace(/'/g, "\\'");

  try {
    // NOTE(review): credentials are hard-coded here; consider reading them from the env file.
    const result =
      await $`docker exec madeinoz-knowledge-neo4j cypher-shell -u neo4j -p madeinozknowledge "MATCH (n:Entity {group_id: '${safeGroupId}'}) RETURN n.name as name"`.text();

    // Parse cypher-shell output: drop the header row, strip quotes, skip blanks.
    return result
      .split('\n')
      .slice(1)
      .map((line) => line.replace(/"/g, '').trim())
      .filter((name) => name && name !== 'name');
  } catch {
    // Best effort: an unreachable container simply yields no entities.
    return [];
  }
}
162
+
163
/**
 * Inspect the last 30 log lines of the graphiti-mcp service for the outcome of
 * episode processing.
 *
 * @returns `success: true` when a success message is present; otherwise
 *          `success: false` with the logged failure reason, a "not completed"
 *          marker, or a note that the logs could not be read.
 */
async function checkLogs(): Promise<{ success: boolean; error?: string }> {
  let logs: string;
  try {
    logs = await $`docker-compose -f docker-compose-neo4j.yml logs --tail=30 graphiti-mcp`.text();
  } catch {
    return { success: false, error: 'Could not read logs' };
  }

  if (logs.includes('Successfully processed episode')) {
    return { success: true };
  }
  if (!logs.includes('Failed to process episode')) {
    return { success: false, error: 'Processing not completed' };
  }

  const reason = /Failed to process episode.*?: (.+)/.exec(logs)?.[1];
  return { success: false, error: reason || 'Unknown error' };
}
180
+
181
/**
 * Run the extraction test against the currently configured model.
 *
 * Opens a fresh MCP session, submits TEST_EPISODE via `add_memory`, polls the
 * container logs until processing finishes (15 polls, 2 s apart), then reads
 * the extracted entities back and scores them against EXPECTED_ENTITIES.
 *
 * @param model Entry of GROK_MODELS being tested.
 * @param index Position of the model; used to build a distinct group id.
 * @returns Result record; `works` is true when at least 3 entities were
 *          extracted and at least 3 expected entities were matched.
 */
async function testModel(model: (typeof GROK_MODELS)[0], index: number): Promise<any> {
  const groupId = `grok-test-${index}`;
  sessionId = null; // do not reuse a session id from a previous model's run

  const result = {
    name: model.name,
    model: model.model,
    cost: model.cost,
    works: false,
    entitiesFound: 0,
    expectedFound: 0,
    error: null as string | null,
    extractionTimeMs: 0,
  };

  // Initialize MCP session
  const initRes = await mcpRequest('initialize', {
    protocolVersion: '2024-11-05',
    capabilities: {},
    clientInfo: { name: 'grok-test', version: '1.0' },
  });

  if (initRes.error) {
    result.error = `Init failed: ${initRes.error.message}`;
    return result;
  }

  await mcpRequest('notifications/initialized', {});

  // Add test episode
  const addStart = Date.now();
  const addRes = await callTool('add_memory', {
    name: TEST_EPISODE.name,
    episode_body: TEST_EPISODE.body,
    source: 'text',
    group_id: groupId,
  });

  if (!addRes.success) {
    result.error = `add_memory failed: ${JSON.stringify(addRes.data).slice(0, 100)}`;
    return result;
  }

  // Wait for async processing (up to 30 seconds).
  // Hourglass glyph restored from its mis-encoded form.
  console.log(' ⏳ Waiting for extraction...');
  let processed = false;
  for (let i = 0; i < 15; i++) {
    await Bun.sleep(2000);
    const logCheck = await checkLogs();
    if (logCheck.success) {
      processed = true;
      break;
    }
    // Only a validation error ends the wait early; other states keep polling.
    if (logCheck.error?.includes('validation error')) {
      result.error = logCheck.error;
      return result;
    }
  }

  result.extractionTimeMs = Date.now() - addStart;

  if (!processed) {
    // One last look at the logs before declaring the run failed.
    const logCheck = await checkLogs();
    if (!logCheck.success) {
      result.error = logCheck.error || 'Extraction timeout';
      return result;
    }
  }

  // Check extracted entities
  const entities = await checkExtractedEntities(groupId);
  result.entitiesFound = entities.length;

  // Count how many expected entities were found (case-insensitive, substring
  // match in either direction).
  for (const expected of EXPECTED_ENTITIES) {
    const wanted = expected.toLowerCase();
    const found = entities.some((e) => {
      const got = e.toLowerCase();
      return got.includes(wanted) || wanted.includes(got);
    });
    if (found) result.expectedFound++;
  }

  result.works = result.entitiesFound >= 3 && result.expectedFound >= 3;

  return result;
}
269
+
270
/**
 * Test every entry of GROK_MODELS in turn, print a summary, write the results
 * to grok-mcp-results.json and finally switch the server back to GPT-4o-mini.
 *
 * The restore step runs in a `finally` block so an unexpected error part-way
 * through does not leave the server configured with the last model tested.
 */
async function main() {
  // Box-drawing characters and emoji below are restored from mis-encoded text.
  console.log('═'.repeat(70));
  console.log('🤖 Grok LLM Models MCP Test');
  console.log('═'.repeat(70));
  console.log(`\nTest Episode: "${TEST_EPISODE.body.slice(0, 80)}..."`);
  console.log(`Expected Entities: ${EXPECTED_ENTITIES.join(', ')}`);
  console.log(`\nGrok models to test: ${GROK_MODELS.length}`);

  const results: any[] = [];

  try {
    for (let i = 0; i < GROK_MODELS.length; i++) {
      const model = GROK_MODELS[i];
      console.log(`\n${'─'.repeat(70)}`);
      console.log(`[${i + 1}/${GROK_MODELS.length}] ${model.name} ($${model.cost}/1K)`);
      console.log(`${'─'.repeat(70)}`);

      // Update config and restart
      console.log(` 🔄 Configuring ${model.model}...`);
      const started = await updateEnvAndRestart(model.model);

      if (!started) {
        console.log(' ❌ Container failed to start');
        results.push({
          name: model.name,
          model: model.model,
          cost: model.cost,
          works: false,
          error: 'Container failed',
        });
        continue;
      }

      // Verify model is configured. This check is advisory only, so unreadable
      // logs are treated like "model not mentioned" instead of aborting the run.
      const logs = await $`docker-compose -f docker-compose-neo4j.yml logs --tail=5 graphiti-mcp`
        .text()
        .catch(() => '');
      if (!logs.includes(model.model)) {
        console.log(' ⚠️ Model not reflected in logs, continuing anyway...');
      }

      console.log(' ✅ Container ready');
      console.log(' 📥 Testing entity extraction...');

      const result = await testModel(model, i);
      results.push(result);

      if (result.works) {
        console.log(
          ` ✅ PASS - ${result.entitiesFound} entities, ${result.expectedFound}/${EXPECTED_ENTITIES.length} expected (${result.extractionTimeMs}ms)`
        );
      } else {
        console.log(
          ` ❌ FAIL - ${result.error || `Only ${result.expectedFound}/${EXPECTED_ENTITIES.length} expected entities`}`
        );
      }
    }

    // Summary
    console.log(`\n${'═'.repeat(70)}`);
    console.log('📊 GROK MODEL RESULTS');
    console.log(`${'═'.repeat(70)}`);

    const failed = results.filter((r) => !r.works);
    // Sort a copy once; Array.prototype.sort mutates its receiver.
    const working = results.filter((r) => r.works);
    const workingByCost = [...working].sort((a, b) => a.cost - b.cost);

    console.log(`\n✅ WORKING MODELS (${working.length}/${results.length}):`);
    if (workingByCost.length > 0) {
      console.log('| Model | Cost/1K | Entities | Time |');
      console.log('|-------|---------|----------|------|');
      for (const r of workingByCost) {
        console.log(
          `| ${r.name.padEnd(20)} | $${r.cost.toFixed(4).padStart(6)} | ${r.entitiesFound}/${r.expectedFound} | ${r.extractionTimeMs}ms |`
        );
      }
    } else {
      console.log(' (none)');
    }

    if (failed.length > 0) {
      console.log(`\n❌ FAILED MODELS (${failed.length}/${results.length}):`);
      console.log('| Model | Cost/1K | Error |');
      console.log('|-------|---------|-------|');
      for (const r of failed) {
        const error = (r.error || 'Unknown').slice(0, 50);
        console.log(`| ${r.name.padEnd(20)} | $${r.cost.toFixed(4).padStart(6)} | ${error} |`);
      }
    }

    // Save results
    await Bun.write(
      'grok-mcp-results.json',
      JSON.stringify(
        {
          results,
          summary: {
            total: results.length,
            working: working.length,
            failed: failed.length,
            workingModels: workingByCost.map((r) => r.model),
            cheapestWorking: workingByCost[0]?.name || 'None',
          },
          testEpisode: TEST_EPISODE,
          expectedEntities: EXPECTED_ENTITIES,
          timestamp: new Date().toISOString(),
        },
        null,
        2
      )
    );

    // NOTE(review): the original glyph lost a byte in transit; a folder icon is assumed.
    console.log('\n📁 Results saved to grok-mcp-results.json');
  } finally {
    // Restore to GPT-4o-mini (known working), even when the run was cut short.
    console.log('\n🔄 Restoring GPT-4o-mini configuration...');
    await updateEnvAndRestart('openai/gpt-4o-mini');
  }
}
385
+
386
// Log an unexpected failure and mark the process as failed; logging alone left
// the exit status at 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
@@ -0,0 +1,173 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Test Grok models via OpenRouter
4
+ */
5
+
6
// Credentials and API root used for every chat-completion request in testModel().
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1';

// Abort before issuing any request when the key is missing.
if (!OPENROUTER_API_KEY) {
  // Status glyph restored from its mis-encoded form.
  console.error('❌ OPENROUTER_API_KEY required');
  process.exit(1);
}
13
+
14
// Grok models on OpenRouter.
// `id` is sent as the request's `model`; `inputPrice` / `outputPrice` are applied
// per million tokens by testModel() and shown as "$/M" in the comparison table.
const GROK_MODELS = [
  { id: 'x-ai/grok-2-1212', name: 'Grok 2', inputPrice: 2.0, outputPrice: 10.0 },
  { id: 'x-ai/grok-2-vision-1212', name: 'Grok 2 Vision', inputPrice: 2.0, outputPrice: 10.0 },
  { id: 'x-ai/grok-beta', name: 'Grok Beta', inputPrice: 5.0, outputPrice: 15.0 },
];
20
+
21
// Sample texts sent to every model; `name` labels the per-test log line and
// `text` is embedded into the prompt built by PROMPT().
const TEST_CASES = [
  {
    name: 'Basic Entity Extraction',
    text: 'John Smith works at Acme Corp in New York. He met Sarah Jones yesterday to discuss the Q4 budget.',
  },
  {
    name: 'Technical Content',
    text: 'The PAI system uses Neo4j for graph storage and OpenAI for embeddings. It was created by Daniel Miessler.',
  },
  {
    name: 'Complex Relationships',
    text: `Alice, the CTO of TechStart, acquired DataFlow Inc last month. Bob, who was DataFlow's CEO, now reports to Alice.`,
  },
];
35
+
36
// Builds the user message for one test text. The format line asks for an object
// with "entities" and "relationships" keys, which is what extractJSON() looks for.
const PROMPT = (
  text: string
) => `Extract entities and relationships from this text. Return ONLY valid JSON.

Text: "${text}"

Format: {"entities": [{"name": "string", "type": "PERSON|ORGANIZATION|LOCATION"}], "relationships": [{"source": "string", "target": "string", "type": "string"}]}`;
43
+
44
/**
 * Pull the extraction object out of a model reply.
 *
 * Markdown code fences are stripped first. The cleaned text is parsed as a
 * whole; if that fails, the widest `{ ... }` span mentioning "entities" and
 * then "relationships" is parsed instead.
 *
 * @param text Raw reply content from the model.
 * @returns The parsed object when both `entities` and `relationships` are
 *          arrays, otherwise null.
 */
function extractJSON(text: string): any | null {
  // Callers read `.length` on both properties, so require real arrays. The
  // fallback path previously returned whatever parsed, e.g. `"entities": null`.
  const parseExtraction = (candidate: string): any | null => {
    try {
      const parsed = JSON.parse(candidate);
      const valid = Array.isArray(parsed?.entities) && Array.isArray(parsed?.relationships);
      return valid ? parsed : null;
    } catch {
      return null; // not JSON
    }
  };

  const clean = text
    .replace(/```json\n?/g, '')
    .replace(/```\n?/g, '')
    .trim();

  const direct = parseExtraction(clean);
  if (direct) return direct;

  // Reply has prose around the JSON: try the embedded object.
  const match = clean.match(/\{[\s\S]*"entities"[\s\S]*"relationships"[\s\S]*\}/);
  return match ? parseExtraction(match[0]) : null;
}
61
+
62
/**
 * Run every TEST_CASES entry against one model via OpenRouter.
 *
 * A test passes when the reply contains an extraction object that
 * extractJSON() accepts. Requests are issued sequentially, 500 ms apart.
 *
 * @param model Entry of GROK_MODELS to test.
 * @returns Summary with pass rate (percent of all test cases), average latency,
 *          estimated cost per 1000 calls and the model's list prices.
 */
async function testModel(model: (typeof GROK_MODELS)[0]) {
  // Emoji and box-drawing characters below are restored from mis-encoded text.
  console.log(`\n🔄 Testing: ${model.name} (${model.id})`);
  console.log(` ${'─'.repeat(50)}`);

  let passed = 0;
  let totalMs = 0;
  let totalInput = 0;
  let totalOutput = 0;
  // Sample counts for the averages. Dividing by TEST_CASES.length understated
  // latency and cost whenever a request failed and contributed nothing.
  let timedCalls = 0;
  let meteredCalls = 0;

  for (const test of TEST_CASES) {
    const start = Date.now();
    try {
      const res = await fetch(`${OPENROUTER_BASE_URL}/chat/completions`, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: model.id,
          messages: [{ role: 'user', content: PROMPT(test.text) }],
          temperature: 0.1,
          max_tokens: 1000,
        }),
      });

      const ms = Date.now() - start;
      totalMs += ms;
      timedCalls++;

      if (!res.ok) {
        const err = await res.text();
        console.log(` ❌ ${test.name}: HTTP ${res.status} - ${err.slice(0, 80)}`);
        continue;
      }

      const data = (await res.json()) as any;
      const content = data.choices?.[0]?.message?.content || '';
      totalInput += data.usage?.prompt_tokens || 0;
      totalOutput += data.usage?.completion_tokens || 0;
      meteredCalls++;

      const json = extractJSON(content);
      if (json) {
        console.log(
          ` ✅ ${test.name}: ${json.entities.length} entities, ${json.relationships.length} rels (${ms}ms)`
        );
        passed++;
      } else {
        console.log(` ❌ ${test.name}: Invalid JSON (${ms}ms)`);
      }
    } catch (err: any) {
      console.log(` ❌ ${test.name}: ${err.message}`);
    }
    // Small pause between requests.
    await new Promise((r) => setTimeout(r, 500));
  }

  // Rounded mean that yields 0 instead of NaN when there are no samples.
  const mean = (total: number, count: number) => (count > 0 ? Math.round(total / count) : 0);

  const avgMs = mean(totalMs, timedCalls);
  const avgInput = mean(totalInput, meteredCalls);
  const avgOutput = mean(totalOutput, meteredCalls);
  // Prices are per million tokens; scale the per-call cost to 1000 calls.
  const cost = ((avgInput / 1e6) * model.inputPrice + (avgOutput / 1e6) * model.outputPrice) * 1000;

  return {
    model: model.name,
    id: model.id,
    passRate: (passed / TEST_CASES.length) * 100,
    avgMs,
    cost,
    inputPrice: model.inputPrice,
    outputPrice: model.outputPrice,
  };
}
132
+
133
/**
 * Test each entry of GROK_MODELS in sequence (1 s apart), print a comparison
 * table and write the results to grok-test-results.json.
 */
async function main() {
  // Box-drawing characters and emoji below are restored from mis-encoded text.
  console.log('═'.repeat(60));
  console.log('🧪 Grok Model Test (via OpenRouter)');
  console.log('═'.repeat(60));

  const results = [];
  for (const model of GROK_MODELS) {
    results.push(await testModel(model));
    await new Promise((r) => setTimeout(r, 1000));
  }

  console.log(`\n${'═'.repeat(80)}`);
  console.log('📊 GROK MODELS COMPARISON');
  console.log(`${'═'.repeat(80)}\n`);

  console.log(
    '| Model | Pass Rate | Avg Time | Input $/M | Output $/M | Cost/1K Calls |'
  );
  console.log(
    '|-----------------|-----------|----------|-----------|------------|---------------|'
  );
  for (const r of results) {
    // A model with no passing test is reported as FAILED without figures.
    if (r.passRate > 0) {
      console.log(
        `| ${r.model.padEnd(15)} | ${r.passRate.toFixed(0).padStart(7)}% | ${(`${r.avgMs}ms`).padStart(8)} | $${r.inputPrice.toFixed(2).padStart(7)} | $${r.outputPrice.toFixed(2).padStart(8)} | $${r.cost.toFixed(4).padStart(11)} |`
      );
    } else {
      console.log(
        `| ${r.model.padEnd(15)} | FAILED | - | - | - | - |`
      );
    }
  }

  await Bun.write(
    'grok-test-results.json',
    JSON.stringify({ results, timestamp: new Date().toISOString() }, null, 2)
  );
  // NOTE(review): the original glyph lost a byte in transit; a folder icon is assumed.
  console.log('\n📁 Results saved to grok-test-results.json');
}
172
+
173
// Handle a rejected main() instead of leaving the promise floating: log the
// error and mark the process as failed.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});