npm - @duckmind/deepquark-darwin-arm64 - Versions diffs - 0.9.83 → 0.9.90 - Mend

@duckmind/deepquark-darwin-arm64 0.9.83 → 0.9.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/.deepquark/skills/bundled/knowledge-graph/server/test-grok-llms-mcp.ts ADDED Viewed

@@ -0,0 +1,386 @@
+#!/usr/bin/env bun
+/**
+ * Grok LLM Models MCP Test
+ * Tests all Grok model variants for real-life entity extraction via Graphiti
+ */
+import { $ } from 'bun';
+const MCP_URL = 'http://localhost:8000/mcp';
+const ACCEPT = 'application/json, text/event-stream';
+const ENV_FILE = '../../config/.env';
+// All Grok models from benchmark (sorted by cost)
+const GROK_MODELS = [
+  { name: 'Grok 4 Fast', model: 'x-ai/grok-4-fast', cost: 0.28 },
+  { name: 'Grok 4.1 Fast', model: 'x-ai/grok-4.1-fast', cost: Math.LOG10E },
+  { name: 'Grok 3 Mini', model: 'x-ai/grok-3-mini', cost: 0.56 },
+  { name: 'Grok 3', model: 'x-ai/grok-3', cost: 2.163 },
+  { name: 'Grok 4', model: 'x-ai/grok-4', cost: 11.842 },
+];
+const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
+if (!OPENROUTER_API_KEY) {
+  console.error('❌ OPENROUTER_API_KEY required');
+  console.log('\nUsage: OPENROUTER_API_KEY=sk-or-v1-... bun test-grok-llms-mcp.ts');
+  process.exit(1);
+}
+// Test episode - complex enough to test extraction quality
+const TEST_EPISODE = {
+  name: 'Grok Test Episode',
+  body: "During the Q4 planning meeting at TechCorp headquarters in Austin, CEO Sarah Martinez announced a strategic partnership with CloudBase Inc. The $50 million deal, facilitated by Morgan Stanley, will integrate CloudBase's AI platform into TechCorp's enterprise suite by March 2026.",
+};
+// Expected entities to find
+const EXPECTED_ENTITIES = ['TechCorp', 'Sarah Martinez', 'CloudBase', 'Morgan Stanley', 'Austin'];
+let sessionId: string | null = null;
+async function mcpRequest(method: string, params: any = {}): Promise<any> {
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+    Accept: ACCEPT,
+  };
+  if (sessionId) headers['mcp-session-id'] = sessionId;
+  try {
+    const res = await fetch(MCP_URL, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({ jsonrpc: '2.0', id: Date.now(), method, params }),
+    });
+    const newSessionId = res.headers.get('mcp-session-id');
+    if (newSessionId) sessionId = newSessionId;
+    const text = await res.text();
+    let result: any = null;
+    let error: any = null;
+    for (const line of text.split('\n')) {
+      if (line.startsWith('data: ')) {
+        try {
+          const data = JSON.parse(line.slice(6));
+          if (data.result) result = data.result;
+          if (data.error) error = data.error;
+        } catch {}
+      }
+    }
+    if (!result && !error) {
+      try {
+        const json = JSON.parse(text);
+        result = json.result;
+        error = json.error;
+      } catch {}
+    }
+    return { result, error };
+  } catch (e: any) {
+    return { error: { message: e.message } };
+  }
+}
+async function callTool(
+  name: string,
+  args: any
+): Promise<{ success: boolean; data: any; duration: number }> {
+  const start = Date.now();
+  const { result, error } = await mcpRequest('tools/call', { name, arguments: args });
+  const duration = Date.now() - start;
+  if (error) return { success: false, data: error, duration };
+  let data: any = result;
+  try {
+    if (result?.content?.[0]?.text) {
+      data = JSON.parse(result.content[0].text);
+    }
+  } catch {}
+  return { success: true, data, duration };
+}
+async function waitForHealth(maxWait = 60000): Promise<boolean> {
+  const start = Date.now();
+  while (Date.now() - start < maxWait) {
+    try {
+      const res = await fetch('http://localhost:8000/health', {
+        signal: AbortSignal.timeout(2000),
+      });
+      if (res.ok) return true;
+    } catch {}
+    await Bun.sleep(2000);
+  }
+  return false;
+}
+async function updateEnvAndRestart(modelName: string): Promise<boolean> {
+  // Read current env
+  const envPath = Bun.file(ENV_FILE);
+  let envContent = await envPath.text();
+  // Update MODEL_NAME lines
+  envContent = envContent.replace(
+    /MADEINOZ_KNOWLEDGE_MODEL_NAME=.*/g,
+    `MADEINOZ_KNOWLEDGE_MODEL_NAME=${modelName}`
+  );
+  envContent = envContent.replace(/^MODEL_NAME=.*/gm, `MODEL_NAME=${modelName}`);
+  // Write updated env
+  await Bun.write(ENV_FILE, envContent);
+  // Restart container
+  try {
+    await $`docker-compose -f docker-compose-neo4j.yml up -d --force-recreate graphiti-mcp`.quiet();
+    await Bun.sleep(5000); // Initial wait
+    return await waitForHealth();
+  } catch (e: any) {
+    console.log(`   ❌ Restart failed: ${e.message}`);
+    return false;
+  }
+}
+async function checkExtractedEntities(groupId: string): Promise<string[]> {
+  try {
+    const result =
+      await $`docker exec madeinoz-knowledge-neo4j cypher-shell -u neo4j -p madeinozknowledge "MATCH (n:Entity {group_id: '${groupId}'}) RETURN n.name as name"`.text();
+    // Parse cypher-shell output
+    const lines = result.split('\n').slice(1); // Skip header
+    const entities: string[] = [];
+    for (const line of lines) {
+      const name = line.replace(/"/g, '').trim();
+      if (name && name !== 'name') entities.push(name);
+    }
+    return entities;
+  } catch {
+    return [];
+  }
+}
+async function checkLogs(): Promise<{ success: boolean; error?: string }> {
+  try {
+    const logs =
+      await $`docker-compose -f docker-compose-neo4j.yml logs --tail=30 graphiti-mcp`.text();
+    if (logs.includes('Successfully processed episode')) {
+      return { success: true };
+    }
+    if (logs.includes('Failed to process episode')) {
+      const match = logs.match(/Failed to process episode.*?: (.+)/);
+      return { success: false, error: match?.[1] || 'Unknown error' };
+    }
+    return { success: false, error: 'Processing not completed' };
+  } catch {
+    return { success: false, error: 'Could not read logs' };
+  }
+}
+async function testModel(model: (typeof GROK_MODELS)[0], index: number): Promise<any> {
+  const groupId = `grok-test-${index}`;
+  sessionId = null;
+  const result = {
+    name: model.name,
+    model: model.model,
+    cost: model.cost,
+    works: false,
+    entitiesFound: 0,
+    expectedFound: 0,
+    error: null as string | null,
+    extractionTimeMs: 0,
+  };
+  // Initialize MCP session
+  const initRes = await mcpRequest('initialize', {
+    protocolVersion: '2024-11-05',
+    capabilities: {},
+    clientInfo: { name: 'grok-test', version: '1.0' },
+  });
+  if (initRes.error) {
+    result.error = `Init failed: ${initRes.error.message}`;
+    return result;
+  }
+  await mcpRequest('notifications/initialized', {});
+  // Add test episode
+  const addStart = Date.now();
+  const addRes = await callTool('add_memory', {
+    name: TEST_EPISODE.name,
+    episode_body: TEST_EPISODE.body,
+    source: 'text',
+    group_id: groupId,
+  });
+  if (!addRes.success) {
+    result.error = `add_memory failed: ${JSON.stringify(addRes.data).slice(0, 100)}`;
+    return result;
+  }
+  // Wait for async processing (up to 30 seconds)
+  console.log('      ⏳ Waiting for extraction...');
+  let processed = false;
+  for (let i = 0; i < 15; i++) {
+    await Bun.sleep(2000);
+    const logCheck = await checkLogs();
+    if (logCheck.success) {
+      processed = true;
+      break;
+    }
+    if (logCheck.error?.includes('validation error')) {
+      result.error = logCheck.error;
+      return result;
+    }
+  }
+  result.extractionTimeMs = Date.now() - addStart;
+  if (!processed) {
+    // Check logs for error
+    const logCheck = await checkLogs();
+    if (!logCheck.success) {
+      result.error = logCheck.error || 'Extraction timeout';
+      return result;
+    }
+  }
+  // Check extracted entities
+  const entities = await checkExtractedEntities(groupId);
+  result.entitiesFound = entities.length;
+  // Count how many expected entities were found
+  for (const expected of EXPECTED_ENTITIES) {
+    const found = entities.some(
+      (e) =>
+        e.toLowerCase().includes(expected.toLowerCase()) ||
+        expected.toLowerCase().includes(e.toLowerCase())
+    );
+    if (found) result.expectedFound++;
+  }
+  result.works = result.entitiesFound >= 3 && result.expectedFound >= 3;
+  return result;
+}
+async function main() {
+  console.log('═'.repeat(70));
+  console.log('🤖 Grok LLM Models MCP Test');
+  console.log('═'.repeat(70));
+  console.log(`\nTest Episode: "${TEST_EPISODE.body.slice(0, 80)}..."`);
+  console.log(`Expected Entities: ${EXPECTED_ENTITIES.join(', ')}`);
+  console.log(`\nGrok models to test: ${GROK_MODELS.length}`);
+  const results: any[] = [];
+  for (let i = 0; i < GROK_MODELS.length; i++) {
+    const model = GROK_MODELS[i];
+    console.log(`\n${'─'.repeat(70)}`);
+    console.log(`[${i + 1}/${GROK_MODELS.length}] ${model.name} ($${model.cost}/1K)`);
+    console.log(`${'─'.repeat(70)}`);
+    // Update config and restart
+    console.log(`   🔄 Configuring ${model.model}...`);
+    const started = await updateEnvAndRestart(model.model);
+    if (!started) {
+      console.log('   ❌ Container failed to start');
+      results.push({
+        name: model.name,
+        model: model.model,
+        cost: model.cost,
+        works: false,
+        error: 'Container failed',
+      });
+      continue;
+    }
+    // Verify model is configured
+    const logs =
+      await $`docker-compose -f docker-compose-neo4j.yml logs --tail=5 graphiti-mcp`.text();
+    if (!logs.includes(model.model)) {
+      console.log('   ⚠️  Model not reflected in logs, continuing anyway...');
+    }
+    console.log('   ✅ Container ready');
+    console.log('   📥 Testing entity extraction...');
+    const result = await testModel(model, i);
+    results.push(result);
+    if (result.works) {
+      console.log(
+        `   ✅ PASS - ${result.entitiesFound} entities, ${result.expectedFound}/${EXPECTED_ENTITIES.length} expected (${result.extractionTimeMs}ms)`
+      );
+    } else {
+      console.log(
+        `   ❌ FAIL - ${result.error || `Only ${result.expectedFound}/${EXPECTED_ENTITIES.length} expected entities`}`
+      );
+    }
+  }
+  // Summary
+  console.log(`\n${'═'.repeat(70)}`);
+  console.log('📊 GROK MODEL RESULTS');
+  console.log(`${'═'.repeat(70)}`);
+  const working = results.filter((r) => r.works);
+  const failed = results.filter((r) => !r.works);
+  console.log(`\n✅ WORKING MODELS (${working.length}/${results.length}):`);
+  if (working.length > 0) {
+    console.log('| Model | Cost/1K | Entities | Time |');
+    console.log('|-------|---------|----------|------|');
+    for (const r of working.sort((a, b) => a.cost - b.cost)) {
+      console.log(
+        `| ${r.name.padEnd(20)} | $${r.cost.toFixed(4).padStart(6)} | ${r.entitiesFound}/${r.expectedFound} | ${r.extractionTimeMs}ms |`
+      );
+    }
+  } else {
+    console.log('   (none)');
+  }
+  if (failed.length > 0) {
+    console.log(`\n❌ FAILED MODELS (${failed.length}/${results.length}):`);
+    console.log('| Model | Cost/1K | Error |');
+    console.log('|-------|---------|-------|');
+    for (const r of failed) {
+      const error = (r.error || 'Unknown').slice(0, 50);
+      console.log(`| ${r.name.padEnd(20)} | $${r.cost.toFixed(4).padStart(6)} | ${error} |`);
+    }
+  }
+  // Save results
+  await Bun.write(
+    'grok-mcp-results.json',
+    JSON.stringify(
+      {
+        results,
+        summary: {
+          total: results.length,
+          working: working.length,
+          failed: failed.length,
+          workingModels: working.map((r) => r.model),
+          cheapestWorking: working.sort((a, b) => a.cost - b.cost)[0]?.name || 'None',
+        },
+        testEpisode: TEST_EPISODE,
+        expectedEntities: EXPECTED_ENTITIES,
+        timestamp: new Date().toISOString(),
+      },
+      null,
+      2
+    )
+  );
+  console.log('\n📁 Results saved to grok-mcp-results.json');
+  // Restore to GPT-4o-mini (known working)
+  console.log('\n🔄 Restoring GPT-4o-mini configuration...');
+  await updateEnvAndRestart('openai/gpt-4o-mini');
+}
+main().catch(console.error);

package/.deepquark/skills/bundled/knowledge-graph/server/test-grok-models.ts ADDED Viewed

@@ -0,0 +1,173 @@
+#!/usr/bin/env bun
+/**
+ * Test Grok models via OpenRouter
+ */
+const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
+const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1';
+if (!OPENROUTER_API_KEY) {
+  console.error('❌ OPENROUTER_API_KEY required');
+  process.exit(1);
+}
+// Grok models on OpenRouter
+const GROK_MODELS = [
+  { id: 'x-ai/grok-2-1212', name: 'Grok 2', inputPrice: 2.0, outputPrice: 10.0 },
+  { id: 'x-ai/grok-2-vision-1212', name: 'Grok 2 Vision', inputPrice: 2.0, outputPrice: 10.0 },
+  { id: 'x-ai/grok-beta', name: 'Grok Beta', inputPrice: 5.0, outputPrice: 15.0 },
+];
+const TEST_CASES = [
+  {
+    name: 'Basic Entity Extraction',
+    text: 'John Smith works at Acme Corp in New York. He met Sarah Jones yesterday to discuss the Q4 budget.',
+  },
+  {
+    name: 'Technical Content',
+    text: 'The PAI system uses Neo4j for graph storage and OpenAI for embeddings. It was created by Daniel Miessler.',
+  },
+  {
+    name: 'Complex Relationships',
+    text: `Alice, the CTO of TechStart, acquired DataFlow Inc last month. Bob, who was DataFlow's CEO, now reports to Alice.`,
+  },
+];
+const PROMPT = (
+  text: string
+) => `Extract entities and relationships from this text. Return ONLY valid JSON.
+Text: "${text}"
+Format: {"entities": [{"name": "string", "type": "PERSON|ORGANIZATION|LOCATION"}], "relationships": [{"source": "string", "target": "string", "type": "string"}]}`;
+function extractJSON(text: string): any | null {
+  const clean = text
+    .replace(/```json\n?/g, '')
+    .replace(/```\n?/g, '')
+    .trim();
+  try {
+    const obj = JSON.parse(clean);
+    if (obj.entities && obj.relationships) return obj;
+  } catch {}
+  const match = clean.match(/\{[\s\S]*"entities"[\s\S]*"relationships"[\s\S]*\}/);
+  if (match) {
+    try {
+      return JSON.parse(match[0]);
+    } catch {}
+  }
+  return null;
+}
+async function testModel(model: (typeof GROK_MODELS)[0]) {
+  console.log(`\n🔄 Testing: ${model.name} (${model.id})`);
+  console.log(`   ${'─'.repeat(50)}`);
+  let passed = 0;
+  let totalMs = 0;
+  let totalInput = 0;
+  let totalOutput = 0;
+  for (const test of TEST_CASES) {
+    const start = Date.now();
+    try {
+      const res = await fetch(`${OPENROUTER_BASE_URL}/chat/completions`, {
+        method: 'POST',
+        headers: {
+          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify({
+          model: model.id,
+          messages: [{ role: 'user', content: PROMPT(test.text) }],
+          temperature: 0.1,
+          max_tokens: 1000,
+        }),
+      });
+      const ms = Date.now() - start;
+      totalMs += ms;
+      if (!res.ok) {
+        const err = await res.text();
+        console.log(`   ❌ ${test.name}: HTTP ${res.status} - ${err.slice(0, 80)}`);
+        continue;
+      }
+      const data = (await res.json()) as any;
+      const content = data.choices?.[0]?.message?.content || '';
+      totalInput += data.usage?.prompt_tokens || 0;
+      totalOutput += data.usage?.completion_tokens || 0;
+      const json = extractJSON(content);
+      if (json) {
+        console.log(
+          `   ✅ ${test.name}: ${json.entities.length} entities, ${json.relationships.length} rels (${ms}ms)`
+        );
+        passed++;
+      } else {
+        console.log(`   ❌ ${test.name}: Invalid JSON (${ms}ms)`);
+      }
+    } catch (err: any) {
+      console.log(`   ❌ ${test.name}: ${err.message}`);
+    }
+    await new Promise((r) => setTimeout(r, 500));
+  }
+  const avgMs = Math.round(totalMs / TEST_CASES.length);
+  const avgInput = Math.round(totalInput / TEST_CASES.length);
+  const avgOutput = Math.round(totalOutput / TEST_CASES.length);
+  const cost = ((avgInput / 1e6) * model.inputPrice + (avgOutput / 1e6) * model.outputPrice) * 1000;
+  return {
+    model: model.name,
+    id: model.id,
+    passRate: (passed / TEST_CASES.length) * 100,
+    avgMs,
+    cost,
+    inputPrice: model.inputPrice,
+    outputPrice: model.outputPrice,
+  };
+}
+async function main() {
+  console.log('═'.repeat(60));
+  console.log('🧪 Grok Model Test (via OpenRouter)');
+  console.log('═'.repeat(60));
+  const results = [];
+  for (const model of GROK_MODELS) {
+    results.push(await testModel(model));
+    await new Promise((r) => setTimeout(r, 1000));
+  }
+  console.log(`\n${'═'.repeat(80)}`);
+  console.log('📊 GROK MODELS COMPARISON');
+  console.log(`${'═'.repeat(80)}\n`);
+  console.log(
+    '| Model           | Pass Rate | Avg Time | Input $/M | Output $/M | Cost/1K Calls |'
+  );
+  console.log(
+    '|-----------------|-----------|----------|-----------|------------|---------------|'
+  );
+  for (const r of results) {
+    if (r.passRate > 0) {
+      console.log(
+        `| ${r.model.padEnd(15)} | ${r.passRate.toFixed(0).padStart(7)}% | ${(`${r.avgMs}ms`).padStart(8)} | $${r.inputPrice.toFixed(2).padStart(7)} | $${r.outputPrice.toFixed(2).padStart(8)} | $${r.cost.toFixed(4).padStart(11)} |`
+      );
+    } else {
+      console.log(
+        `| ${r.model.padEnd(15)} | FAILED    | -        | -         | -          | -             |`
+      );
+    }
+  }
+  await Bun.write(
+    'grok-test-results.json',
+    JSON.stringify({ results, timestamp: new Date().toISOString() }, null, 2)
+  );
+  console.log('\n📁 Results saved to grok-test-results.json');
+}
+main();