npm - @veewo/gitnexus - Versions diffs - 1.3.4 - Mend

@veewo/gitnexus 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (231) hide show

package/README.md +234 -0
package/dist/benchmark/agent-context/evaluators.d.ts +9 -0
package/dist/benchmark/agent-context/evaluators.js +196 -0
package/dist/benchmark/agent-context/evaluators.test.d.ts +1 -0
package/dist/benchmark/agent-context/evaluators.test.js +39 -0
package/dist/benchmark/agent-context/io.d.ts +2 -0
package/dist/benchmark/agent-context/io.js +23 -0
package/dist/benchmark/agent-context/io.test.d.ts +1 -0
package/dist/benchmark/agent-context/io.test.js +19 -0
package/dist/benchmark/agent-context/report.d.ts +2 -0
package/dist/benchmark/agent-context/report.js +59 -0
package/dist/benchmark/agent-context/report.test.d.ts +1 -0
package/dist/benchmark/agent-context/report.test.js +85 -0
package/dist/benchmark/agent-context/runner.d.ts +46 -0
package/dist/benchmark/agent-context/runner.js +111 -0
package/dist/benchmark/agent-context/runner.test.d.ts +1 -0
package/dist/benchmark/agent-context/runner.test.js +79 -0
package/dist/benchmark/agent-context/tool-runner.d.ts +7 -0
package/dist/benchmark/agent-context/tool-runner.js +18 -0
package/dist/benchmark/agent-context/tool-runner.test.d.ts +1 -0
package/dist/benchmark/agent-context/tool-runner.test.js +11 -0
package/dist/benchmark/agent-context/types.d.ts +40 -0
package/dist/benchmark/agent-context/types.js +1 -0
package/dist/benchmark/analyze-runner.d.ts +16 -0
package/dist/benchmark/analyze-runner.js +51 -0
package/dist/benchmark/analyze-runner.test.d.ts +1 -0
package/dist/benchmark/analyze-runner.test.js +37 -0
package/dist/benchmark/evaluators.d.ts +6 -0
package/dist/benchmark/evaluators.js +10 -0
package/dist/benchmark/evaluators.test.d.ts +1 -0
package/dist/benchmark/evaluators.test.js +12 -0
package/dist/benchmark/io.d.ts +7 -0
package/dist/benchmark/io.js +25 -0
package/dist/benchmark/io.test.d.ts +1 -0
package/dist/benchmark/io.test.js +35 -0
package/dist/benchmark/neonspark-candidates.d.ts +19 -0
package/dist/benchmark/neonspark-candidates.js +94 -0
package/dist/benchmark/neonspark-candidates.test.d.ts +1 -0
package/dist/benchmark/neonspark-candidates.test.js +43 -0
package/dist/benchmark/neonspark-materialize.d.ts +19 -0
package/dist/benchmark/neonspark-materialize.js +111 -0
package/dist/benchmark/neonspark-materialize.test.d.ts +1 -0
package/dist/benchmark/neonspark-materialize.test.js +124 -0
package/dist/benchmark/neonspark-sync.d.ts +3 -0
package/dist/benchmark/neonspark-sync.js +53 -0
package/dist/benchmark/neonspark-sync.test.d.ts +1 -0
package/dist/benchmark/neonspark-sync.test.js +20 -0
package/dist/benchmark/report.d.ts +1 -0
package/dist/benchmark/report.js +7 -0
package/dist/benchmark/runner.d.ts +48 -0
package/dist/benchmark/runner.js +302 -0
package/dist/benchmark/runner.test.d.ts +1 -0
package/dist/benchmark/runner.test.js +50 -0
package/dist/benchmark/scoring.d.ts +16 -0
package/dist/benchmark/scoring.js +27 -0
package/dist/benchmark/scoring.test.d.ts +1 -0
package/dist/benchmark/scoring.test.js +24 -0
package/dist/benchmark/tool-runner.d.ts +6 -0
package/dist/benchmark/tool-runner.js +17 -0
package/dist/benchmark/types.d.ts +36 -0
package/dist/benchmark/types.js +1 -0
package/dist/cli/ai-context.d.ts +22 -0
package/dist/cli/ai-context.js +184 -0
package/dist/cli/ai-context.test.d.ts +1 -0
package/dist/cli/ai-context.test.js +30 -0
package/dist/cli/analyze-multi-scope-regression.test.d.ts +1 -0
package/dist/cli/analyze-multi-scope-regression.test.js +22 -0
package/dist/cli/analyze-options.d.ts +7 -0
package/dist/cli/analyze-options.js +56 -0
package/dist/cli/analyze-options.test.d.ts +1 -0
package/dist/cli/analyze-options.test.js +36 -0
package/dist/cli/analyze.d.ts +14 -0
package/dist/cli/analyze.js +384 -0
package/dist/cli/augment.d.ts +13 -0
package/dist/cli/augment.js +33 -0
package/dist/cli/benchmark-agent-context.d.ts +29 -0
package/dist/cli/benchmark-agent-context.js +61 -0
package/dist/cli/benchmark-agent-context.test.d.ts +1 -0
package/dist/cli/benchmark-agent-context.test.js +80 -0
package/dist/cli/benchmark-unity.d.ts +15 -0
package/dist/cli/benchmark-unity.js +31 -0
package/dist/cli/benchmark-unity.test.d.ts +1 -0
package/dist/cli/benchmark-unity.test.js +18 -0
package/dist/cli/claude-hooks.d.ts +22 -0
package/dist/cli/claude-hooks.js +97 -0
package/dist/cli/clean.d.ts +10 -0
package/dist/cli/clean.js +60 -0
package/dist/cli/eval-server.d.ts +30 -0
package/dist/cli/eval-server.js +372 -0
package/dist/cli/index.d.ts +2 -0
package/dist/cli/index.js +182 -0
package/dist/cli/list.d.ts +6 -0
package/dist/cli/list.js +33 -0
package/dist/cli/mcp.d.ts +8 -0
package/dist/cli/mcp.js +34 -0
package/dist/cli/repo-manager-alias.test.d.ts +1 -0
package/dist/cli/repo-manager-alias.test.js +40 -0
package/dist/cli/scope-filter.test.d.ts +1 -0
package/dist/cli/scope-filter.test.js +49 -0
package/dist/cli/serve.d.ts +4 -0
package/dist/cli/serve.js +6 -0
package/dist/cli/setup.d.ts +8 -0
package/dist/cli/setup.js +311 -0
package/dist/cli/setup.test.d.ts +1 -0
package/dist/cli/setup.test.js +31 -0
package/dist/cli/status.d.ts +6 -0
package/dist/cli/status.js +27 -0
package/dist/cli/tool.d.ts +40 -0
package/dist/cli/tool.js +94 -0
package/dist/cli/version.test.d.ts +1 -0
package/dist/cli/version.test.js +19 -0
package/dist/cli/wiki.d.ts +15 -0
package/dist/cli/wiki.js +361 -0
package/dist/config/ignore-service.d.ts +1 -0
package/dist/config/ignore-service.js +210 -0
package/dist/config/supported-languages.d.ts +12 -0
package/dist/config/supported-languages.js +15 -0
package/dist/core/augmentation/engine.d.ts +26 -0
package/dist/core/augmentation/engine.js +213 -0
package/dist/core/embeddings/embedder.d.ts +60 -0
package/dist/core/embeddings/embedder.js +251 -0
package/dist/core/embeddings/embedding-pipeline.d.ts +51 -0
package/dist/core/embeddings/embedding-pipeline.js +329 -0
package/dist/core/embeddings/index.d.ts +9 -0
package/dist/core/embeddings/index.js +9 -0
package/dist/core/embeddings/text-generator.d.ts +24 -0
package/dist/core/embeddings/text-generator.js +182 -0
package/dist/core/embeddings/types.d.ts +87 -0
package/dist/core/embeddings/types.js +32 -0
package/dist/core/graph/graph.d.ts +2 -0
package/dist/core/graph/graph.js +66 -0
package/dist/core/graph/types.d.ts +61 -0
package/dist/core/graph/types.js +1 -0
package/dist/core/ingestion/ast-cache.d.ts +11 -0
package/dist/core/ingestion/ast-cache.js +34 -0
package/dist/core/ingestion/call-processor.d.ts +15 -0
package/dist/core/ingestion/call-processor.js +327 -0
package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
package/dist/core/ingestion/cluster-enricher.js +170 -0
package/dist/core/ingestion/community-processor.d.ts +39 -0
package/dist/core/ingestion/community-processor.js +312 -0
package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
package/dist/core/ingestion/entry-point-scoring.js +260 -0
package/dist/core/ingestion/filesystem-walker.d.ts +28 -0
package/dist/core/ingestion/filesystem-walker.js +80 -0
package/dist/core/ingestion/framework-detection.d.ts +39 -0
package/dist/core/ingestion/framework-detection.js +235 -0
package/dist/core/ingestion/heritage-processor.d.ts +20 -0
package/dist/core/ingestion/heritage-processor.js +197 -0
package/dist/core/ingestion/import-processor.d.ts +38 -0
package/dist/core/ingestion/import-processor.js +778 -0
package/dist/core/ingestion/parsing-processor.d.ts +15 -0
package/dist/core/ingestion/parsing-processor.js +291 -0
package/dist/core/ingestion/pipeline.d.ts +5 -0
package/dist/core/ingestion/pipeline.js +323 -0
package/dist/core/ingestion/process-processor.d.ts +51 -0
package/dist/core/ingestion/process-processor.js +309 -0
package/dist/core/ingestion/scope-filter.d.ts +25 -0
package/dist/core/ingestion/scope-filter.js +100 -0
package/dist/core/ingestion/structure-processor.d.ts +2 -0
package/dist/core/ingestion/structure-processor.js +36 -0
package/dist/core/ingestion/symbol-table.d.ts +33 -0
package/dist/core/ingestion/symbol-table.js +38 -0
package/dist/core/ingestion/tree-sitter-queries.d.ts +12 -0
package/dist/core/ingestion/tree-sitter-queries.js +398 -0
package/dist/core/ingestion/utils.d.ts +10 -0
package/dist/core/ingestion/utils.js +50 -0
package/dist/core/ingestion/workers/parse-worker.d.ts +59 -0
package/dist/core/ingestion/workers/parse-worker.js +672 -0
package/dist/core/ingestion/workers/worker-pool.d.ts +16 -0
package/dist/core/ingestion/workers/worker-pool.js +120 -0
package/dist/core/kuzu/csv-generator.d.ts +29 -0
package/dist/core/kuzu/csv-generator.js +336 -0
package/dist/core/kuzu/kuzu-adapter.d.ts +101 -0
package/dist/core/kuzu/kuzu-adapter.js +753 -0
package/dist/core/kuzu/schema.d.ts +53 -0
package/dist/core/kuzu/schema.js +407 -0
package/dist/core/search/bm25-index.d.ts +23 -0
package/dist/core/search/bm25-index.js +95 -0
package/dist/core/search/hybrid-search.d.ts +49 -0
package/dist/core/search/hybrid-search.js +118 -0
package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
package/dist/core/tree-sitter/parser-loader.js +44 -0
package/dist/core/wiki/generator.d.ts +110 -0
package/dist/core/wiki/generator.js +786 -0
package/dist/core/wiki/graph-queries.d.ts +80 -0
package/dist/core/wiki/graph-queries.js +238 -0
package/dist/core/wiki/html-viewer.d.ts +10 -0
package/dist/core/wiki/html-viewer.js +297 -0
package/dist/core/wiki/llm-client.d.ts +40 -0
package/dist/core/wiki/llm-client.js +162 -0
package/dist/core/wiki/prompts.d.ts +53 -0
package/dist/core/wiki/prompts.js +174 -0
package/dist/lib/utils.d.ts +1 -0
package/dist/lib/utils.js +3 -0
package/dist/mcp/core/embedder.d.ts +27 -0
package/dist/mcp/core/embedder.js +108 -0
package/dist/mcp/core/kuzu-adapter.d.ts +34 -0
package/dist/mcp/core/kuzu-adapter.js +231 -0
package/dist/mcp/local/local-backend.d.ts +160 -0
package/dist/mcp/local/local-backend.js +1646 -0
package/dist/mcp/resources.d.ts +31 -0
package/dist/mcp/resources.js +407 -0
package/dist/mcp/server.d.ts +23 -0
package/dist/mcp/server.js +251 -0
package/dist/mcp/staleness.d.ts +15 -0
package/dist/mcp/staleness.js +29 -0
package/dist/mcp/tools.d.ts +24 -0
package/dist/mcp/tools.js +195 -0
package/dist/server/api.d.ts +10 -0
package/dist/server/api.js +344 -0
package/dist/server/mcp-http.d.ts +13 -0
package/dist/server/mcp-http.js +100 -0
package/dist/storage/git.d.ts +6 -0
package/dist/storage/git.js +32 -0
package/dist/storage/repo-manager.d.ts +125 -0
package/dist/storage/repo-manager.js +257 -0
package/dist/types/pipeline.d.ts +34 -0
package/dist/types/pipeline.js +18 -0
package/hooks/claude/gitnexus-hook.cjs +135 -0
package/hooks/claude/pre-tool-use.sh +78 -0
package/hooks/claude/session-start.sh +42 -0
package/package.json +92 -0
package/skills/gitnexus-cli.md +82 -0
package/skills/gitnexus-debugging.md +89 -0
package/skills/gitnexus-exploring.md +78 -0
package/skills/gitnexus-guide.md +64 -0
package/skills/gitnexus-impact-analysis.md +97 -0
package/skills/gitnexus-refactoring.md +121 -0
package/vendor/leiden/index.cjs +355 -0
package/vendor/leiden/utils.cjs +392 -0

package/dist/core/augmentation/engine.js ADDED Viewed

@@ -0,0 +1,213 @@
+/**
+ * Augmentation Engine
+ *
+ * Lightweight, fast-path enrichment of search patterns with knowledge graph context.
+ * Designed to be called from platform hooks (Claude Code PreToolUse, Cursor beforeShellExecution)
+ * when an agent runs grep/glob/search.
+ *
+ * Performance target: <500ms cold start, <200ms warm.
+ *
+ * Design decisions:
+ * - Uses only BM25 search (no semantic/embedding) for speed
+ * - Clusters used internally for ranking, NEVER in output
+ * - Output is pure relationships: callers, callees, process participation
+ * - Graceful failure: any error → return empty string
+ */
+import path from 'path';
+import { listRegisteredRepos } from '../../storage/repo-manager.js';
+/**
+ * Find the best matching repo for a given working directory.
+ * Matches by checking if cwd is within the repo's path.
+ */
+async function findRepoForCwd(cwd) {
+    try {
+        const entries = await listRegisteredRepos({ validate: true });
+        const resolved = path.resolve(cwd);
+        // Normalize to lowercase on Windows (drive letters can differ: D: vs d:)
+        const isWindows = process.platform === 'win32';
+        const normalizedCwd = isWindows ? resolved.toLowerCase() : resolved;
+        const sep = path.sep;
+        // Find the LONGEST matching repo path (most specific match wins)
+        let bestMatch = null;
+        let bestLen = 0;
+        for (const entry of entries) {
+            const repoResolved = path.resolve(entry.path);
+            const normalizedRepo = isWindows ? repoResolved.toLowerCase() : repoResolved;
+            // Check if cwd is inside repo OR repo is inside cwd
+            // Must match at a path separator boundary to avoid false positives
+            // (e.g. /projects/gitnexusv2 should NOT match /projects/gitnexus)
+            let matched = false;
+            if (normalizedCwd === normalizedRepo) {
+                matched = true;
+            }
+            else if (normalizedCwd.startsWith(normalizedRepo + sep)) {
+                matched = true;
+            }
+            else if (normalizedRepo.startsWith(normalizedCwd + sep)) {
+                matched = true;
+            }
+            if (matched && normalizedRepo.length > bestLen) {
+                bestMatch = entry;
+                bestLen = normalizedRepo.length;
+            }
+        }
+        if (!bestMatch)
+            return null;
+        return {
+            name: bestMatch.name,
+            storagePath: bestMatch.storagePath,
+            kuzuPath: path.join(bestMatch.storagePath, 'kuzu'),
+        };
+    }
+    catch {
+        return null;
+    }
+}
+/**
+ * Augment a search pattern with knowledge graph context.
+ *
+ * 1. BM25 search for the pattern
+ * 2. For top matches, fetch callers/callees/processes
+ * 3. Rank by internal cluster cohesion (not exposed)
+ * 4. Format as structured text block
+ *
+ * Returns empty string on any error (graceful failure).
+ */
+export async function augment(pattern, cwd) {
+    if (!pattern || pattern.length < 3)
+        return '';
+    const workDir = cwd || process.cwd();
+    try {
+        const repo = await findRepoForCwd(workDir);
+        if (!repo)
+            return '';
+        // Lazy-load kuzu adapter (skip unnecessary init)
+        const { initKuzu, executeQuery, isKuzuReady } = await import('../../mcp/core/kuzu-adapter.js');
+        const { searchFTSFromKuzu } = await import('../search/bm25-index.js');
+        const repoId = repo.name.toLowerCase();
+        // Init KuzuDB if not already
+        if (!isKuzuReady(repoId)) {
+            await initKuzu(repoId, repo.kuzuPath);
+        }
+        // Step 1: BM25 search (fast, no embeddings)
+        const bm25Results = await searchFTSFromKuzu(pattern, 10, repoId);
+        if (bm25Results.length === 0)
+            return '';
+        // Step 2: Map BM25 file results to symbols
+        const symbolMatches = [];
+        for (const result of bm25Results.slice(0, 5)) {
+            const escaped = result.filePath.replace(/'/g, "''");
+            try {
+                const symbols = await executeQuery(repoId, `
+          MATCH (n) WHERE n.filePath = '${escaped}'
+          AND n.name CONTAINS '${pattern.replace(/'/g, "''").split(/\s+/)[0]}'
+          RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
+          LIMIT 3
+        `);
+                for (const sym of symbols) {
+                    symbolMatches.push({
+                        nodeId: sym.id || sym[0],
+                        name: sym.name || sym[1],
+                        type: sym.type || sym[2],
+                        filePath: sym.filePath || sym[3],
+                        score: result.score,
+                    });
+                }
+            }
+            catch { /* skip */ }
+        }
+        if (symbolMatches.length === 0)
+            return '';
+        // Step 3: For top matches, fetch callers/callees/processes
+        // Also get cluster cohesion internally for ranking
+        const enriched = [];
+        const seen = new Set();
+        for (const sym of symbolMatches.slice(0, 5)) {
+            if (seen.has(sym.nodeId))
+                continue;
+            seen.add(sym.nodeId);
+            const escaped = sym.nodeId.replace(/'/g, "''");
+            // Callers
+            let callers = [];
+            try {
+                const rows = await executeQuery(repoId, `
+          MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(n {id: '${escaped}'})
+          RETURN caller.name AS name
+          LIMIT 3
+        `);
+                callers = rows.map((r) => r.name || r[0]).filter(Boolean);
+            }
+            catch { /* skip */ }
+            // Callees
+            let callees = [];
+            try {
+                const rows = await executeQuery(repoId, `
+          MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'CALLS'}]->(callee)
+          RETURN callee.name AS name
+          LIMIT 3
+        `);
+                callees = rows.map((r) => r.name || r[0]).filter(Boolean);
+            }
+            catch { /* skip */ }
+            // Processes
+            let processes = [];
+            try {
+                const rows = await executeQuery(repoId, `
+          MATCH (n {id: '${escaped}'})-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
+          RETURN p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
+        `);
+                processes = rows.map((r) => {
+                    const label = r.label || r[0];
+                    const step = r.step || r[1];
+                    const stepCount = r.stepCount || r[2];
+                    return `${label} (step ${step}/${stepCount})`;
+                }).filter(Boolean);
+            }
+            catch { /* skip */ }
+            // Cluster cohesion (internal ranking signal)
+            let cohesion = 0;
+            try {
+                const rows = await executeQuery(repoId, `
+          MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
+          RETURN c.cohesion AS cohesion
+          LIMIT 1
+        `);
+                if (rows.length > 0) {
+                    cohesion = (rows[0].cohesion ?? rows[0][0]) || 0;
+                }
+            }
+            catch { /* skip */ }
+            enriched.push({
+                name: sym.name,
+                filePath: sym.filePath,
+                callers,
+                callees,
+                processes,
+                cohesion,
+            });
+        }
+        if (enriched.length === 0)
+            return '';
+        // Step 4: Rank by cohesion (internal signal) and format
+        enriched.sort((a, b) => b.cohesion - a.cohesion);
+        const lines = [`[GitNexus] ${enriched.length} related symbols found:`, ''];
+        for (const item of enriched) {
+            lines.push(`${item.name} (${item.filePath})`);
+            if (item.callers.length > 0) {
+                lines.push(`  Called by: ${item.callers.join(', ')}`);
+            }
+            if (item.callees.length > 0) {
+                lines.push(`  Calls: ${item.callees.join(', ')}`);
+            }
+            if (item.processes.length > 0) {
+                lines.push(`  Flows: ${item.processes.join(', ')}`);
+            }
+            lines.push('');
+        }
+        return lines.join('\n').trim();
+    }
+    catch {
+        // Graceful failure — never break the original tool
+        return '';
+    }
+}

package/dist/core/embeddings/embedder.d.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Embedder Module
+ *
+ * Singleton factory for transformers.js embedding pipeline.
+ * Handles model loading, caching, and both single and batch embedding operations.
+ *
+ * Uses snowflake-arctic-embed-xs by default (22M params, 384 dims, ~90MB)
+ */
+import { type FeatureExtractionPipeline } from '@huggingface/transformers';
+import { type EmbeddingConfig, type ModelProgress } from './types.js';
+/**
+ * Progress callback type for model loading
+ */
+export type ModelProgressCallback = (progress: ModelProgress) => void;
+/**
+ * Get the current device being used for inference
+ *
+ * @returns The device name, or `null` when no device has been recorded
+ */
+export declare const getCurrentDevice: () => "dml" | "cuda" | "cpu" | "wasm" | null;
+/**
+ * Initialize the embedding model
+ * Uses singleton pattern - only loads once, subsequent calls return cached instance
+ *
+ * @param onProgress - Optional callback for model download progress
+ * @param config - Optional configuration override
+ * @param forceDevice - Force a specific device
+ * @returns Promise resolving to the embedder pipeline
+ */
+export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "dml" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
+/**
+ * Check if the embedder is initialized and ready
+ *
+ * @returns `true` when an embedder instance is available
+ */
+export declare const isEmbedderReady: () => boolean;
+/**
+ * Get the embedder instance (throws if not initialized)
+ *
+ * @returns The initialized embedder pipeline
+ * @throws Error when called before the embedder has been initialized
+ */
+export declare const getEmbedder: () => FeatureExtractionPipeline;
+/**
+ * Embed a single text string
+ *
+ * @param text - Text to embed
+ * @returns Float32Array of embedding vector (384 dimensions with the default model)
+ */
+export declare const embedText: (text: string) => Promise<Float32Array>;
+/**
+ * Embed multiple texts in a single batch
+ * More efficient than calling embedText multiple times
+ *
+ * @param texts - Array of texts to embed
+ * @returns Array of Float32Array embedding vectors, one per input text
+ */
+export declare const embedBatch: (texts: string[]) => Promise<Float32Array[]>;
+/**
+ * Convert Float32Array to regular number array (for KuzuDB storage)
+ *
+ * @param embedding - Embedding vector to convert
+ * @returns Plain number array holding the same values
+ */
+export declare const embeddingToArray: (embedding: Float32Array) => number[];
+/**
+ * Cleanup the embedder (free memory)
+ * Call this when done with embeddings
+ */
+export declare const disposeEmbedder: () => Promise<void>;

package/dist/core/embeddings/embedder.js ADDED Viewed

@@ -0,0 +1,251 @@
+/**
+ * Embedder Module
+ *
+ * Singleton factory for transformers.js embedding pipeline.
+ * Handles model loading, caching, and both single and batch embedding operations.
+ *
+ * Uses snowflake-arctic-embed-xs by default (22M params, 384 dims, ~90MB)
+ */
+// Suppress ONNX Runtime native warnings (e.g. VerifyEachNodeIsAssignedToAnEp)
+// Intended to be set BEFORE onnxruntime-node is imported by transformers.js. NOTE(review): static ESM imports are evaluated before this statement runs, so confirm the variable is still read late enough to take effect
+// Level 3 = Error only (skips Warning/Info); a level already present in the environment is left untouched
+if (!process.env.ORT_LOG_LEVEL) {
+    process.env.ORT_LOG_LEVEL = '3';
+}
+import { pipeline, env } from '@huggingface/transformers';
+import { existsSync } from 'fs';
+import { execFileSync } from 'child_process';
+import { join } from 'path';
+import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
+/**
+ * Check whether CUDA libraries are actually available on this system.
+ * ONNX Runtime's native layer crashes (uncatchable) if we attempt CUDA
+ * without the required shared libraries, so we probe first.
+ *
+ * Checks the dynamic linker cache (ldconfig) which covers all architectures
+ * and install paths, then falls back to CUDA_PATH / LD_LIBRARY_PATH env vars.
+ */
+function isCudaAvailable() {
+    // Primary: query the dynamic linker cache — covers all architectures,
+    // distro layouts, and custom install paths registered with ldconfig
+    try {
+        const out = execFileSync('ldconfig', ['-p'], { timeout: 3000, encoding: 'utf-8' });
+        if (out.includes('libcublasLt.so.12'))
+            return true;
+    }
+    catch {
+        // ldconfig not available (e.g. non-standard container)
+    }
+    // Fallback: check CUDA_PATH and LD_LIBRARY_PATH for environments where
+    // ldconfig doesn't know about the CUDA install (conda, manual /opt/cuda, etc.)
+    for (const envVar of ['CUDA_PATH', 'LD_LIBRARY_PATH']) {
+        const val = process.env[envVar];
+        if (!val)
+            continue;
+        for (const dir of val.split(':').filter(Boolean)) {
+            if (existsSync(join(dir, 'lib64', 'libcublasLt.so.12')) ||
+                existsSync(join(dir, 'lib', 'libcublasLt.so.12')) ||
+                existsSync(join(dir, 'libcublasLt.so.12')))
+                return true;
+        }
+    }
+    return false;
+}
+// Module-level state for singleton pattern: the loaded pipeline, the in-flight load flag and promise, and the device the pipeline was loaded on
+let embedderInstance = null;
+let isInitializing = false;
+let initPromise = null;
+let currentDevice = null;
+/**
+ * Get the current device being used for inference
+ *
+ * @returns The device recorded by the last successful pipeline load, or
+ *          `null` if no load has succeeded yet
+ */
+export const getCurrentDevice = () => currentDevice;
+/**
+ * Initialize the embedding model
+ * Uses singleton pattern - only loads once, subsequent calls return cached instance
+ *
+ * Calls made while a load is already in progress receive that same in-flight
+ * promise; in both the cached and in-flight cases the `config` and
+ * `forceDevice` arguments of the later call are not consulted.
+ *
+ * Device selection: `forceDevice` wins; otherwise a configured device of
+ * 'auto' resolves to 'dml' on Windows and to 'cuda' or 'cpu' elsewhere
+ * depending on the CUDA probe. When the resolved device is 'dml' or 'cuda'
+ * and loading fails, the load is retried once on 'cpu'.
+ *
+ * If every attempted device fails, the singleton state is cleared and the
+ * last error is rethrown, so a later call starts a fresh load.
+ * Console output is emitted only when NODE_ENV is 'development'.
+ *
+ * @param onProgress - Optional callback for model download progress
+ * @param config - Optional configuration override
+ * @param forceDevice - Force a specific device
+ * @returns Promise resolving to the embedder pipeline
+ */
+export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
+    // Return existing instance if available
+    if (embedderInstance) {
+        return embedderInstance;
+    }
+    // If already initializing, wait for that promise
+    if (isInitializing && initPromise) {
+        return initPromise;
+    }
+    isInitializing = true;
+    const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
+    // On Windows, use DirectML for GPU acceleration (via DirectX12)
+    // CUDA is only available on Linux x64 with onnxruntime-node
+    // Probe for CUDA first — ONNX Runtime crashes (uncatchable native error)
+    // if we attempt CUDA without the required shared libraries
+    const isWindows = process.platform === 'win32';
+    const gpuDevice = isWindows ? 'dml' : (isCudaAvailable() ? 'cuda' : 'cpu');
+    let requestedDevice = forceDevice || (finalConfig.device === 'auto' ? gpuDevice : finalConfig.device);
+    initPromise = (async () => {
+        try {
+            // Configure transformers.js environment
+            env.allowLocalModels = false;
+            const isDev = process.env.NODE_ENV === 'development';
+            if (isDev) {
+                console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
+            }
+            const progressCallback = onProgress ? (data) => {
+                const progress = {
+                    status: data.status || 'progress',
+                    file: data.file,
+                    progress: data.progress,
+                    loaded: data.loaded,
+                    total: data.total,
+                };
+                onProgress(progress);
+            } : undefined;
+            // Try the GPU device first whenever one was resolved or requested ('dml' or 'cuda'), then fall back to CPU
+            // Windows: dml (DirectML/DirectX12), Linux: cuda; any other device is tried on its own with no fallback
+            const devicesToTry = (requestedDevice === 'dml' || requestedDevice === 'cuda')
+                ? [requestedDevice, 'cpu']
+                : [requestedDevice];
+            for (const device of devicesToTry) {
+                try {
+                    if (isDev && device === 'dml') {
+                        console.log('🔧 Trying DirectML (DirectX12) GPU backend...');
+                    }
+                    else if (isDev && device === 'cuda') {
+                        console.log('🔧 Trying CUDA GPU backend...');
+                    }
+                    else if (isDev && device === 'cpu') {
+                        console.log('🔧 Using CPU backend...');
+                    }
+                    else if (isDev && device === 'wasm') {
+                        console.log('🔧 Using WASM backend (slower)...');
+                    }
+                    embedderInstance = await pipeline('feature-extraction', finalConfig.modelId, {
+                        device: device,
+                        dtype: 'fp32',
+                        progress_callback: progressCallback,
+                        session_options: { logSeverityLevel: 3 },
+                    });
+                    currentDevice = device;
+                    if (isDev) {
+                        const label = device === 'dml' ? 'GPU (DirectML/DirectX12)'
+                            : device === 'cuda' ? 'GPU (CUDA)'
+                                : device.toUpperCase();
+                        console.log(`✅ Using ${label} backend`);
+                        console.log('✅ Embedding model loaded successfully');
+                    }
+                    return embedderInstance;
+                }
+                catch (deviceError) {
+                    if (isDev && (device === 'cuda' || device === 'dml')) {
+                        const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
+                        console.log(`⚠️  ${gpuType} not available, falling back to CPU...`);
+                    }
+                    // Continue to next device in list, unless this was the last one
+                    if (device === devicesToTry[devicesToTry.length - 1]) {
+                        throw deviceError; // Last device failed, propagate error
+                    }
+                }
+            }
+            throw new Error('No suitable device found for embedding model');
+        }
+        catch (error) {
+            isInitializing = false;
+            initPromise = null;
+            embedderInstance = null;
+            throw error;
+        }
+        finally {
+            isInitializing = false;
+        }
+    })();
+    return initPromise;
+};
+/**
+ * Check if the embedder is initialized and ready
+ */
+export const isEmbedderReady = () => {
+    return embedderInstance !== null;
+};
+/**
+ * Get the embedder instance (throws if not initialized)
+ */
+export const getEmbedder = () => {
+    if (!embedderInstance) {
+        throw new Error('Embedder not initialized. Call initEmbedder() first.');
+    }
+    return embedderInstance;
+};
+/**
+ * Embed a single text string
+ *
+ * @param text - Text to embed
+ * @returns Float32Array of embedding vector (384 dimensions)
+ */
+export const embedText = async (text) => {
+    const embedder = getEmbedder();
+    const result = await embedder(text, {
+        pooling: 'mean',
+        normalize: true,
+    });
+    // Result is a Tensor, convert to Float32Array
+    return new Float32Array(result.data);
+};
+/**
+ * Embed multiple texts in a single batch
+ * More efficient than calling embedText multiple times
+ *
+ * @param texts - Array of texts to embed
+ * @returns Array of Float32Array embedding vectors
+ */
+export const embedBatch = async (texts) => {
+    if (texts.length === 0) {
+        return [];
+    }
+    const embedder = getEmbedder();
+    // Process batch
+    const result = await embedder(texts, {
+        pooling: 'mean',
+        normalize: true,
+    });
+    // Result shape is [batch_size, dimensions]
+    // Need to split into individual vectors
+    const data = result.data;
+    const dimensions = DEFAULT_EMBEDDING_CONFIG.dimensions;
+    const embeddings = [];
+    for (let i = 0; i < texts.length; i++) {
+        const start = i * dimensions;
+        const end = start + dimensions;
+        embeddings.push(new Float32Array(Array.prototype.slice.call(data, start, end)));
+    }
+    return embeddings;
+};
+/**
+ * Convert Float32Array to regular number array (for KuzuDB storage)
+ */
+export const embeddingToArray = (embedding) => {
+    return Array.from(embedding);
+};
+/**
+ * Cleanup the embedder (free memory)
+ * Call this when done with embeddings
+ */
+export const disposeEmbedder = async () => {
+    if (embedderInstance) {
+        // transformers.js pipelines may have a dispose method
+        try {
+            if ('dispose' in embedderInstance && typeof embedderInstance.dispose === 'function') {
+                await embedderInstance.dispose();
+            }
+        }
+        catch {
+            // Ignore disposal errors
+        }
+        embedderInstance = null;
+        initPromise = null;
+    }
+};

package/dist/core/embeddings/embedding-pipeline.d.ts ADDED Viewed

@@ -0,0 +1,51 @@
+/**
+ * Embedding Pipeline Module
+ *
+ * Orchestrates the background embedding process:
+ * 1. Query embeddable nodes from KuzuDB
+ * 2. Generate text representations
+ * 3. Batch embed using transformers.js
+ * 4. Update KuzuDB with embeddings
+ * 5. Create vector index for semantic search
+ */
+import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
+/**
+ * Progress callback type
+ */
+export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
+/**
+ * Run the embedding pipeline
+ *
+ * @param executeQuery - Function to execute Cypher queries against KuzuDB
+ * @param executeWithReusedStatement - Function to execute with reused prepared statement
+ * @param onProgress - Callback for progress updates
+ * @param config - Optional configuration override
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
+ * @returns Promise that resolves with no value; progress is reported through `onProgress`
+ */
+export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
+/**
+ * Perform semantic search using the vector index
+ *
+ * Uses CodeEmbedding table and queries each node table to get metadata
+ *
+ * @param executeQuery - Function to execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of results to return (default: 10)
+ * @param maxDistance - Maximum distance threshold (default: 0.5)
+ * @returns Array of search results ordered by relevance
+ */
+export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
+/**
+ * Semantic search with graph expansion (flattened results)
+ *
+ * Note: With multi-table schema, graph traversal is simplified.
+ * Returns semantic matches with their metadata.
+ * For full graph traversal, use execute_vector_cypher tool directly.
+ *
+ * @param executeQuery - Function to execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of initial semantic matches (default: 5)
+ * @param _hops - Unused (kept for API compatibility); any value passed is ignored per this note
+ * @returns Semantic matches with metadata (element shape is untyped in this declaration)
+ */
+export declare const semanticSearchWithContext: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, _hops?: number) => Promise<any[]>;