npm - @duytransipher/gitnexus - Versions diffs - 1.4.6-sipher.0 - Mend

@duytransipher/gitnexus 1.4.6-sipher.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (224) hide show

package/LICENSE +73 -0
package/README.md +261 -0
package/dist/cli/ai-context.d.ts +23 -0
package/dist/cli/ai-context.js +265 -0
package/dist/cli/analyze.d.ts +12 -0
package/dist/cli/analyze.js +345 -0
package/dist/cli/augment.d.ts +13 -0
package/dist/cli/augment.js +33 -0
package/dist/cli/clean.d.ts +10 -0
package/dist/cli/clean.js +60 -0
package/dist/cli/eval-server.d.ts +37 -0
package/dist/cli/eval-server.js +389 -0
package/dist/cli/index.d.ts +2 -0
package/dist/cli/index.js +137 -0
package/dist/cli/lazy-action.d.ts +6 -0
package/dist/cli/lazy-action.js +18 -0
package/dist/cli/list.d.ts +6 -0
package/dist/cli/list.js +30 -0
package/dist/cli/mcp.d.ts +8 -0
package/dist/cli/mcp.js +36 -0
package/dist/cli/serve.d.ts +4 -0
package/dist/cli/serve.js +6 -0
package/dist/cli/setup.d.ts +8 -0
package/dist/cli/setup.js +367 -0
package/dist/cli/sipher-patched.d.ts +2 -0
package/dist/cli/sipher-patched.js +77 -0
package/dist/cli/skill-gen.d.ts +26 -0
package/dist/cli/skill-gen.js +549 -0
package/dist/cli/status.d.ts +6 -0
package/dist/cli/status.js +36 -0
package/dist/cli/tool.d.ts +60 -0
package/dist/cli/tool.js +180 -0
package/dist/cli/wiki.d.ts +15 -0
package/dist/cli/wiki.js +365 -0
package/dist/config/ignore-service.d.ts +26 -0
package/dist/config/ignore-service.js +284 -0
package/dist/config/supported-languages.d.ts +15 -0
package/dist/config/supported-languages.js +16 -0
package/dist/core/augmentation/engine.d.ts +26 -0
package/dist/core/augmentation/engine.js +240 -0
package/dist/core/embeddings/embedder.d.ts +60 -0
package/dist/core/embeddings/embedder.js +251 -0
package/dist/core/embeddings/embedding-pipeline.d.ts +51 -0
package/dist/core/embeddings/embedding-pipeline.js +356 -0
package/dist/core/embeddings/index.d.ts +9 -0
package/dist/core/embeddings/index.js +9 -0
package/dist/core/embeddings/text-generator.d.ts +24 -0
package/dist/core/embeddings/text-generator.js +182 -0
package/dist/core/embeddings/types.d.ts +87 -0
package/dist/core/embeddings/types.js +32 -0
package/dist/core/graph/graph.d.ts +2 -0
package/dist/core/graph/graph.js +66 -0
package/dist/core/graph/types.d.ts +66 -0
package/dist/core/graph/types.js +1 -0
package/dist/core/ingestion/ast-cache.d.ts +11 -0
package/dist/core/ingestion/ast-cache.js +35 -0
package/dist/core/ingestion/call-processor.d.ts +23 -0
package/dist/core/ingestion/call-processor.js +793 -0
package/dist/core/ingestion/call-routing.d.ts +68 -0
package/dist/core/ingestion/call-routing.js +129 -0
package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
package/dist/core/ingestion/cluster-enricher.js +170 -0
package/dist/core/ingestion/community-processor.d.ts +39 -0
package/dist/core/ingestion/community-processor.js +312 -0
package/dist/core/ingestion/constants.d.ts +16 -0
package/dist/core/ingestion/constants.js +16 -0
package/dist/core/ingestion/entry-point-scoring.d.ts +40 -0
package/dist/core/ingestion/entry-point-scoring.js +353 -0
package/dist/core/ingestion/export-detection.d.ts +18 -0
package/dist/core/ingestion/export-detection.js +231 -0
package/dist/core/ingestion/filesystem-walker.d.ts +28 -0
package/dist/core/ingestion/filesystem-walker.js +81 -0
package/dist/core/ingestion/framework-detection.d.ts +54 -0
package/dist/core/ingestion/framework-detection.js +411 -0
package/dist/core/ingestion/heritage-processor.d.ts +28 -0
package/dist/core/ingestion/heritage-processor.js +251 -0
package/dist/core/ingestion/import-processor.d.ts +34 -0
package/dist/core/ingestion/import-processor.js +398 -0
package/dist/core/ingestion/language-config.d.ts +46 -0
package/dist/core/ingestion/language-config.js +167 -0
package/dist/core/ingestion/mro-processor.d.ts +45 -0
package/dist/core/ingestion/mro-processor.js +369 -0
package/dist/core/ingestion/named-binding-extraction.d.ts +61 -0
package/dist/core/ingestion/named-binding-extraction.js +363 -0
package/dist/core/ingestion/parsing-processor.d.ts +19 -0
package/dist/core/ingestion/parsing-processor.js +315 -0
package/dist/core/ingestion/pipeline.d.ts +6 -0
package/dist/core/ingestion/pipeline.js +401 -0
package/dist/core/ingestion/process-processor.d.ts +51 -0
package/dist/core/ingestion/process-processor.js +315 -0
package/dist/core/ingestion/resolution-context.d.ts +53 -0
package/dist/core/ingestion/resolution-context.js +132 -0
package/dist/core/ingestion/resolvers/csharp.d.ts +22 -0
package/dist/core/ingestion/resolvers/csharp.js +109 -0
package/dist/core/ingestion/resolvers/go.d.ts +19 -0
package/dist/core/ingestion/resolvers/go.js +42 -0
package/dist/core/ingestion/resolvers/index.d.ts +18 -0
package/dist/core/ingestion/resolvers/index.js +13 -0
package/dist/core/ingestion/resolvers/jvm.d.ts +23 -0
package/dist/core/ingestion/resolvers/jvm.js +87 -0
package/dist/core/ingestion/resolvers/php.d.ts +15 -0
package/dist/core/ingestion/resolvers/php.js +35 -0
package/dist/core/ingestion/resolvers/python.d.ts +19 -0
package/dist/core/ingestion/resolvers/python.js +52 -0
package/dist/core/ingestion/resolvers/ruby.d.ts +12 -0
package/dist/core/ingestion/resolvers/ruby.js +15 -0
package/dist/core/ingestion/resolvers/rust.d.ts +15 -0
package/dist/core/ingestion/resolvers/rust.js +73 -0
package/dist/core/ingestion/resolvers/standard.d.ts +28 -0
package/dist/core/ingestion/resolvers/standard.js +123 -0
package/dist/core/ingestion/resolvers/utils.d.ts +33 -0
package/dist/core/ingestion/resolvers/utils.js +122 -0
package/dist/core/ingestion/structure-processor.d.ts +2 -0
package/dist/core/ingestion/structure-processor.js +36 -0
package/dist/core/ingestion/symbol-table.d.ts +63 -0
package/dist/core/ingestion/symbol-table.js +85 -0
package/dist/core/ingestion/tree-sitter-queries.d.ts +15 -0
package/dist/core/ingestion/tree-sitter-queries.js +888 -0
package/dist/core/ingestion/type-env.d.ts +49 -0
package/dist/core/ingestion/type-env.js +613 -0
package/dist/core/ingestion/type-extractors/c-cpp.d.ts +2 -0
package/dist/core/ingestion/type-extractors/c-cpp.js +385 -0
package/dist/core/ingestion/type-extractors/csharp.d.ts +2 -0
package/dist/core/ingestion/type-extractors/csharp.js +383 -0
package/dist/core/ingestion/type-extractors/go.d.ts +2 -0
package/dist/core/ingestion/type-extractors/go.js +467 -0
package/dist/core/ingestion/type-extractors/index.d.ts +22 -0
package/dist/core/ingestion/type-extractors/index.js +31 -0
package/dist/core/ingestion/type-extractors/jvm.d.ts +3 -0
package/dist/core/ingestion/type-extractors/jvm.js +681 -0
package/dist/core/ingestion/type-extractors/php.d.ts +2 -0
package/dist/core/ingestion/type-extractors/php.js +549 -0
package/dist/core/ingestion/type-extractors/python.d.ts +2 -0
package/dist/core/ingestion/type-extractors/python.js +455 -0
package/dist/core/ingestion/type-extractors/ruby.d.ts +2 -0
package/dist/core/ingestion/type-extractors/ruby.js +389 -0
package/dist/core/ingestion/type-extractors/rust.d.ts +2 -0
package/dist/core/ingestion/type-extractors/rust.js +456 -0
package/dist/core/ingestion/type-extractors/shared.d.ts +145 -0
package/dist/core/ingestion/type-extractors/shared.js +810 -0
package/dist/core/ingestion/type-extractors/swift.d.ts +2 -0
package/dist/core/ingestion/type-extractors/swift.js +137 -0
package/dist/core/ingestion/type-extractors/types.d.ts +127 -0
package/dist/core/ingestion/type-extractors/types.js +1 -0
package/dist/core/ingestion/type-extractors/typescript.d.ts +2 -0
package/dist/core/ingestion/type-extractors/typescript.js +494 -0
package/dist/core/ingestion/utils.d.ts +138 -0
package/dist/core/ingestion/utils.js +1290 -0
package/dist/core/ingestion/workers/parse-worker.d.ts +122 -0
package/dist/core/ingestion/workers/parse-worker.js +1126 -0
package/dist/core/ingestion/workers/worker-pool.d.ts +16 -0
package/dist/core/ingestion/workers/worker-pool.js +128 -0
package/dist/core/lbug/csv-generator.d.ts +33 -0
package/dist/core/lbug/csv-generator.js +366 -0
package/dist/core/lbug/lbug-adapter.d.ts +103 -0
package/dist/core/lbug/lbug-adapter.js +769 -0
package/dist/core/lbug/schema.d.ts +53 -0
package/dist/core/lbug/schema.js +430 -0
package/dist/core/search/bm25-index.d.ts +23 -0
package/dist/core/search/bm25-index.js +96 -0
package/dist/core/search/hybrid-search.d.ts +49 -0
package/dist/core/search/hybrid-search.js +118 -0
package/dist/core/tree-sitter/parser-loader.d.ts +5 -0
package/dist/core/tree-sitter/parser-loader.js +63 -0
package/dist/core/wiki/generator.d.ts +120 -0
package/dist/core/wiki/generator.js +939 -0
package/dist/core/wiki/graph-queries.d.ts +80 -0
package/dist/core/wiki/graph-queries.js +238 -0
package/dist/core/wiki/html-viewer.d.ts +10 -0
package/dist/core/wiki/html-viewer.js +297 -0
package/dist/core/wiki/llm-client.d.ts +43 -0
package/dist/core/wiki/llm-client.js +186 -0
package/dist/core/wiki/prompts.d.ts +53 -0
package/dist/core/wiki/prompts.js +174 -0
package/dist/lib/utils.d.ts +1 -0
package/dist/lib/utils.js +3 -0
package/dist/mcp/compatible-stdio-transport.d.ts +25 -0
package/dist/mcp/compatible-stdio-transport.js +200 -0
package/dist/mcp/core/embedder.d.ts +27 -0
package/dist/mcp/core/embedder.js +108 -0
package/dist/mcp/core/lbug-adapter.d.ts +57 -0
package/dist/mcp/core/lbug-adapter.js +455 -0
package/dist/mcp/local/local-backend.d.ts +181 -0
package/dist/mcp/local/local-backend.js +1722 -0
package/dist/mcp/resources.d.ts +31 -0
package/dist/mcp/resources.js +411 -0
package/dist/mcp/server.d.ts +23 -0
package/dist/mcp/server.js +296 -0
package/dist/mcp/staleness.d.ts +15 -0
package/dist/mcp/staleness.js +29 -0
package/dist/mcp/tools.d.ts +24 -0
package/dist/mcp/tools.js +292 -0
package/dist/server/api.d.ts +10 -0
package/dist/server/api.js +344 -0
package/dist/server/mcp-http.d.ts +13 -0
package/dist/server/mcp-http.js +100 -0
package/dist/storage/git.d.ts +6 -0
package/dist/storage/git.js +35 -0
package/dist/storage/repo-manager.d.ts +138 -0
package/dist/storage/repo-manager.js +299 -0
package/dist/types/pipeline.d.ts +32 -0
package/dist/types/pipeline.js +18 -0
package/dist/unreal/bridge.d.ts +4 -0
package/dist/unreal/bridge.js +113 -0
package/dist/unreal/config.d.ts +6 -0
package/dist/unreal/config.js +55 -0
package/dist/unreal/types.d.ts +105 -0
package/dist/unreal/types.js +1 -0
package/hooks/claude/gitnexus-hook.cjs +238 -0
package/hooks/claude/pre-tool-use.sh +79 -0
package/hooks/claude/session-start.sh +42 -0
package/package.json +100 -0
package/scripts/ensure-cli-executable.cjs +21 -0
package/scripts/patch-tree-sitter-swift.cjs +74 -0
package/scripts/setup-unreal-gitnexus.ps1 +191 -0
package/skills/gitnexus-cli.md +82 -0
package/skills/gitnexus-debugging.md +89 -0
package/skills/gitnexus-exploring.md +78 -0
package/skills/gitnexus-guide.md +64 -0
package/skills/gitnexus-impact-analysis.md +97 -0
package/skills/gitnexus-pr-review.md +163 -0
package/skills/gitnexus-refactoring.md +121 -0
package/vendor/leiden/index.cjs +355 -0
package/vendor/leiden/utils.cjs +392 -0

package/dist/core/embeddings/embedder.js ADDED Viewed

@@ -0,0 +1,251 @@
+/**
+ * Embedder Module
+ *
+ * Singleton factory for transformers.js embedding pipeline.
+ * Handles model loading, caching, and both single and batch embedding operations.
+ *
+ * Uses snowflake-arctic-embed-xs by default (22M params, 384 dims, ~90MB)
+ */
+// Suppress ONNX Runtime native warnings (e.g. VerifyEachNodeIsAssignedToAnEp)
+// Must be set BEFORE onnxruntime-node is imported by transformers.js
+// Level 3 = Error only (skips Warning/Info)
+// A non-empty ORT_LOG_LEVEL already present in the environment is left untouched.
+// NOTE(review): the static `import` declarations below are hoisted by the ES
+// module loader, so this assignment executes only after those modules have been
+// evaluated. Confirm that ONNX Runtime reads ORT_LOG_LEVEL lazily (e.g. at
+// session creation); otherwise this needs to move into a module imported first.
+if (!process.env.ORT_LOG_LEVEL) {
+    process.env.ORT_LOG_LEVEL = '3';
+}
+import { pipeline, env } from '@huggingface/transformers';
+import { existsSync } from 'fs';
+import { execFileSync } from 'child_process';
+import { join } from 'path';
+import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
+/**
+ * Check whether CUDA libraries are actually available on this system.
+ * ONNX Runtime's native layer crashes (uncatchable) if we attempt CUDA
+ * without the required shared libraries, so we probe first.
+ *
+ * Checks the dynamic linker cache (ldconfig) which covers all architectures
+ * and install paths, then falls back to CUDA_PATH / LD_LIBRARY_PATH env vars.
+ */
+function isCudaAvailable() {
+    // Primary: query the dynamic linker cache — covers all architectures,
+    // distro layouts, and custom install paths registered with ldconfig
+    try {
+        const out = execFileSync('ldconfig', ['-p'], { timeout: 3000, encoding: 'utf-8' });
+        if (out.includes('libcublasLt.so.12'))
+            return true;
+    }
+    catch {
+        // ldconfig not available (e.g. non-standard container)
+    }
+    // Fallback: check CUDA_PATH and LD_LIBRARY_PATH for environments where
+    // ldconfig doesn't know about the CUDA install (conda, manual /opt/cuda, etc.)
+    for (const envVar of ['CUDA_PATH', 'LD_LIBRARY_PATH']) {
+        const val = process.env[envVar];
+        if (!val)
+            continue;
+        for (const dir of val.split(':').filter(Boolean)) {
+            if (existsSync(join(dir, 'lib64', 'libcublasLt.so.12')) ||
+                existsSync(join(dir, 'lib', 'libcublasLt.so.12')) ||
+                existsSync(join(dir, 'libcublasLt.so.12')))
+                return true;
+        }
+    }
+    return false;
+}
+// Module-level state for singleton pattern
+// Loaded pipeline; null until initEmbedder() succeeds, and null again after a failed load or disposeEmbedder()
+let embedderInstance = null;
+// True while an initEmbedder() call is loading the model
+let isInitializing = false;
+// Promise of the in-flight initialization, handed to callers that arrive while it is pending
+let initPromise = null;
+// Device string the pipeline was created with ('dml', 'cuda', 'cpu', ...)
+let currentDevice = null;
+/**
+ * Get the current device being used for inference
+ *
+ * @returns The device name recorded by initEmbedder(), or null when no device has been recorded
+ */
+export const getCurrentDevice = () => currentDevice;
+/**
+ * Initialize the embedding model
+ * Uses singleton pattern - only loads once, subsequent calls return cached instance
+ *
+ * @param onProgress - Optional callback for model download progress
+ * @param config - Optional configuration override
+ * @param forceDevice - Force a specific device
+ * @returns Promise resolving to the embedder pipeline
+ */
+export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
+    // Return existing instance if available
+    if (embedderInstance) {
+        return embedderInstance;
+    }
+    // If already initializing, wait for that promise
+    if (isInitializing && initPromise) {
+        return initPromise;
+    }
+    isInitializing = true;
+    const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
+    // On Windows, use DirectML for GPU acceleration (via DirectX12)
+    // CUDA is only available on Linux x64 with onnxruntime-node
+    // Probe for CUDA first — ONNX Runtime crashes (uncatchable native error)
+    // if we attempt CUDA without the required shared libraries
+    const isWindows = process.platform === 'win32';
+    const gpuDevice = isWindows ? 'dml' : (isCudaAvailable() ? 'cuda' : 'cpu');
+    let requestedDevice = forceDevice || (finalConfig.device === 'auto' ? gpuDevice : finalConfig.device);
+    initPromise = (async () => {
+        try {
+            // Configure transformers.js environment
+            env.allowLocalModels = false;
+            const isDev = process.env.NODE_ENV === 'development';
+            if (isDev) {
+                console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
+            }
+            const progressCallback = onProgress ? (data) => {
+                const progress = {
+                    status: data.status || 'progress',
+                    file: data.file,
+                    progress: data.progress,
+                    loaded: data.loaded,
+                    total: data.total,
+                };
+                onProgress(progress);
+            } : undefined;
+            // Try GPU first if auto, fall back to CPU
+            // Windows: dml (DirectML/DirectX12), Linux: cuda
+            const devicesToTry = (requestedDevice === 'dml' || requestedDevice === 'cuda')
+                ? [requestedDevice, 'cpu']
+                : [requestedDevice];
+            for (const device of devicesToTry) {
+                try {
+                    if (isDev && device === 'dml') {
+                        console.log('🔧 Trying DirectML (DirectX12) GPU backend...');
+                    }
+                    else if (isDev && device === 'cuda') {
+                        console.log('🔧 Trying CUDA GPU backend...');
+                    }
+                    else if (isDev && device === 'cpu') {
+                        console.log('🔧 Using CPU backend...');
+                    }
+                    else if (isDev && device === 'wasm') {
+                        console.log('🔧 Using WASM backend (slower)...');
+                    }
+                    embedderInstance = await pipeline('feature-extraction', finalConfig.modelId, {
+                        device: device,
+                        dtype: 'fp32',
+                        progress_callback: progressCallback,
+                        session_options: { logSeverityLevel: 3 },
+                    });
+                    currentDevice = device;
+                    if (isDev) {
+                        const label = device === 'dml' ? 'GPU (DirectML/DirectX12)'
+                            : device === 'cuda' ? 'GPU (CUDA)'
+                                : device.toUpperCase();
+                        console.log(`✅ Using ${label} backend`);
+                        console.log('✅ Embedding model loaded successfully');
+                    }
+                    return embedderInstance;
+                }
+                catch (deviceError) {
+                    if (isDev && (device === 'cuda' || device === 'dml')) {
+                        const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
+                        console.log(`⚠️  ${gpuType} not available, falling back to CPU...`);
+                    }
+                    // Continue to next device in list
+                    if (device === devicesToTry[devicesToTry.length - 1]) {
+                        throw deviceError; // Last device failed, propagate error
+                    }
+                }
+            }
+            throw new Error('No suitable device found for embedding model');
+        }
+        catch (error) {
+            isInitializing = false;
+            initPromise = null;
+            embedderInstance = null;
+            throw error;
+        }
+        finally {
+            isInitializing = false;
+        }
+    })();
+    return initPromise;
+};
+/**
+ * Check if the embedder is initialized and ready
+ */
+export const isEmbedderReady = () => {
+    return embedderInstance !== null;
+};
+/**
+ * Get the embedder instance (throws if not initialized)
+ */
+export const getEmbedder = () => {
+    if (!embedderInstance) {
+        throw new Error('Embedder not initialized. Call initEmbedder() first.');
+    }
+    return embedderInstance;
+};
+/**
+ * Embed a single text string
+ *
+ * @param text - Text to embed
+ * @returns Float32Array of embedding vector (384 dimensions)
+ */
+export const embedText = async (text) => {
+    const embedder = getEmbedder();
+    const result = await embedder(text, {
+        pooling: 'mean',
+        normalize: true,
+    });
+    // Result is a Tensor, convert to Float32Array
+    return new Float32Array(result.data);
+};
+/**
+ * Embed multiple texts in a single batch
+ * More efficient than calling embedText multiple times
+ *
+ * @param texts - Array of texts to embed
+ * @returns Array of Float32Array embedding vectors
+ */
+export const embedBatch = async (texts) => {
+    if (texts.length === 0) {
+        return [];
+    }
+    const embedder = getEmbedder();
+    // Process batch
+    const result = await embedder(texts, {
+        pooling: 'mean',
+        normalize: true,
+    });
+    // Result shape is [batch_size, dimensions]
+    // Need to split into individual vectors
+    const data = result.data;
+    const dimensions = DEFAULT_EMBEDDING_CONFIG.dimensions;
+    const embeddings = [];
+    for (let i = 0; i < texts.length; i++) {
+        const start = i * dimensions;
+        const end = start + dimensions;
+        embeddings.push(new Float32Array(Array.prototype.slice.call(data, start, end)));
+    }
+    return embeddings;
+};
+/**
+ * Convert Float32Array to regular number array (for LadybugDB storage)
+ */
+export const embeddingToArray = (embedding) => {
+    return Array.from(embedding);
+};
+/**
+ * Cleanup the embedder (free memory)
+ * Call this when done with embeddings
+ */
+export const disposeEmbedder = async () => {
+    if (embedderInstance) {
+        // transformers.js pipelines may have a dispose method
+        try {
+            if ('dispose' in embedderInstance && typeof embedderInstance.dispose === 'function') {
+                await embedderInstance.dispose();
+            }
+        }
+        catch {
+            // Ignore disposal errors
+        }
+        embedderInstance = null;
+        initPromise = null;
+    }
+};

package/dist/core/embeddings/embedding-pipeline.d.ts ADDED Viewed

@@ -0,0 +1,51 @@
+/**
+ * Embedding Pipeline Module
+ *
+ * Orchestrates the background embedding process:
+ * 1. Query embeddable nodes from LadybugDB
+ * 2. Generate text representations
+ * 3. Batch embed using transformers.js
+ * 4. Update LadybugDB with embeddings
+ * 5. Create vector index for semantic search
+ */
+import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
+/**
+ * Progress callback type
+ *
+ * Receives one EmbeddingProgress value per progress report; its return value is ignored.
+ */
+export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
+/**
+ * Run the embedding pipeline
+ *
+ * @param executeQuery - Function to execute Cypher queries against LadybugDB
+ * @param executeWithReusedStatement - Function to execute with reused prepared statement
+ * @param onProgress - Callback for progress updates
+ * @param config - Optional configuration override
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
+ * @returns Promise that resolves once the pipeline has finished; it rejects with the
+ *          original error after that error has been reported through onProgress
+ */
+export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
+/**
+ * Perform semantic search using the vector index
+ *
+ * Uses CodeEmbedding table and queries each node table to get metadata
+ *
+ * @param executeQuery - Function to execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of results to return (default: 10)
+ * @param maxDistance - Maximum distance threshold (default: 0.5); only matches with a strictly smaller distance are kept
+ * @returns Array of search results ordered by relevance
+ * @throws Error when the embedding model has not been initialized
+ */
+export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
+/**
+ * Semantic search with graph expansion (flattened results)
+ *
+ * Note: With multi-table schema, graph traversal is simplified.
+ * Returns semantic matches with their metadata.
+ * For full graph traversal, use execute_vector_cypher tool directly.
+ *
+ * @param executeQuery - Function to execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of initial semantic matches (default: 5)
+ * @param _hops - Unused (kept for API compatibility).
+ * @returns Semantic matches with metadata
+ *
+ * NOTE(review): the result is typed as any[], so the element shape is not
+ * visible from this declaration — check the implementation before relying on fields.
+ */
+export declare const semanticSearchWithContext: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, _hops?: number) => Promise<any[]>;

package/dist/core/embeddings/embedding-pipeline.js ADDED Viewed

@@ -0,0 +1,356 @@
+/**
+ * Embedding Pipeline Module
+ *
+ * Orchestrates the background embedding process:
+ * 1. Query embeddable nodes from LadybugDB
+ * 2. Generate text representations
+ * 3. Batch embed using transformers.js
+ * 4. Update LadybugDB with embeddings
+ * 5. Create vector index for semantic search
+ */
+import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady } from './embedder.js';
+import { generateBatchEmbeddingTexts } from './text-generator.js';
+import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
+// Console diagnostics in this module are emitted only when NODE_ENV is 'development'
+const isDev = process.env.NODE_ENV === 'development';
+/**
+ * Query all embeddable nodes from LadybugDB
+ * Uses table-specific queries (File has different schema than code elements)
+ */
+const queryEmbeddableNodes = async (executeQuery) => {
+    const allNodes = [];
+    // Query each embeddable table with table-specific columns
+    for (const label of EMBEDDABLE_LABELS) {
+        try {
+            let query;
+            if (label === 'File') {
+                // File nodes don't have startLine/endLine
+                query = `
+          MATCH (n:File)
+          RETURN n.id AS id, n.name AS name, 'File' AS label,
+                 n.filePath AS filePath, n.content AS content
+        `;
+            }
+            else {
+                // Code elements have startLine/endLine
+                query = `
+          MATCH (n:${label})
+          RETURN n.id AS id, n.name AS name, '${label}' AS label,
+                 n.filePath AS filePath, n.content AS content,
+                 n.startLine AS startLine, n.endLine AS endLine
+        `;
+            }
+            const rows = await executeQuery(query);
+            for (const row of rows) {
+                allNodes.push({
+                    id: row.id ?? row[0],
+                    name: row.name ?? row[1],
+                    label: row.label ?? row[2],
+                    filePath: row.filePath ?? row[3],
+                    content: row.content ?? row[4] ?? '',
+                    startLine: row.startLine ?? row[5],
+                    endLine: row.endLine ?? row[6],
+                });
+            }
+        }
+        catch (error) {
+            // Table might not exist or be empty, continue
+            if (isDev) {
+                console.warn(`Query for ${label} nodes failed:`, error);
+            }
+        }
+    }
+    return allNodes;
+};
+/**
+ * Batch INSERT embeddings into separate CodeEmbedding table
+ * Using a separate lightweight table avoids copy-on-write overhead
+ * that occurs when UPDATEing nodes with large content fields
+ */
+const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
+    // INSERT into separate embedding table - much more memory efficient!
+    const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`;
+    const paramsList = updates.map(u => ({ nodeId: u.id, embedding: u.embedding }));
+    await executeWithReusedStatement(cypher, paramsList);
+};
+/**
+ * Create the vector index for semantic search
+ * Now indexes the separate CodeEmbedding table
+ */
+let vectorExtensionLoaded = false;
+const createVectorIndex = async (executeQuery) => {
+    // LadybugDB v0.15+ requires explicit VECTOR extension loading (once per session)
+    if (!vectorExtensionLoaded) {
+        try {
+            await executeQuery('INSTALL VECTOR');
+            await executeQuery('LOAD EXTENSION VECTOR');
+            vectorExtensionLoaded = true;
+        }
+        catch {
+            // Extension may already be loaded — CREATE_VECTOR_INDEX will fail clearly if not
+            vectorExtensionLoaded = true;
+        }
+    }
+    const cypher = `
+    CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
+  `;
+    try {
+        await executeQuery(cypher);
+    }
+    catch (error) {
+        // Index might already exist
+        if (isDev) {
+            console.warn('Vector index creation warning:', error);
+        }
+    }
+};
+/**
+ * Run the embedding pipeline
+ *
+ * @param executeQuery - Function to execute Cypher queries against LadybugDB
+ * @param executeWithReusedStatement - Function to execute with reused prepared statement
+ * @param onProgress - Callback for progress updates
+ * @param config - Optional configuration override
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
+ */
+export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
+    const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
+    try {
+        // Phase 1: Load embedding model
+        onProgress({
+            phase: 'loading-model',
+            percent: 0,
+            modelDownloadPercent: 0,
+        });
+        await initEmbedder((modelProgress) => {
+            const downloadPercent = modelProgress.progress ?? 0;
+            onProgress({
+                phase: 'loading-model',
+                percent: Math.round(downloadPercent * 0.2),
+                modelDownloadPercent: downloadPercent,
+            });
+        }, finalConfig);
+        onProgress({
+            phase: 'loading-model',
+            percent: 20,
+            modelDownloadPercent: 100,
+        });
+        if (isDev) {
+            console.log('🔍 Querying embeddable nodes...');
+        }
+        // Phase 2: Query embeddable nodes
+        let nodes = await queryEmbeddableNodes(executeQuery);
+        // Incremental mode: filter out nodes that already have embeddings
+        if (skipNodeIds && skipNodeIds.size > 0) {
+            const beforeCount = nodes.length;
+            nodes = nodes.filter(n => !skipNodeIds.has(n.id));
+            if (isDev) {
+                console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
+            }
+        }
+        const totalNodes = nodes.length;
+        if (isDev) {
+            console.log(`📊 Found ${totalNodes} embeddable nodes`);
+        }
+        if (totalNodes === 0) {
+            onProgress({
+                phase: 'ready',
+                percent: 100,
+                nodesProcessed: 0,
+                totalNodes: 0,
+            });
+            return;
+        }
+        // Phase 3: Batch embed nodes
+        const batchSize = finalConfig.batchSize;
+        const totalBatches = Math.ceil(totalNodes / batchSize);
+        let processedNodes = 0;
+        onProgress({
+            phase: 'embedding',
+            percent: 20,
+            nodesProcessed: 0,
+            totalNodes,
+            currentBatch: 0,
+            totalBatches,
+        });
+        for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
+            const start = batchIndex * batchSize;
+            const end = Math.min(start + batchSize, totalNodes);
+            const batch = nodes.slice(start, end);
+            // Generate texts for this batch
+            const texts = generateBatchEmbeddingTexts(batch, finalConfig);
+            // Embed the batch
+            const embeddings = await embedBatch(texts);
+            // Update LadybugDB with embeddings
+            const updates = batch.map((node, i) => ({
+                id: node.id,
+                embedding: embeddingToArray(embeddings[i]),
+            }));
+            await batchInsertEmbeddings(executeWithReusedStatement, updates);
+            processedNodes += batch.length;
+            // Report progress (20-90% for embedding phase)
+            const embeddingProgress = 20 + ((processedNodes / totalNodes) * 70);
+            onProgress({
+                phase: 'embedding',
+                percent: Math.round(embeddingProgress),
+                nodesProcessed: processedNodes,
+                totalNodes,
+                currentBatch: batchIndex + 1,
+                totalBatches,
+            });
+        }
+        // Phase 4: Create vector index
+        onProgress({
+            phase: 'indexing',
+            percent: 90,
+            nodesProcessed: totalNodes,
+            totalNodes,
+        });
+        if (isDev) {
+            console.log('📇 Creating vector index...');
+        }
+        await createVectorIndex(executeQuery);
+        // Complete
+        onProgress({
+            phase: 'ready',
+            percent: 100,
+            nodesProcessed: totalNodes,
+            totalNodes,
+        });
+        if (isDev) {
+            console.log('✅ Embedding pipeline complete!');
+        }
+    }
+    catch (error) {
+        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
+        if (isDev) {
+            console.error('❌ Embedding pipeline error:', error);
+        }
+        onProgress({
+            phase: 'error',
+            percent: 0,
+            error: errorMessage,
+        });
+        throw error;
+    }
+};
+/**
+ * Perform semantic search using the vector index
+ *
+ * Uses CodeEmbedding table and queries each node table to get metadata
+ *
+ * @param executeQuery - Function to execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of results to return (default: 10)
+ * @param maxDistance - Maximum distance threshold (default: 0.5)
+ * @returns Array of search results ordered by relevance
+ */
+export const semanticSearch = async (executeQuery, query, k = 10, maxDistance = 0.5) => {
+    if (!isEmbedderReady()) {
+        throw new Error('Embedding model not initialized. Run embedding pipeline first.');
+    }
+    // Embed the query
+    const queryEmbedding = await embedText(query);
+    const queryVec = embeddingToArray(queryEmbedding);
+    const queryVecStr = `[${queryVec.join(',')}]`;
+    // Query the vector index on CodeEmbedding to get nodeIds and distances
+    const vectorQuery = `
+    CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
+      CAST(${queryVecStr} AS FLOAT[384]), ${k})
+    YIELD node AS emb, distance
+    WITH emb, distance
+    WHERE distance < ${maxDistance}
+    RETURN emb.nodeId AS nodeId, distance
+    ORDER BY distance
+  `;
+    const embResults = await executeQuery(vectorQuery);
+    if (embResults.length === 0) {
+        return [];
+    }
+    // Group results by label for batched metadata queries
+    const byLabel = new Map();
+    for (const embRow of embResults) {
+        const nodeId = embRow.nodeId ?? embRow[0];
+        const distance = embRow.distance ?? embRow[1];
+        const labelEndIdx = nodeId.indexOf(':');
+        const label = labelEndIdx > 0 ? nodeId.substring(0, labelEndIdx) : 'Unknown';
+        if (!byLabel.has(label))
+            byLabel.set(label, []);
+        byLabel.get(label).push({ nodeId, distance });
+    }
+    // Batch-fetch metadata per label
+    const results = [];
+    for (const [label, items] of byLabel) {
+        const idList = items.map(i => `'${i.nodeId.replace(/'/g, "''")}'`).join(', ');
+        try {
+            let nodeQuery;
+            if (label === 'File') {
+                nodeQuery = `
+          MATCH (n:File) WHERE n.id IN [${idList}]
+          RETURN n.id AS id, n.name AS name, n.filePath AS filePath
+        `;
+            }
+            else {
+                nodeQuery = `
+          MATCH (n:${label}) WHERE n.id IN [${idList}]
+          RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
+                 n.startLine AS startLine, n.endLine AS endLine
+        `;
+            }
+            const nodeRows = await executeQuery(nodeQuery);
+            const rowMap = new Map();
+            for (const row of nodeRows) {
+                const id = row.id ?? row[0];
+                rowMap.set(id, row);
+            }
+            for (const item of items) {
+                const nodeRow = rowMap.get(item.nodeId);
+                if (nodeRow) {
+                    results.push({
+                        nodeId: item.nodeId,
+                        name: nodeRow.name ?? nodeRow[1] ?? '',
+                        label,
+                        filePath: nodeRow.filePath ?? nodeRow[2] ?? '',
+                        distance: item.distance,
+                        startLine: label !== 'File' ? (nodeRow.startLine ?? nodeRow[3]) : undefined,
+                        endLine: label !== 'File' ? (nodeRow.endLine ?? nodeRow[4]) : undefined,
+                    });
+                }
+            }
+        }
+        catch {
+            // Table might not exist, skip
+        }
+    }
+    // Re-sort by distance since batch queries may have mixed order
+    results.sort((a, b) => a.distance - b.distance);
+    return results;
+};
+/**
+ * Semantic search with graph expansion (flattened results)
+ *
+ * Note: With multi-table schema, graph traversal is simplified.
+ * Returns semantic matches with their metadata.
+ * For full graph traversal, use execute_vector_cypher tool directly.
+ *
+ * @param executeQuery - Function to execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of initial semantic matches (default: 5)
+ * @param _hops - Unused (kept for API compatibility).
+ * @returns Semantic matches with metadata
+ */
+export const semanticSearchWithContext = async (executeQuery, query, k = 5, _hops = 1) => {
+    // For multi-table schema, just return semantic search results
+    // Graph traversal is complex with separate tables - use execute_vector_cypher instead
+    const results = await semanticSearch(executeQuery, query, k, 0.5);
+    return results.map(r => ({
+        matchId: r.nodeId,
+        matchName: r.name,
+        matchLabel: r.label,
+        matchPath: r.filePath,
+        distance: r.distance,
+        connectedId: null,
+        connectedName: null,
+        connectedLabel: null,
+        relationType: null,
+    }));
+};

package/dist/core/embeddings/index.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Embeddings Module
+ *
+ * Re-exports for the embedding pipeline system.
+ */
+export * from './types.js';
+export * from './embedder.js';
+export * from './text-generator.js';
+export * from './embedding-pipeline.js';

package/dist/core/embeddings/index.js ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Embeddings Module
+ *
+ * Re-exports for the embedding pipeline system.
+ */
+export * from './types.js';
+export * from './embedder.js';
+export * from './text-generator.js';
+export * from './embedding-pipeline.js';