@zuvia-software-solutions/code-mapper 1.4.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/ai-context.js +1 -1
- package/dist/cli/analyze.d.ts +1 -0
- package/dist/cli/analyze.js +73 -82
- package/dist/cli/augment.js +0 -2
- package/dist/cli/eval-server.d.ts +2 -2
- package/dist/cli/eval-server.js +6 -6
- package/dist/cli/index.js +6 -10
- package/dist/cli/mcp.d.ts +1 -3
- package/dist/cli/mcp.js +3 -3
- package/dist/cli/refresh.d.ts +2 -2
- package/dist/cli/refresh.js +24 -29
- package/dist/cli/status.js +4 -13
- package/dist/cli/tool.d.ts +5 -4
- package/dist/cli/tool.js +8 -10
- package/dist/config/ignore-service.js +14 -34
- package/dist/core/augmentation/engine.js +53 -83
- package/dist/core/db/adapter.d.ts +99 -0
- package/dist/core/db/adapter.js +402 -0
- package/dist/core/db/graph-loader.d.ts +27 -0
- package/dist/core/db/graph-loader.js +148 -0
- package/dist/core/db/queries.d.ts +160 -0
- package/dist/core/db/queries.js +441 -0
- package/dist/core/db/schema.d.ts +108 -0
- package/dist/core/db/schema.js +136 -0
- package/dist/core/embeddings/embedder.d.ts +21 -12
- package/dist/core/embeddings/embedder.js +104 -50
- package/dist/core/embeddings/embedding-pipeline.d.ts +48 -22
- package/dist/core/embeddings/embedding-pipeline.js +220 -262
- package/dist/core/embeddings/text-generator.js +4 -19
- package/dist/core/embeddings/types.d.ts +1 -1
- package/dist/core/graph/graph.d.ts +1 -1
- package/dist/core/graph/graph.js +1 -0
- package/dist/core/graph/types.d.ts +11 -9
- package/dist/core/graph/types.js +4 -1
- package/dist/core/incremental/refresh.d.ts +46 -0
- package/dist/core/incremental/refresh.js +503 -0
- package/dist/core/incremental/types.d.ts +2 -1
- package/dist/core/incremental/types.js +42 -44
- package/dist/core/ingestion/ast-cache.js +1 -0
- package/dist/core/ingestion/call-processor.d.ts +15 -3
- package/dist/core/ingestion/call-processor.js +448 -60
- package/dist/core/ingestion/cluster-enricher.d.ts +1 -1
- package/dist/core/ingestion/cluster-enricher.js +2 -0
- package/dist/core/ingestion/community-processor.d.ts +1 -1
- package/dist/core/ingestion/community-processor.js +8 -3
- package/dist/core/ingestion/export-detection.d.ts +1 -1
- package/dist/core/ingestion/export-detection.js +1 -1
- package/dist/core/ingestion/filesystem-walker.js +1 -1
- package/dist/core/ingestion/heritage-processor.d.ts +2 -2
- package/dist/core/ingestion/heritage-processor.js +22 -11
- package/dist/core/ingestion/import-processor.d.ts +2 -2
- package/dist/core/ingestion/import-processor.js +24 -9
- package/dist/core/ingestion/language-config.js +7 -4
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +23 -11
- package/dist/core/ingestion/named-binding-extraction.js +5 -5
- package/dist/core/ingestion/parsing-processor.d.ts +4 -4
- package/dist/core/ingestion/parsing-processor.js +26 -18
- package/dist/core/ingestion/pipeline.d.ts +4 -2
- package/dist/core/ingestion/pipeline.js +50 -20
- package/dist/core/ingestion/process-processor.d.ts +2 -2
- package/dist/core/ingestion/process-processor.js +28 -14
- package/dist/core/ingestion/resolution-context.d.ts +1 -1
- package/dist/core/ingestion/resolution-context.js +14 -4
- package/dist/core/ingestion/resolvers/csharp.js +4 -3
- package/dist/core/ingestion/resolvers/go.js +3 -1
- package/dist/core/ingestion/resolvers/jvm.js +13 -4
- package/dist/core/ingestion/resolvers/standard.js +2 -2
- package/dist/core/ingestion/resolvers/utils.js +6 -2
- package/dist/core/ingestion/route-stitcher.d.ts +15 -0
- package/dist/core/ingestion/route-stitcher.js +92 -0
- package/dist/core/ingestion/structure-processor.d.ts +1 -1
- package/dist/core/ingestion/structure-processor.js +3 -2
- package/dist/core/ingestion/symbol-table.d.ts +2 -0
- package/dist/core/ingestion/symbol-table.js +5 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +177 -0
- package/dist/core/ingestion/type-env.js +20 -0
- package/dist/core/ingestion/type-extractors/csharp.js +4 -3
- package/dist/core/ingestion/type-extractors/go.js +23 -12
- package/dist/core/ingestion/type-extractors/php.js +18 -10
- package/dist/core/ingestion/type-extractors/ruby.js +15 -3
- package/dist/core/ingestion/type-extractors/rust.js +3 -2
- package/dist/core/ingestion/type-extractors/shared.js +3 -2
- package/dist/core/ingestion/type-extractors/typescript.js +11 -5
- package/dist/core/ingestion/utils.d.ts +27 -4
- package/dist/core/ingestion/utils.js +145 -100
- package/dist/core/ingestion/workers/parse-worker.d.ts +1 -0
- package/dist/core/ingestion/workers/parse-worker.js +97 -29
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/search/bm25-index.d.ts +15 -8
- package/dist/core/search/bm25-index.js +48 -98
- package/dist/core/search/hybrid-search.d.ts +9 -3
- package/dist/core/search/hybrid-search.js +30 -25
- package/dist/core/search/reranker.js +9 -7
- package/dist/core/search/types.d.ts +0 -4
- package/dist/core/semantic/tsgo-service.d.ts +7 -1
- package/dist/core/semantic/tsgo-service.js +165 -66
- package/dist/lib/tsgo-test.d.ts +2 -0
- package/dist/lib/tsgo-test.js +6 -0
- package/dist/lib/type-utils.d.ts +25 -0
- package/dist/lib/type-utils.js +22 -0
- package/dist/lib/utils.d.ts +3 -2
- package/dist/lib/utils.js +3 -2
- package/dist/mcp/compatible-stdio-transport.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +29 -56
- package/dist/mcp/local/local-backend.js +808 -1118
- package/dist/mcp/resources.js +35 -25
- package/dist/mcp/server.d.ts +1 -1
- package/dist/mcp/server.js +5 -5
- package/dist/mcp/tools.js +24 -25
- package/dist/storage/repo-manager.d.ts +2 -12
- package/dist/storage/repo-manager.js +1 -47
- package/dist/types/pipeline.d.ts +8 -5
- package/dist/types/pipeline.js +5 -0
- package/package.json +18 -11
- package/dist/cli/serve.d.ts +0 -5
- package/dist/cli/serve.js +0 -8
- package/dist/core/incremental/child-process.d.ts +0 -8
- package/dist/core/incremental/child-process.js +0 -649
- package/dist/core/incremental/refresh-coordinator.d.ts +0 -32
- package/dist/core/incremental/refresh-coordinator.js +0 -147
- package/dist/core/lbug/csv-generator.d.ts +0 -28
- package/dist/core/lbug/csv-generator.js +0 -355
- package/dist/core/lbug/lbug-adapter.d.ts +0 -96
- package/dist/core/lbug/lbug-adapter.js +0 -753
- package/dist/core/lbug/schema.d.ts +0 -46
- package/dist/core/lbug/schema.js +0 -402
- package/dist/mcp/core/embedder.d.ts +0 -24
- package/dist/mcp/core/embedder.js +0 -168
- package/dist/mcp/core/lbug-adapter.d.ts +0 -29
- package/dist/mcp/core/lbug-adapter.js +0 -330
- package/dist/server/api.d.ts +0 -5
- package/dist/server/api.js +0 -340
- package/dist/server/mcp-http.d.ts +0 -7
- package/dist/server/mcp-http.js +0 -95
- package/models/mlx-embedder.py +0 -185
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
/** @file graph.ts @description Factory for creating in-memory knowledge graphs backed by Maps for O(1) lookups */
|
|
2
|
-
import { KnowledgeGraph } from './types.js';
|
|
2
|
+
import { type KnowledgeGraph } from './types.js';
|
|
3
3
|
/** Create a new empty KnowledgeGraph instance */
|
|
4
4
|
export declare const createKnowledgeGraph: () => KnowledgeGraph;
|
package/dist/core/graph/graph.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// code-mapper/src/core/graph/graph.ts
|
|
2
2
|
/** @file graph.ts @description Factory for creating in-memory knowledge graphs backed by Maps for O(1) lookups */
|
|
3
|
+
import {} from './types.js';
|
|
3
4
|
/** Create a new empty KnowledgeGraph instance */
|
|
4
5
|
export const createKnowledgeGraph = () => {
|
|
5
6
|
const nodeMap = new Map();
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/** @file types.ts @description Core type definitions for the knowledge graph: nodes, relationships, and the graph interface */
|
|
2
|
-
|
|
2
|
+
import { type NodeLabel, type EdgeType, type NodeId, type EdgeId } from '../db/schema.js';
|
|
3
3
|
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
4
|
+
export { type NodeLabel, type EdgeType, type NodeId, type EdgeId } from '../db/schema.js';
|
|
4
5
|
export type NodeProperties = {
|
|
5
6
|
name: string;
|
|
6
7
|
filePath: string;
|
|
@@ -26,17 +27,18 @@ export type NodeProperties = {
|
|
|
26
27
|
parameterCount?: number;
|
|
27
28
|
returnType?: string;
|
|
28
29
|
};
|
|
29
|
-
|
|
30
|
+
/** @deprecated Use EdgeType directly — this alias exists only for migration convenience */
|
|
31
|
+
export type RelationshipType = EdgeType;
|
|
30
32
|
export interface GraphNode {
|
|
31
|
-
id:
|
|
33
|
+
id: NodeId;
|
|
32
34
|
label: NodeLabel;
|
|
33
35
|
properties: NodeProperties;
|
|
34
36
|
}
|
|
35
37
|
export interface GraphRelationship {
|
|
36
|
-
id:
|
|
37
|
-
sourceId:
|
|
38
|
-
targetId:
|
|
39
|
-
type:
|
|
38
|
+
id: EdgeId;
|
|
39
|
+
sourceId: NodeId;
|
|
40
|
+
targetId: NodeId;
|
|
41
|
+
type: EdgeType;
|
|
40
42
|
/** Confidence score 0-1 (1.0 = certain, lower = uncertain) */
|
|
41
43
|
confidence: number;
|
|
42
44
|
/** Resolution reason: 'import-resolved', 'same-file', 'fuzzy-global', or empty for non-CALLS */
|
|
@@ -59,11 +61,11 @@ export interface KnowledgeGraph {
|
|
|
59
61
|
forEachNode: (fn: (node: GraphNode) => void) => void;
|
|
60
62
|
forEachRelationship: (fn: (rel: GraphRelationship) => void) => void;
|
|
61
63
|
/** Lookup a single node by id — O(1) */
|
|
62
|
-
getNode: (id:
|
|
64
|
+
getNode: (id: NodeId) => GraphNode | undefined;
|
|
63
65
|
nodeCount: number;
|
|
64
66
|
relationshipCount: number;
|
|
65
67
|
addNode: (node: GraphNode) => void;
|
|
66
68
|
addRelationship: (relationship: GraphRelationship) => void;
|
|
67
|
-
removeNode: (nodeId:
|
|
69
|
+
removeNode: (nodeId: NodeId) => boolean;
|
|
68
70
|
removeNodesByFile: (filePath: string) => number;
|
|
69
71
|
}
|
package/dist/core/graph/types.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
1
|
// code-mapper/src/core/graph/types.ts
|
|
2
2
|
/** @file types.ts @description Core type definitions for the knowledge graph: nodes, relationships, and the graph interface */
|
|
3
|
-
|
|
3
|
+
import {} from '../db/schema.js';
|
|
4
|
+
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
5
|
+
// Re-export canonical types so existing consumers can keep importing from here
|
|
6
|
+
export {} from '../db/schema.js';
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Standalone incremental refresh — parses dirty files with tree-sitter
|
|
3
|
+
* and writes directly to the SQLite database via the adapter.
|
|
4
|
+
*
|
|
5
|
+
* Extracted from LocalBackend.inProcessRefresh so both the MCP server
|
|
6
|
+
* and the CLI can call it without duplicating logic.
|
|
7
|
+
*
|
|
8
|
+
* Phases:
|
|
9
|
+
* 1. Delete old nodes for dirty files
|
|
10
|
+
* 2. Parse modified/created files with tree-sitter
|
|
11
|
+
* 3. Insert File nodes + symbol nodes + DEFINES edges
|
|
12
|
+
* 4. Resolve call edges using tsgo LSP (TS/JS files only)
|
|
13
|
+
* 5. Repair cross-file edges (callers from unchanged files → changed symbols)
|
|
14
|
+
*/
|
|
15
|
+
import type Database from 'better-sqlite3';
|
|
16
|
+
import { type DirtyFileEntry, type RefreshResult } from './types.js';
|
|
17
|
+
/**
|
|
18
|
+
* Parse dirty files with tree-sitter and write nodes/edges to the database.
|
|
19
|
+
*
|
|
20
|
+
* Phase 1: Delete old nodes for all dirty files
|
|
21
|
+
* Phase 2: Parse modified/created files with tree-sitter
|
|
22
|
+
* Phase 3: Insert File nodes + symbol nodes + DEFINES edges
|
|
23
|
+
* Phase 4: Resolve call edges using tsgo LSP (TS/JS only, optional)
|
|
24
|
+
* Phase 5: Repair cross-file edges from unchanged files (tsgo, optional)
|
|
25
|
+
*
|
|
26
|
+
* @param db - Open better-sqlite3 database instance
|
|
27
|
+
* @param repoPath - Absolute path to the repository root
|
|
28
|
+
* @param dirtyFiles - Files that changed (modified, created, or deleted)
|
|
29
|
+
* @returns Statistics about what was processed
|
|
30
|
+
*/
|
|
31
|
+
export declare function refreshFiles(db: Database.Database, repoPath: string, dirtyFiles: readonly DirtyFileEntry[]): Promise<RefreshResult>;
|
|
32
|
+
/**
|
|
33
|
+
* Update embeddings for dirty files so semantic search stays in sync.
|
|
34
|
+
*
|
|
35
|
+
* Runs ONLY when the repo previously had embeddings (hasEmbeddings = true).
|
|
36
|
+
* Steps:
|
|
37
|
+
* 1. Delete stale embeddings for all dirty file paths
|
|
38
|
+
* 2. Query new embeddable nodes for modified/created files
|
|
39
|
+
* 3. Enrich with graph context (callers, callees, module) — same as full analyze
|
|
40
|
+
* 4. Generate text, batch embed, insert via SQLite adapter
|
|
41
|
+
*
|
|
42
|
+
* @param db - Open better-sqlite3 database instance
|
|
43
|
+
* @param dirtyFiles - Files that changed (modified, created, or deleted)
|
|
44
|
+
* @param hasEmbeddings - Whether the repo previously had embeddings (skip if false); when omitted, the database is checked for existing embeddings
|
|
45
|
+
*/
|
|
46
|
+
export declare function refreshEmbeddings(db: Database.Database, dirtyFiles: readonly DirtyFileEntry[], hasEmbeddings?: boolean): Promise<void>;
|
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
// code-mapper/src/core/incremental/refresh.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file Standalone incremental refresh — parses dirty files with tree-sitter
|
|
4
|
+
* and writes directly to the SQLite database via the adapter.
|
|
5
|
+
*
|
|
6
|
+
* Extracted from LocalBackend.inProcessRefresh so both the MCP server
|
|
7
|
+
* and the CLI can call it without duplicating logic.
|
|
8
|
+
*
|
|
9
|
+
* Phases:
|
|
10
|
+
* 1. Delete old nodes for dirty files
|
|
11
|
+
* 2. Parse modified/created files with tree-sitter
|
|
12
|
+
* 3. Insert File nodes + symbol nodes + DEFINES edges
|
|
13
|
+
* 4. Resolve call edges (tsgo LSP for TS/JS files when available, name-based heuristic lookup otherwise)
|
|
14
|
+
* 5. Repair cross-file edges (callers from unchanged files → changed symbols)
|
|
15
|
+
*/
|
|
16
|
+
import path from 'path';
|
|
17
|
+
import fsSync from 'fs';
|
|
18
|
+
import Parser from 'tree-sitter';
|
|
19
|
+
import { LANGUAGE_QUERIES } from '../ingestion/tree-sitter-queries.js';
|
|
20
|
+
import { getLanguageFromFilename, getDefinitionNodeFromCaptures } from '../ingestion/utils.js';
|
|
21
|
+
import { loadParser, loadLanguage, isLanguageAvailable } from '../tree-sitter/parser-loader.js';
|
|
22
|
+
import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from '../ingestion/constants.js';
|
|
23
|
+
import { generateId } from '../../lib/utils.js';
|
|
24
|
+
import { deleteNodesByFile, insertNode, insertEdge, findNodeAtLine, findNodesByFile, deleteEmbeddingsByFile, insertEmbeddingsBatch, countEmbeddings } from '../db/adapter.js';
|
|
25
|
+
import { assertNodeLabel, toNodeId, toEdgeId } from '../db/schema.js';
|
|
26
|
+
import {} from './types.js';
|
|
27
|
+
import { getTsgoService } from '../semantic/tsgo-service.js';
|
|
28
|
+
import { EMBEDDABLE_LABELS } from '../embeddings/types.js';
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Helpers
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
/** Lower-case file extensions (leading dot included) that tsgo can resolve */
const TS_JS_EXTENSIONS = new Set([
    '.ts', '.tsx', '.js', '.jsx',
    '.mts', '.cts', '.mjs', '.cjs',
]);
/**
 * Report whether a path names a TypeScript or JavaScript source file.
 *
 * Only the final extension of the path is inspected, and its letter case is ignored.
 *
 * @param filePath - Relative or absolute file path
 * @returns true when the extension is one of TS_JS_EXTENSIONS
 */
function isTypeScriptOrJavaScript(filePath) {
    return TS_JS_EXTENSIONS.has(path.extname(filePath).toLowerCase());
}
|
|
38
|
+
/**
 * Look up the symbol node that encloses a given 0-based line of a file.
 *
 * Delegates to `findNodeAtLine`, passing `'File'` as its fourth argument.
 * NOTE(review): presumably that argument is a label to exclude and the adapter
 * picks the innermost match. Only `'File'` is passed here, so confirm in
 * ../db/adapter.js whether Folder, Community and Process nodes are filtered too.
 *
 * @param db - Open better-sqlite3 database instance
 * @param filePath - File path as stored on the node rows
 * @param line - 0-based line number
 * @returns Whatever `findNodeAtLine` returns for this position
 */
function findEnclosingNode(db, filePath, line) {
    const enclosing = findNodeAtLine(db, filePath, line, 'File');
    return enclosing;
}
|
|
45
|
+
/**
 * Resolve the node defined at a given 0-based line of a file.
 *
 * A non-structural node whose `startLine` equals `line` wins; when no such row
 * exists the result of `findEnclosingNode` for the same position is returned.
 *
 * @param db - Open better-sqlite3 database instance
 * @param filePath - File path as stored on the node rows
 * @param line - 0-based line number
 */
function findNodeByFileAndLine(db, filePath, line) {
    // Exact startLine lookup; File/Folder/Community/Process rows are filtered out in SQL
    const exactMatchSql = `SELECT * FROM nodes
WHERE filePath = ? AND startLine = ?
AND label NOT IN ('File', 'Folder', 'Community', 'Process')
LIMIT 1`;
    // Safe: schema and NodeRow defined together in schema.ts
    const exactRow = db.prepare(exactMatchSql).get(filePath, line);
    // No definition starts on this line, so use the enclosing symbol instead
    return exactRow ? exactRow : findEnclosingNode(db, filePath, line);
}
|
|
62
|
+
/** Tree-sitter capture name → node label, listed in the order the captures are tested */
const REFRESH_DEFINITION_LABELS = [
    ['definition.function', 'Function'],
    ['definition.class', 'Class'],
    ['definition.interface', 'Interface'],
    ['definition.method', 'Method'],
    ['definition.struct', 'Struct'],
    ['definition.enum', 'Enum'],
    ['definition.namespace', 'Namespace'],
    ['definition.module', 'Module'],
    ['definition.trait', 'Trait'],
    ['definition.impl', 'Impl'],
    ['definition.type', 'TypeAlias'],
    ['definition.const', 'Const'],
    ['definition.static', 'Static'],
    ['definition.typedef', 'Typedef'],
    ['definition.macro', 'Macro'],
    ['definition.union', 'Union'],
    ['definition.property', 'Property'],
    ['definition.record', 'Record'],
    ['definition.delegate', 'Delegate'],
    ['definition.annotation', 'Annotation'],
    ['definition.constructor', 'Constructor'],
    ['definition.template', 'Template'],
];
/** Pick the node label for a definition match; 'CodeElement' when no known definition capture is present */
function labelForCaptures(captureMap) {
    for (const [captureName, label] of REFRESH_DEFINITION_LABELS) {
        if (captureMap[captureName])
            return label;
    }
    return 'CodeElement';
}
/** Write all edges inside one synchronous transaction and return how many were written */
function commitCallEdges(db, edges) {
    if (edges.length === 0)
        return 0;
    // db.transaction() commits on return and rolls back automatically if insertEdge throws
    const writeAll = db.transaction((rows) => {
        for (const edge of rows)
            insertEdge(db, edge);
    });
    writeAll(edges);
    return edges.length;
}
/**
 * Parse dirty files with tree-sitter and write nodes/edges to the database.
 *
 * Phase 1: Delete old nodes for all dirty files
 * Phase 2: Parse modified/created files with tree-sitter
 * Phase 3: Insert File nodes + symbol nodes + DEFINES edges
 * Phase 4: Resolve call edges from dirty files — tsgo LSP for TS/JS files when
 *          available, name-based heuristic lookup otherwise
 * Phase 5: Repair cross-file edges from unchanged files (tsgo only, optional)
 *
 * This function does not open a transaction around Phases 1-3. In Phases 4 and 5
 * all (asynchronous) resolution happens first and the resulting CALLS edges are
 * then written in a single synchronous transaction per phase, so no write
 * transaction is held open across an `await`. A failure in Phase 4 or 5 is
 * logged and leaves that phase's edges unwritten; it does not reject the refresh.
 *
 * @param db - Open better-sqlite3 database instance
 * @param repoPath - Absolute path to the repository root
 * @param dirtyFiles - Files that changed (modified, created, or deleted)
 * @returns Statistics about what was processed; `edgesInserted` counts only
 *          edges whose transaction committed
 */
export async function refreshFiles(db, repoPath, dirtyFiles) {
    const t0 = Date.now();
    let nodesDeleted = 0;
    let nodesInserted = 0;
    let edgesInserted = 0;
    let filesSkipped = 0;
    // Phase 1: Delete old nodes for all dirty files (including deleted ones)
    for (const entry of dirtyFiles) {
        nodesDeleted += deleteNodesByFile(db, entry.relativePath);
    }
    // Phase 2: Parse modified/created files with tree-sitter
    const parser = await loadParser();
    const filesToProcess = dirtyFiles.filter(f => f.changeKind === 'modified' || f.changeKind === 'created');
    const allDefinitions = [];
    const callSites = [];
    const insertedFilePaths = new Set();
    for (const entry of filesToProcess) {
        const relPath = entry.relativePath;
        const absPath = path.resolve(repoPath, relPath);
        const language = getLanguageFromFilename(relPath);
        if (!language || !isLanguageAvailable(language)) {
            filesSkipped++;
            continue;
        }
        let content;
        try {
            content = fsSync.readFileSync(absPath, 'utf-8');
        }
        catch {
            // Unreadable (e.g. removed since the change was detected) — skip
            filesSkipped++;
            continue;
        }
        if (content.length > TREE_SITTER_MAX_BUFFER) {
            filesSkipped++;
            continue;
        }
        try {
            await loadLanguage(language, relPath);
        }
        catch {
            filesSkipped++;
            continue;
        }
        let tree;
        try {
            tree = parser.parse(content, undefined, { bufferSize: getTreeSitterBufferSize(content.length) });
        }
        catch {
            filesSkipped++;
            continue;
        }
        const queryString = LANGUAGE_QUERIES[language];
        if (!queryString) {
            filesSkipped++;
            continue;
        }
        let matches;
        try {
            const tsLang = parser.getLanguage();
            const query = new Parser.Query(tsLang, queryString);
            matches = query.matches(tree.rootNode);
        }
        catch {
            filesSkipped++;
            continue;
        }
        insertedFilePaths.add(relPath);
        for (const match of matches) {
            const captureMap = {};
            for (const c of match.captures)
                captureMap[c.name] = c.node;
            // Imports and heritage clauses are not handled by the incremental refresh
            if (captureMap['import'] || captureMap['import.source'])
                continue;
            if (captureMap['heritage'] || captureMap['heritage.impl'])
                continue;
            if (captureMap['call'] || captureMap['call.name']) {
                const callNameNode = captureMap['call.name'];
                if (callNameNode) {
                    callSites.push({
                        filePath: relPath, absPath,
                        name: callNameNode.text,
                        line: callNameNode.startPosition.row,
                        character: callNameNode.startPosition.column,
                    });
                }
                continue;
            }
            const nameNode = captureMap['name'];
            // A constructor capture may come without a name; every other definition needs one
            if (!nameNode && !captureMap['definition.constructor'])
                continue;
            const nodeName = nameNode ? nameNode.text : 'init';
            const nodeLabel = labelForCaptures(captureMap);
            const defNode = getDefinitionNodeFromCaptures(captureMap);
            const startLine = defNode ? defNode.startPosition.row : (nameNode ? nameNode.startPosition.row : 0);
            const endLine = defNode ? defNode.endPosition.row : startLine;
            // Stored content is capped at 50,000 characters
            const nodeContent = defNode ? (defNode.text || '').slice(0, 50_000) : '';
            allDefinitions.push({
                nodeId: generateId(nodeLabel, `${relPath}:${nodeName}`),
                name: nodeName, label: nodeLabel, filePath: relPath,
                startLine, endLine, content: nodeContent,
            });
        }
    }
    // Phase 3: Insert File nodes + symbol nodes + DEFINES edges
    for (const filePath of insertedFilePaths) {
        const fileId = generateId('File', filePath);
        insertNode(db, {
            id: toNodeId(fileId), label: 'File',
            name: path.basename(filePath), filePath, content: '',
        });
        nodesInserted++;
    }
    for (const def of allDefinitions) {
        assertNodeLabel(def.label);
        insertNode(db, {
            id: toNodeId(def.nodeId), label: def.label,
            name: def.name, filePath: def.filePath,
            startLine: def.startLine, endLine: def.endLine,
            content: def.content, description: '',
        });
        nodesInserted++;
        const fileId = generateId('File', def.filePath);
        insertEdge(db, {
            id: toEdgeId(`${fileId}_DEFINES_${def.nodeId}`),
            sourceId: toNodeId(fileId), targetId: toNodeId(def.nodeId),
            type: 'DEFINES', confidence: 1.0, reason: '',
        });
        edgesInserted++;
    }
    // Phase 4 + 5: Resolve call edges and cross-file edges using tsgo LSP
    // (TS/JS files only — tsgo is optional, skip if unavailable)
    console.error(`Code Mapper: refresh tsgo init with repoPath=${repoPath}`);
    const tsgoService = getTsgoService(repoPath);
    let tsgoReady = false;
    try {
        tsgoReady = await tsgoService.start();
        console.error(`Code Mapper: refresh tsgo ready=${tsgoReady}`);
    }
    catch (err) {
        console.error(`Code Mapper: refresh tsgo failed: ${err instanceof Error ? err.message : err}`);
    }
    // Phase 4: Resolve call edges from dirty files
    // Always runs — tsgo provides 0.99 confidence, heuristic fallback provides 0.5-0.95
    if (callSites.length > 0) {
        try {
            const { findNodesByName } = await import('../db/adapter.js');
            // Notify tsgo about changed files if available
            if (tsgoReady) {
                for (const entry of filesToProcess) {
                    const absPath = path.resolve(repoPath, entry.relativePath);
                    if (isTypeScriptOrJavaScript(entry.relativePath)) {
                        await tsgoService.notifyFileChanged(absPath);
                    }
                }
            }
            console.error(`Code Mapper: refresh Phase 4 — ${callSites.length} call sites to resolve`);
            // Resolve everything first (may await tsgo), write afterwards in one transaction
            const callEdges = [];
            for (const callSite of callSites) {
                const sourceNode = findEnclosingNode(db, callSite.filePath, callSite.line);
                if (!sourceNode)
                    continue;
                let targetNode;
                let confidence = 0.5;
                let reason = 'global';
                // Try tsgo first for TS/JS files
                if (tsgoReady && isTypeScriptOrJavaScript(callSite.filePath)) {
                    try {
                        const def = await tsgoService.resolveDefinition(callSite.absPath, callSite.line, callSite.character);
                        if (def) {
                            targetNode = findNodeByFileAndLine(db, def.filePath, def.line);
                            if (targetNode) {
                                confidence = 0.99;
                                reason = 'tsgo-lsp';
                            }
                        }
                    }
                    catch {
                        // Deliberate best effort: a tsgo failure for one call site
                        // falls through to the name-based heuristic below
                    }
                }
                // Heuristic fallback: name-based lookup in DB
                if (!targetNode) {
                    const candidates = findNodesByName(db, callSite.name, undefined, 5);
                    const sameFile = candidates.find(c => c.filePath === callSite.filePath);
                    if (sameFile) {
                        targetNode = sameFile;
                        confidence = 0.95;
                        reason = 'same-file';
                    }
                    else if (candidates.length === 1) {
                        targetNode = candidates[0];
                        confidence = 0.9;
                        reason = 'import-resolved';
                    }
                    else if (candidates.length > 0) {
                        targetNode = candidates[0];
                        confidence = 0.5;
                        reason = 'global';
                    }
                }
                if (!targetNode)
                    continue;
                // Skip self-edges
                if (sourceNode.id === targetNode.id)
                    continue;
                callEdges.push({
                    id: toEdgeId(`${sourceNode.id}_CALLS_${targetNode.id}`),
                    sourceId: sourceNode.id,
                    targetId: targetNode.id,
                    type: 'CALLS',
                    confidence,
                    reason,
                    callLine: callSite.line,
                });
            }
            // Counted only after the transaction has committed
            edgesInserted += commitCallEdges(db, callEdges);
        }
        catch (err) {
            console.error(`Code Mapper: Phase 4 call resolution failed: ${err instanceof Error ? err.message : err}`);
        }
    }
    // Phase 5: Repair cross-file edges (tsgo only — needs findReferences)
    if (tsgoReady) {
        const dirtyFilePaths = new Set(dirtyFiles.map(f => f.relativePath));
        try {
            // For each newly inserted definition, find references from UNCHANGED files
            // and create CALLS edges so the graph stays consistent.
            const crossFileEdges = [];
            for (const def of allDefinitions) {
                if (!isTypeScriptOrJavaScript(def.filePath))
                    continue;
                const absPath = path.resolve(repoPath, def.filePath);
                const refs = await tsgoService.findReferences(absPath, def.startLine, 0);
                const targetId = toNodeId(def.nodeId);
                for (const ref of refs) {
                    // Skip refs from dirty files — already handled in Phase 4
                    if (dirtyFilePaths.has(ref.filePath))
                        continue;
                    // Skip refs from non-TS/JS files
                    if (!isTypeScriptOrJavaScript(ref.filePath))
                        continue;
                    // This reference is from an UNCHANGED file → create/update the CALLS edge
                    const sourceNode = findEnclosingNode(db, ref.filePath, ref.line);
                    if (!sourceNode)
                        continue;
                    // Skip self-edges
                    if (sourceNode.id === targetId)
                        continue;
                    crossFileEdges.push({
                        id: toEdgeId(`${sourceNode.id}_CALLS_${targetId}`),
                        sourceId: sourceNode.id,
                        targetId,
                        type: 'CALLS',
                        confidence: 0.99,
                        reason: 'tsgo-cross-file',
                        callLine: ref.line,
                    });
                }
            }
            edgesInserted += commitCallEdges(db, crossFileEdges);
        }
        catch (err) {
            // Log but don't fail the entire refresh — earlier phases are already written
            console.error(`Code Mapper: tsgo call resolution failed: ${err instanceof Error ? err.message : err}`);
        }
    }
    // FTS5 auto-updates via triggers — no manual rebuild needed
    return {
        filesProcessed: filesToProcess.length, filesSkipped,
        nodesDeleted, nodesInserted, edgesInserted,
        durationMs: Date.now() - t0,
        tsgoEnabled: tsgoReady,
    };
}
|
|
399
|
+
// ---------------------------------------------------------------------------
|
|
400
|
+
// Incremental embedding refresh
|
|
401
|
+
// ---------------------------------------------------------------------------
|
|
402
|
+
/**
|
|
403
|
+
* Update embeddings for dirty files so semantic search stays in sync.
|
|
404
|
+
*
|
|
405
|
+
* Runs ONLY when the repo previously had embeddings (hasEmbeddings = true).
|
|
406
|
+
* Steps:
|
|
407
|
+
* 1. Delete stale embeddings for all dirty file paths
|
|
408
|
+
* 2. Query new embeddable nodes for modified/created files
|
|
409
|
+
* 3. Enrich with graph context (callers, callees, module) — same as full analyze
|
|
410
|
+
* 4. Generate text, batch embed, insert via SQLite adapter
|
|
411
|
+
*
|
|
412
|
+
* @param db - Open better-sqlite3 database instance
|
|
413
|
+
* @param dirtyFiles - Files that changed (modified, created, or deleted)
|
|
414
|
+
* @param hasEmbeddings - Whether the repo previously had embeddings (skip if false); when omitted, the database is checked for existing embeddings
|
|
415
|
+
*/
|
|
416
|
+
export async function refreshEmbeddings(db, dirtyFiles, hasEmbeddings) {
|
|
417
|
+
// If hasEmbeddings is not explicitly provided, check the DB
|
|
418
|
+
const embeddingsExist = hasEmbeddings ?? countEmbeddings(db) > 0;
|
|
419
|
+
if (!embeddingsExist) {
|
|
420
|
+
console.error('Code Mapper: Skipping embeddings (none in index — run analyze --embeddings to enable)');
|
|
421
|
+
return;
|
|
422
|
+
}
|
|
423
|
+
if (dirtyFiles.length === 0)
|
|
424
|
+
return;
|
|
425
|
+
try {
|
|
426
|
+
// Step 1: Delete stale embeddings for all dirty file paths
|
|
427
|
+
for (const entry of dirtyFiles) {
|
|
428
|
+
deleteEmbeddingsByFile(db, entry.relativePath);
|
|
429
|
+
}
|
|
430
|
+
// Step 2: Query new embeddable nodes for modified/created files
|
|
431
|
+
const embeddableSet = new Set(EMBEDDABLE_LABELS);
|
|
432
|
+
const modifiedPaths = dirtyFiles
|
|
433
|
+
.filter(f => f.changeKind === 'modified' || f.changeKind === 'created')
|
|
434
|
+
.map(f => f.relativePath);
|
|
435
|
+
if (modifiedPaths.length === 0)
|
|
436
|
+
return;
|
|
437
|
+
const newNodes = [];
|
|
438
|
+
for (const relPath of modifiedPaths) {
|
|
439
|
+
const nodes = findNodesByFile(db, relPath);
|
|
440
|
+
for (const node of nodes) {
|
|
441
|
+
if (embeddableSet.has(node.label)) {
|
|
442
|
+
newNodes.push({
|
|
443
|
+
id: node.id,
|
|
444
|
+
name: node.name,
|
|
445
|
+
label: node.label,
|
|
446
|
+
filePath: node.filePath,
|
|
447
|
+
content: node.content,
|
|
448
|
+
...(node.startLine != null ? { startLine: node.startLine } : {}),
|
|
449
|
+
...(node.endLine != null ? { endLine: node.endLine } : {}),
|
|
450
|
+
});
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
if (newNodes.length === 0)
|
|
455
|
+
return;
|
|
456
|
+
// Step 3: Enrich with graph context — same as the full analyze pipeline
|
|
457
|
+
// Lazy import to avoid circular dependency at module load time
|
|
458
|
+
const { fetchGraphContext, enrichTextWithGraphContext } = await import('../embeddings/embedding-pipeline.js');
|
|
459
|
+
const { generateEmbeddingText } = await import('../embeddings/text-generator.js');
|
|
460
|
+
const { initEmbedder, embedBatch, embeddingToArray } = await import('../embeddings/embedder.js');
|
|
461
|
+
const graphContext = fetchGraphContext(db, newNodes);
|
|
462
|
+
// Step 4: Generate enriched text + hash for skip detection
|
|
463
|
+
const { createHash } = await import('crypto');
|
|
464
|
+
const { getEmbeddingHashes } = await import('../db/adapter.js');
|
|
465
|
+
const existingHashes = getEmbeddingHashes(db);
|
|
466
|
+
const toEmbed = [];
|
|
467
|
+
for (const node of newNodes) {
|
|
468
|
+
let text = generateEmbeddingText(node);
|
|
469
|
+
const ctx = graphContext.get(node.id);
|
|
470
|
+
if (ctx) {
|
|
471
|
+
text = enrichTextWithGraphContext(text, ctx);
|
|
472
|
+
}
|
|
473
|
+
const hash = createHash('md5').update(text).digest('hex');
|
|
474
|
+
// Skip if hash unchanged (content + graph context identical)
|
|
475
|
+
if (existingHashes.get(node.id) === hash)
|
|
476
|
+
continue;
|
|
477
|
+
toEmbed.push({ node, text, hash });
|
|
478
|
+
}
|
|
479
|
+
if (toEmbed.length === 0) {
|
|
480
|
+
console.error(`Code Mapper: All ${newNodes.length} node(s) unchanged (hash skip)`);
|
|
481
|
+
return;
|
|
482
|
+
}
|
|
483
|
+
console.error(`Code Mapper: Embedding ${toEmbed.length}/${newNodes.length} node(s) (${newNodes.length - toEmbed.length} unchanged)`);
|
|
484
|
+
// Step 5: Ensure embedder is ready
|
|
485
|
+
await initEmbedder();
|
|
486
|
+
// Step 6: Batch embed only changed nodes
|
|
487
|
+
const embeddings = await embedBatch(toEmbed.map(e => e.text));
|
|
488
|
+
// Step 7: Insert with hashes
|
|
489
|
+
const items = [];
|
|
490
|
+
for (let i = 0; i < toEmbed.length; i++) {
|
|
491
|
+
const entry = toEmbed[i];
|
|
492
|
+
const emb = embeddings[i];
|
|
493
|
+
if (entry?.node && emb) {
|
|
494
|
+
items.push({ nodeId: toNodeId(entry.node.id), embedding: embeddingToArray(emb), textHash: entry.hash });
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
insertEmbeddingsBatch(db, items);
|
|
498
|
+
console.error(`Code Mapper: Embedded ${items.length} node(s) incrementally`);
|
|
499
|
+
}
|
|
500
|
+
catch (err) {
|
|
501
|
+
console.error(`Code Mapper: Incremental embedding failed: ${err instanceof Error ? err.message : err}`);
|
|
502
|
+
}
|
|
503
|
+
}
|
|
@@ -13,7 +13,7 @@ type Brand<T, B extends string> = T & {
|
|
|
13
13
|
export type AbsoluteFilePath = Brand<string, 'AbsoluteFilePath'>;
|
|
14
14
|
/** Path relative to repo root (forward-slash normalized, matches graph filePath) */
|
|
15
15
|
export type RelativeFilePath = Brand<string, 'RelativeFilePath'>;
|
|
16
|
-
/** Absolute path to the
|
|
16
|
+
/** Absolute path to the SQLite database file */
|
|
17
17
|
export type DbPath = Brand<string, 'DbPath'>;
|
|
18
18
|
/** Absolute path to the repository root */
|
|
19
19
|
export type RepoRoot = Brand<string, 'RepoRoot'>;
|
|
@@ -55,6 +55,7 @@ export interface RefreshResult {
|
|
|
55
55
|
readonly nodesInserted: number;
|
|
56
56
|
readonly edgesInserted: number;
|
|
57
57
|
readonly durationMs: number;
|
|
58
|
+
readonly tsgoEnabled: boolean;
|
|
58
59
|
}
|
|
59
60
|
/** Messages from child to parent */
|
|
60
61
|
export type ChildToParentMessage = {
|