@zuvia-software-solutions/code-mapper 1.4.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/ai-context.js +1 -1
- package/dist/cli/analyze.d.ts +1 -0
- package/dist/cli/analyze.js +73 -82
- package/dist/cli/augment.js +0 -2
- package/dist/cli/eval-server.d.ts +2 -2
- package/dist/cli/eval-server.js +6 -6
- package/dist/cli/index.js +6 -10
- package/dist/cli/mcp.d.ts +1 -3
- package/dist/cli/mcp.js +3 -3
- package/dist/cli/refresh.d.ts +2 -2
- package/dist/cli/refresh.js +24 -29
- package/dist/cli/status.js +4 -13
- package/dist/cli/tool.d.ts +5 -4
- package/dist/cli/tool.js +8 -10
- package/dist/config/ignore-service.js +14 -34
- package/dist/core/augmentation/engine.js +53 -83
- package/dist/core/db/adapter.d.ts +99 -0
- package/dist/core/db/adapter.js +402 -0
- package/dist/core/db/graph-loader.d.ts +27 -0
- package/dist/core/db/graph-loader.js +148 -0
- package/dist/core/db/queries.d.ts +160 -0
- package/dist/core/db/queries.js +441 -0
- package/dist/core/db/schema.d.ts +108 -0
- package/dist/core/db/schema.js +136 -0
- package/dist/core/embeddings/embedder.d.ts +21 -12
- package/dist/core/embeddings/embedder.js +104 -50
- package/dist/core/embeddings/embedding-pipeline.d.ts +48 -22
- package/dist/core/embeddings/embedding-pipeline.js +220 -262
- package/dist/core/embeddings/text-generator.js +4 -19
- package/dist/core/embeddings/types.d.ts +1 -1
- package/dist/core/graph/graph.d.ts +1 -1
- package/dist/core/graph/graph.js +1 -0
- package/dist/core/graph/types.d.ts +11 -9
- package/dist/core/graph/types.js +4 -1
- package/dist/core/incremental/refresh.d.ts +46 -0
- package/dist/core/incremental/refresh.js +503 -0
- package/dist/core/incremental/types.d.ts +2 -1
- package/dist/core/incremental/types.js +42 -44
- package/dist/core/ingestion/ast-cache.js +1 -0
- package/dist/core/ingestion/call-processor.d.ts +15 -3
- package/dist/core/ingestion/call-processor.js +448 -60
- package/dist/core/ingestion/cluster-enricher.d.ts +1 -1
- package/dist/core/ingestion/cluster-enricher.js +2 -0
- package/dist/core/ingestion/community-processor.d.ts +1 -1
- package/dist/core/ingestion/community-processor.js +8 -3
- package/dist/core/ingestion/export-detection.d.ts +1 -1
- package/dist/core/ingestion/export-detection.js +1 -1
- package/dist/core/ingestion/filesystem-walker.js +1 -1
- package/dist/core/ingestion/heritage-processor.d.ts +2 -2
- package/dist/core/ingestion/heritage-processor.js +22 -11
- package/dist/core/ingestion/import-processor.d.ts +2 -2
- package/dist/core/ingestion/import-processor.js +24 -9
- package/dist/core/ingestion/language-config.js +7 -4
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +23 -11
- package/dist/core/ingestion/named-binding-extraction.js +5 -5
- package/dist/core/ingestion/parsing-processor.d.ts +4 -4
- package/dist/core/ingestion/parsing-processor.js +26 -18
- package/dist/core/ingestion/pipeline.d.ts +4 -2
- package/dist/core/ingestion/pipeline.js +50 -20
- package/dist/core/ingestion/process-processor.d.ts +2 -2
- package/dist/core/ingestion/process-processor.js +28 -14
- package/dist/core/ingestion/resolution-context.d.ts +1 -1
- package/dist/core/ingestion/resolution-context.js +14 -4
- package/dist/core/ingestion/resolvers/csharp.js +4 -3
- package/dist/core/ingestion/resolvers/go.js +3 -1
- package/dist/core/ingestion/resolvers/jvm.js +13 -4
- package/dist/core/ingestion/resolvers/standard.js +2 -2
- package/dist/core/ingestion/resolvers/utils.js +6 -2
- package/dist/core/ingestion/route-stitcher.d.ts +15 -0
- package/dist/core/ingestion/route-stitcher.js +92 -0
- package/dist/core/ingestion/structure-processor.d.ts +1 -1
- package/dist/core/ingestion/structure-processor.js +3 -2
- package/dist/core/ingestion/symbol-table.d.ts +2 -0
- package/dist/core/ingestion/symbol-table.js +5 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +177 -0
- package/dist/core/ingestion/type-env.js +20 -0
- package/dist/core/ingestion/type-extractors/csharp.js +4 -3
- package/dist/core/ingestion/type-extractors/go.js +23 -12
- package/dist/core/ingestion/type-extractors/php.js +18 -10
- package/dist/core/ingestion/type-extractors/ruby.js +15 -3
- package/dist/core/ingestion/type-extractors/rust.js +3 -2
- package/dist/core/ingestion/type-extractors/shared.js +3 -2
- package/dist/core/ingestion/type-extractors/typescript.js +11 -5
- package/dist/core/ingestion/utils.d.ts +27 -4
- package/dist/core/ingestion/utils.js +145 -100
- package/dist/core/ingestion/workers/parse-worker.d.ts +1 -0
- package/dist/core/ingestion/workers/parse-worker.js +97 -29
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/search/bm25-index.d.ts +15 -8
- package/dist/core/search/bm25-index.js +48 -98
- package/dist/core/search/hybrid-search.d.ts +9 -3
- package/dist/core/search/hybrid-search.js +30 -25
- package/dist/core/search/reranker.js +9 -7
- package/dist/core/search/types.d.ts +0 -4
- package/dist/core/semantic/tsgo-service.d.ts +7 -1
- package/dist/core/semantic/tsgo-service.js +165 -66
- package/dist/lib/tsgo-test.d.ts +2 -0
- package/dist/lib/tsgo-test.js +6 -0
- package/dist/lib/type-utils.d.ts +25 -0
- package/dist/lib/type-utils.js +22 -0
- package/dist/lib/utils.d.ts +3 -2
- package/dist/lib/utils.js +3 -2
- package/dist/mcp/compatible-stdio-transport.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +29 -56
- package/dist/mcp/local/local-backend.js +808 -1118
- package/dist/mcp/resources.js +35 -25
- package/dist/mcp/server.d.ts +1 -1
- package/dist/mcp/server.js +5 -5
- package/dist/mcp/tools.js +24 -25
- package/dist/storage/repo-manager.d.ts +2 -12
- package/dist/storage/repo-manager.js +1 -47
- package/dist/types/pipeline.d.ts +8 -5
- package/dist/types/pipeline.js +5 -0
- package/package.json +18 -11
- package/dist/cli/serve.d.ts +0 -5
- package/dist/cli/serve.js +0 -8
- package/dist/core/incremental/child-process.d.ts +0 -8
- package/dist/core/incremental/child-process.js +0 -649
- package/dist/core/incremental/refresh-coordinator.d.ts +0 -32
- package/dist/core/incremental/refresh-coordinator.js +0 -147
- package/dist/core/lbug/csv-generator.d.ts +0 -28
- package/dist/core/lbug/csv-generator.js +0 -355
- package/dist/core/lbug/lbug-adapter.d.ts +0 -96
- package/dist/core/lbug/lbug-adapter.js +0 -753
- package/dist/core/lbug/schema.d.ts +0 -46
- package/dist/core/lbug/schema.js +0 -402
- package/dist/mcp/core/embedder.d.ts +0 -24
- package/dist/mcp/core/embedder.js +0 -168
- package/dist/mcp/core/lbug-adapter.d.ts +0 -29
- package/dist/mcp/core/lbug-adapter.js +0 -330
- package/dist/server/api.d.ts +0 -5
- package/dist/server/api.js +0 -340
- package/dist/server/mcp-http.d.ts +0 -7
- package/dist/server/mcp-http.js +0 -95
- package/models/mlx-embedder.py +0 -185
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Single source of truth for the code knowledge graph schema.
|
|
3
|
+
*
|
|
4
|
+
* ALL types in the system are derived from these const declarations.
|
|
5
|
+
* The compiler enforces exhaustiveness — adding a new node label or edge
|
|
6
|
+
* type requires updating every switch/map that handles them.
|
|
7
|
+
*/
|
|
8
|
+
import { type Brand } from '../../lib/type-utils.js';
|
|
9
|
+
export { assertNever } from '../../lib/type-utils.js';
|
|
10
|
+
export declare const NODE_LABELS: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module"];
|
|
11
|
+
/** Union of all valid node labels — derived from the const tuple */
|
|
12
|
+
export type NodeLabel = typeof NODE_LABELS[number];
|
|
13
|
+
/** Runtime assertion: throws a TypeError unless `value` is a valid NodeLabel; narrows `value` on success */
|
|
14
|
+
export declare function assertNodeLabel(value: string): asserts value is NodeLabel;
|
|
15
|
+
export declare const EDGE_TYPES: readonly ["CONTAINS", "DEFINES", "IMPORTS", "CALLS", "EXTENDS", "IMPLEMENTS", "HAS_METHOD", "OVERRIDES", "MEMBER_OF", "STEP_IN_PROCESS", "DEPENDS_ON", "PROVIDES"];
|
|
16
|
+
/** Union of all valid edge types — derived from the const tuple */
|
|
17
|
+
export type EdgeType = typeof EDGE_TYPES[number];
|
|
18
|
+
/** Runtime assertion: throws a TypeError unless `value` is a valid EdgeType; narrows `value` on success */
|
|
19
|
+
export declare function assertEdgeType(value: string): asserts value is EdgeType;
|
|
20
|
+
/** A node ID (format: "Label:filePath:name") */
|
|
21
|
+
export type NodeId = Brand<string, 'NodeId'>;
|
|
22
|
+
/** An edge ID (format: "sourceId_type_targetId") */
|
|
23
|
+
export type EdgeId = Brand<string, 'EdgeId'>;
|
|
24
|
+
/** Construct a NodeId (runtime check: throws a TypeError when `raw` is empty; compile-time branding) */
|
|
25
|
+
export declare function toNodeId(raw: string): NodeId;
|
|
26
|
+
/** Construct an EdgeId (runtime check: throws a TypeError when `raw` is empty; compile-time branding) */
|
|
27
|
+
export declare function toEdgeId(raw: string): EdgeId;
|
|
28
|
+
/** A node row as stored in the `nodes` table */
|
|
29
|
+
export interface NodeRow {
|
|
30
|
+
readonly id: NodeId;
|
|
31
|
+
readonly label: NodeLabel;
|
|
32
|
+
readonly name: string;
|
|
33
|
+
readonly filePath: string;
|
|
34
|
+
readonly startLine: number | null;
|
|
35
|
+
readonly endLine: number | null;
|
|
36
|
+
readonly isExported: number | null;
|
|
37
|
+
readonly content: string;
|
|
38
|
+
readonly description: string;
|
|
39
|
+
readonly heuristicLabel: string | null;
|
|
40
|
+
readonly cohesion: number | null;
|
|
41
|
+
readonly symbolCount: number | null;
|
|
42
|
+
readonly keywords: string | null;
|
|
43
|
+
readonly enrichedBy: 'heuristic' | 'llm' | null;
|
|
44
|
+
readonly processType: 'intra_community' | 'cross_community' | null;
|
|
45
|
+
readonly stepCount: number | null;
|
|
46
|
+
readonly communities: string | null;
|
|
47
|
+
readonly entryPointId: string | null;
|
|
48
|
+
readonly terminalId: string | null;
|
|
49
|
+
readonly parameterCount: number | null;
|
|
50
|
+
readonly returnType: string | null;
|
|
51
|
+
readonly nameExpanded: string;
|
|
52
|
+
}
|
|
53
|
+
/** An edge row as stored in the `edges` table */
|
|
54
|
+
export interface EdgeRow {
|
|
55
|
+
readonly id: EdgeId;
|
|
56
|
+
readonly sourceId: NodeId;
|
|
57
|
+
readonly targetId: NodeId;
|
|
58
|
+
readonly type: EdgeType;
|
|
59
|
+
readonly confidence: number;
|
|
60
|
+
readonly reason: string;
|
|
61
|
+
readonly step: number;
|
|
62
|
+
readonly callLine: number | null;
|
|
63
|
+
}
|
|
64
|
+
/** An embedding row as stored in the `embeddings` table */
|
|
65
|
+
export interface EmbeddingRow {
|
|
66
|
+
readonly nodeId: NodeId;
|
|
67
|
+
readonly embedding: Buffer;
|
|
68
|
+
readonly textHash: string | null;
|
|
69
|
+
}
|
|
70
|
+
/** Fields accepted when inserting a node — only `id` and `label` are required */
|
|
71
|
+
export interface NodeInsert {
|
|
72
|
+
readonly id: NodeId;
|
|
73
|
+
readonly label: NodeLabel;
|
|
74
|
+
readonly name?: string;
|
|
75
|
+
readonly filePath?: string;
|
|
76
|
+
readonly startLine?: number | null;
|
|
77
|
+
readonly endLine?: number | null;
|
|
78
|
+
readonly isExported?: number | null;
|
|
79
|
+
readonly content?: string;
|
|
80
|
+
readonly description?: string;
|
|
81
|
+
readonly heuristicLabel?: string | null;
|
|
82
|
+
readonly cohesion?: number | null;
|
|
83
|
+
readonly symbolCount?: number | null;
|
|
84
|
+
readonly keywords?: string | null;
|
|
85
|
+
readonly enrichedBy?: 'heuristic' | 'llm' | null;
|
|
86
|
+
readonly processType?: 'intra_community' | 'cross_community' | null;
|
|
87
|
+
readonly stepCount?: number | null;
|
|
88
|
+
readonly communities?: string | null;
|
|
89
|
+
readonly entryPointId?: string | null;
|
|
90
|
+
readonly terminalId?: string | null;
|
|
91
|
+
readonly parameterCount?: number | null;
|
|
92
|
+
readonly returnType?: string | null;
|
|
93
|
+
readonly nameExpanded?: string;
|
|
94
|
+
}
|
|
95
|
+
/** Fields accepted when inserting an edge — only `id`, `sourceId`, `targetId` and `type` are required */
|
|
96
|
+
export interface EdgeInsert {
|
|
97
|
+
readonly id: EdgeId;
|
|
98
|
+
readonly sourceId: NodeId;
|
|
99
|
+
readonly targetId: NodeId;
|
|
100
|
+
readonly type: EdgeType;
|
|
101
|
+
readonly confidence?: number;
|
|
102
|
+
readonly reason?: string;
|
|
103
|
+
readonly step?: number;
|
|
104
|
+
readonly callLine?: number | null;
|
|
105
|
+
}
|
|
106
|
+
/** Legacy edge table name constant (kept for compatibility) */
|
|
107
|
+
export declare const REL_TABLE_NAME = "CodeRelation";
|
|
108
|
+
export declare const SCHEMA_SQL = "\n-- Nodes: unified table for all code elements\nCREATE TABLE IF NOT EXISTS nodes (\n id TEXT PRIMARY KEY,\n label TEXT NOT NULL,\n name TEXT NOT NULL DEFAULT '',\n filePath TEXT NOT NULL DEFAULT '',\n startLine INTEGER,\n endLine INTEGER,\n isExported INTEGER,\n content TEXT NOT NULL DEFAULT '',\n description TEXT NOT NULL DEFAULT '',\n heuristicLabel TEXT,\n cohesion REAL,\n symbolCount INTEGER,\n keywords TEXT,\n enrichedBy TEXT,\n processType TEXT,\n stepCount INTEGER,\n communities TEXT,\n entryPointId TEXT,\n terminalId TEXT,\n parameterCount INTEGER,\n returnType TEXT,\n nameExpanded TEXT DEFAULT ''\n);\n\nCREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes(label);\nCREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name);\nCREATE INDEX IF NOT EXISTS idx_nodes_filePath ON nodes(filePath);\nCREATE INDEX IF NOT EXISTS idx_nodes_label_name ON nodes(label, name);\nCREATE INDEX IF NOT EXISTS idx_nodes_filePath_lines ON nodes(filePath, startLine, endLine);\n\n-- Edges: single table for all relationships\nCREATE TABLE IF NOT EXISTS edges (\n id TEXT PRIMARY KEY,\n sourceId TEXT NOT NULL,\n targetId TEXT NOT NULL,\n type TEXT NOT NULL,\n confidence REAL NOT NULL DEFAULT 1.0,\n reason TEXT NOT NULL DEFAULT '',\n step INTEGER NOT NULL DEFAULT 0,\n callLine INTEGER\n);\n\nCREATE INDEX IF NOT EXISTS idx_edges_sourceId ON edges(sourceId);\nCREATE INDEX IF NOT EXISTS idx_edges_targetId ON edges(targetId);\nCREATE INDEX IF NOT EXISTS idx_edges_type ON edges(type);\nCREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(sourceId, type);\nCREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(targetId, type);\n\n-- Embeddings: vector storage\nCREATE TABLE IF NOT EXISTS embeddings (\n nodeId TEXT PRIMARY KEY,\n embedding BLOB NOT NULL,\n textHash TEXT\n);\n\n-- FTS5 virtual table (auto-updated via triggers)\nCREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(\n name,\n nameExpanded,\n filePath,\n content,\n content='nodes',\n 
content_rowid='rowid'\n);\n\nCREATE TRIGGER IF NOT EXISTS nodes_fts_ai AFTER INSERT ON nodes BEGIN\n INSERT INTO nodes_fts(rowid, name, nameExpanded, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.filePath, new.content);\nEND;\nCREATE TRIGGER IF NOT EXISTS nodes_fts_ad AFTER DELETE ON nodes BEGIN\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.filePath, old.content);\nEND;\nCREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.filePath, old.content);\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.filePath, new.content);\nEND;\n";
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
// code-mapper/src/core/db/schema.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file Single source of truth for the code knowledge graph schema.
|
|
4
|
+
*
|
|
5
|
+
* ALL types in the system are derived from these const declarations.
|
|
6
|
+
* The compiler enforces exhaustiveness — adding a new node label or edge
|
|
7
|
+
* type requires updating every switch/map that handles them.
|
|
8
|
+
*/
|
|
9
|
+
import {} from '../../lib/type-utils.js';
|
|
10
|
+
export { assertNever } from '../../lib/type-utils.js';
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Node labels — const tuple is the single source of truth
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
/**
 * Every node label known to the knowledge graph, in declaration order.
 *
 * This list is what `assertNodeLabel` checks candidate values against.
 */
export const NODE_LABELS = [
    'File',
    'Folder',
    'Function',
    'Class',
    'Interface',
    'Method',
    'CodeElement',
    'Community',
    'Process',
    'Struct',
    'Enum',
    'Macro',
    'Typedef',
    'Union',
    'Namespace',
    'Trait',
    'Impl',
    'TypeAlias',
    'Const',
    'Static',
    'Property',
    'Record',
    'Delegate',
    'Annotation',
    'Constructor',
    'Template',
    'Module',
];
|
|
21
|
+
/**
 * Runtime guard for node labels.
 *
 * Returns normally when `value` is one of the entries in NODE_LABELS,
 * otherwise throws.
 *
 * @param value - candidate label to validate
 * @throws {TypeError} when `value` is not listed in NODE_LABELS
 */
export function assertNodeLabel(value) {
    const isKnownLabel = NODE_LABELS.includes(value);
    if (isKnownLabel) {
        return;
    }
    throw new TypeError(`Invalid node label: ${value}`);
}
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Edge types — const tuple is the single source of truth
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
/**
 * Every edge (relationship) type known to the knowledge graph, in
 * declaration order.
 *
 * This list is what `assertEdgeType` checks candidate values against.
 */
export const EDGE_TYPES = [
    'CONTAINS',
    'DEFINES',
    'IMPORTS',
    'CALLS',
    'EXTENDS',
    'IMPLEMENTS',
    'HAS_METHOD',
    'OVERRIDES',
    'MEMBER_OF',
    'STEP_IN_PROCESS',
    'DEPENDS_ON',
    'PROVIDES',
];
|
|
35
|
+
/**
 * Runtime guard for edge types.
 *
 * Returns normally when `value` is one of the entries in EDGE_TYPES,
 * otherwise throws.
 *
 * @param value - candidate edge type to validate
 * @throws {TypeError} when `value` is not listed in EDGE_TYPES
 */
export function assertEdgeType(value) {
    const isKnownType = EDGE_TYPES.includes(value);
    if (isKnownType) {
        return;
    }
    throw new TypeError(`Invalid edge type: ${value}`);
}
|
|
41
|
+
/** Construct a NodeId (runtime validation + compile-time branding) */
|
|
42
|
+
export function toNodeId(raw) {
|
|
43
|
+
if (!raw)
|
|
44
|
+
throw new TypeError('NodeId cannot be empty');
|
|
45
|
+
return raw;
|
|
46
|
+
}
|
|
47
|
+
/** Construct an EdgeId */
|
|
48
|
+
export function toEdgeId(raw) {
|
|
49
|
+
if (!raw)
|
|
50
|
+
throw new TypeError('EdgeId cannot be empty');
|
|
51
|
+
return raw;
|
|
52
|
+
}
|
|
53
|
+
/** Legacy edge table name constant (kept for compatibility; SCHEMA_SQL stores edges in the `edges` table) */
|
|
54
|
+
export const REL_TABLE_NAME = 'CodeRelation';
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
// SQL schema — the DDL statements that create the SQLite tables
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
export const SCHEMA_SQL = `
|
|
59
|
+
-- Nodes: unified table for all code elements
|
|
60
|
+
CREATE TABLE IF NOT EXISTS nodes (
|
|
61
|
+
id TEXT PRIMARY KEY,
|
|
62
|
+
label TEXT NOT NULL,
|
|
63
|
+
name TEXT NOT NULL DEFAULT '',
|
|
64
|
+
filePath TEXT NOT NULL DEFAULT '',
|
|
65
|
+
startLine INTEGER,
|
|
66
|
+
endLine INTEGER,
|
|
67
|
+
isExported INTEGER,
|
|
68
|
+
content TEXT NOT NULL DEFAULT '',
|
|
69
|
+
description TEXT NOT NULL DEFAULT '',
|
|
70
|
+
heuristicLabel TEXT,
|
|
71
|
+
cohesion REAL,
|
|
72
|
+
symbolCount INTEGER,
|
|
73
|
+
keywords TEXT,
|
|
74
|
+
enrichedBy TEXT,
|
|
75
|
+
processType TEXT,
|
|
76
|
+
stepCount INTEGER,
|
|
77
|
+
communities TEXT,
|
|
78
|
+
entryPointId TEXT,
|
|
79
|
+
terminalId TEXT,
|
|
80
|
+
parameterCount INTEGER,
|
|
81
|
+
returnType TEXT,
|
|
82
|
+
nameExpanded TEXT DEFAULT ''
|
|
83
|
+
);
|
|
84
|
+
|
|
85
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes(label);
|
|
86
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name);
|
|
87
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_filePath ON nodes(filePath);
|
|
88
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_label_name ON nodes(label, name);
|
|
89
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_filePath_lines ON nodes(filePath, startLine, endLine);
|
|
90
|
+
|
|
91
|
+
-- Edges: single table for all relationships
|
|
92
|
+
CREATE TABLE IF NOT EXISTS edges (
|
|
93
|
+
id TEXT PRIMARY KEY,
|
|
94
|
+
sourceId TEXT NOT NULL,
|
|
95
|
+
targetId TEXT NOT NULL,
|
|
96
|
+
type TEXT NOT NULL,
|
|
97
|
+
confidence REAL NOT NULL DEFAULT 1.0,
|
|
98
|
+
reason TEXT NOT NULL DEFAULT '',
|
|
99
|
+
step INTEGER NOT NULL DEFAULT 0,
|
|
100
|
+
callLine INTEGER
|
|
101
|
+
);
|
|
102
|
+
|
|
103
|
+
CREATE INDEX IF NOT EXISTS idx_edges_sourceId ON edges(sourceId);
|
|
104
|
+
CREATE INDEX IF NOT EXISTS idx_edges_targetId ON edges(targetId);
|
|
105
|
+
CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(type);
|
|
106
|
+
CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(sourceId, type);
|
|
107
|
+
CREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(targetId, type);
|
|
108
|
+
|
|
109
|
+
-- Embeddings: vector storage
|
|
110
|
+
CREATE TABLE IF NOT EXISTS embeddings (
|
|
111
|
+
nodeId TEXT PRIMARY KEY,
|
|
112
|
+
embedding BLOB NOT NULL,
|
|
113
|
+
textHash TEXT
|
|
114
|
+
);
|
|
115
|
+
|
|
116
|
+
-- FTS5 virtual table (auto-updated via triggers)
|
|
117
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(
|
|
118
|
+
name,
|
|
119
|
+
nameExpanded,
|
|
120
|
+
filePath,
|
|
121
|
+
content,
|
|
122
|
+
content='nodes',
|
|
123
|
+
content_rowid='rowid'
|
|
124
|
+
);
|
|
125
|
+
|
|
126
|
+
CREATE TRIGGER IF NOT EXISTS nodes_fts_ai AFTER INSERT ON nodes BEGIN
|
|
127
|
+
INSERT INTO nodes_fts(rowid, name, nameExpanded, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.filePath, new.content);
|
|
128
|
+
END;
|
|
129
|
+
CREATE TRIGGER IF NOT EXISTS nodes_fts_ad AFTER DELETE ON nodes BEGIN
|
|
130
|
+
INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.filePath, old.content);
|
|
131
|
+
END;
|
|
132
|
+
CREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN
|
|
133
|
+
INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.filePath, old.content);
|
|
134
|
+
INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.filePath, new.content);
|
|
135
|
+
END;
|
|
136
|
+
`;
|
|
@@ -2,23 +2,24 @@
|
|
|
2
2
|
* @file embedder.ts
|
|
3
3
|
* @description MLX-accelerated code embedder via Python subprocess
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
5
|
+
* Spawns a persistent Python process running Jina Code 1.5B on Apple Silicon
|
|
6
|
+
* Metal via MLX. Communicates via newline-delimited JSON over stdio.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
8
|
+
* Architecture: request queue with sequential processing. Each sendAndReceive()
|
|
9
|
+
* waits for its specific response — no global resolver that can be stolen by
|
|
10
|
+
* out-of-order messages.
|
|
11
|
+
*
|
|
12
|
+
* Model: jinaai/jina-code-embeddings-1.5b-mlx (1.54B params, 256-dim Matryoshka)
|
|
10
13
|
*/
|
|
11
14
|
import { type EmbeddingConfig, type ModelProgress } from './types.js';
|
|
12
|
-
/** Progress callback for model loading */
|
|
13
15
|
export type ModelProgressCallback = (progress: ModelProgress) => void;
|
|
14
|
-
/** Get the current inference device */
|
|
15
16
|
export declare const getCurrentDevice: () => string | null;
|
|
17
|
+
export declare const isEmbedderReady: () => boolean;
|
|
18
|
+
export declare const getEmbeddingDims: () => number;
|
|
16
19
|
/**
|
|
17
|
-
* Initialize the MLX embedder (spawns Python subprocess,
|
|
20
|
+
* Initialize the MLX embedder (spawns Python subprocess, waits for model load)
|
|
18
21
|
*/
|
|
19
|
-
export declare const initEmbedder: (_onProgress?: ModelProgressCallback, _config?: Partial<EmbeddingConfig>) => Promise<
|
|
20
|
-
/** Check if the embedder is initialized and ready */
|
|
21
|
-
export declare const isEmbedderReady: () => boolean;
|
|
22
|
+
export declare const initEmbedder: (_onProgress?: ModelProgressCallback, _config?: Partial<EmbeddingConfig>) => Promise<void>;
|
|
22
23
|
/** Get the embedder instance — not applicable for MLX: returns null once ready, throws if the embedder is not initialized */
|
|
23
24
|
export declare const getEmbedder: () => any;
|
|
24
25
|
/**
|
|
@@ -26,10 +27,18 @@ export declare const getEmbedder: () => any;
|
|
|
26
27
|
*/
|
|
27
28
|
export declare const embedText: (text: string) => Promise<Float32Array>;
|
|
28
29
|
/**
|
|
29
|
-
* Embed multiple texts in
|
|
30
|
+
* Embed multiple texts in batches.
|
|
31
|
+
*
|
|
32
|
+
* Sends chunks of 100 texts to Python — keeps JSON responses manageable
|
|
33
|
+
* over stdio while letting Python's internal length-tiered batching
|
|
34
|
+
* optimize GPU utilization within each chunk.
|
|
30
35
|
*/
|
|
31
36
|
export declare const embedBatch: (texts: string[]) => Promise<Float32Array[]>;
|
|
32
|
-
/**
|
|
37
|
+
/**
|
|
38
|
+
* Embed a query text for semantic search (cached, uses "query" prompt type)
|
|
39
|
+
*/
|
|
40
|
+
export declare const embedQuery: (query: string) => Promise<number[]>;
|
|
41
|
+
/** Convert Float32Array to number[] for database storage */
|
|
33
42
|
export declare const embeddingToArray: (embedding: Float32Array) => number[];
|
|
34
43
|
/** Dispose the embedder subprocess */
|
|
35
44
|
export declare const disposeEmbedder: () => Promise<void>;
|
|
@@ -3,43 +3,42 @@
|
|
|
3
3
|
* @file embedder.ts
|
|
4
4
|
* @description MLX-accelerated code embedder via Python subprocess
|
|
5
5
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
6
|
+
* Spawns a persistent Python process running Jina Code 1.5B on Apple Silicon
|
|
7
|
+
* Metal via MLX. Communicates via newline-delimited JSON over stdio.
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
9
|
+
* Architecture: request queue with sequential processing. Each sendAndReceive()
|
|
10
|
+
* waits for its specific response — no global resolver that can be stolen by
|
|
11
|
+
* out-of-order messages.
|
|
12
|
+
*
|
|
13
|
+
* Model: jinaai/jina-code-embeddings-1.5b-mlx (1.54B params, 256-dim Matryoshka)
|
|
11
14
|
*/
|
|
12
|
-
import { spawn
|
|
15
|
+
import { spawn } from 'child_process';
|
|
13
16
|
import path from 'path';
|
|
14
17
|
import { fileURLToPath } from 'url';
|
|
18
|
+
import { queryEmbeddingCache } from '../search/query-cache.js';
|
|
15
19
|
import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
|
|
16
20
|
const __filename = fileURLToPath(import.meta.url);
|
|
17
21
|
const __dirname = path.dirname(__filename);
|
|
18
|
-
// Path to MLX embedder script (relative to compiled dist/)
|
|
19
22
|
const MLX_SCRIPT = path.resolve(__dirname, '..', '..', '..', 'models', 'mlx-embedder.py');
|
|
20
|
-
//
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Singleton state
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
21
26
|
let mlxProcess = null;
|
|
22
|
-
let pendingResolve = null;
|
|
23
|
-
let pendingReject = null;
|
|
24
|
-
let lineBuffer = '';
|
|
25
27
|
let ready = false;
|
|
26
|
-
|
|
28
|
+
let lineBuffer = '';
|
|
29
|
+
/** Queued requests waiting for responses — FIFO order matches Python's processing */
|
|
30
|
+
const responseQueue = [];
|
|
31
|
+
/** Promise that resolves when the process is ready (model loaded) */
|
|
32
|
+
let readyPromise = null;
|
|
33
|
+
let readyResolve = null;
|
|
27
34
|
export const getCurrentDevice = () => ready ? 'mlx-metal' : null;
|
|
35
|
+
export const isEmbedderReady = () => ready;
|
|
36
|
+
export const getEmbeddingDims = () => DEFAULT_EMBEDDING_CONFIG.dimensions;
|
|
28
37
|
function ensureProcess() {
|
|
29
38
|
if (mlxProcess && !mlxProcess.killed)
|
|
30
39
|
return mlxProcess;
|
|
31
|
-
//
|
|
32
|
-
|
|
33
|
-
execFileSync('python3', ['-c', 'import mlx; import tokenizers'], {
|
|
34
|
-
timeout: 5000,
|
|
35
|
-
stdio: ['pipe', 'pipe', 'pipe'],
|
|
36
|
-
});
|
|
37
|
-
}
|
|
38
|
-
catch {
|
|
39
|
-
throw new Error('MLX embedder requires Python 3 + MLX on Apple Silicon.\n' +
|
|
40
|
-
'Install: pip3 install mlx tokenizers huggingface_hub\n' +
|
|
41
|
-
'The embedding model will download automatically on first use (~3GB).');
|
|
42
|
-
}
|
|
40
|
+
// Create ready promise before spawning so we don't miss the message
|
|
41
|
+
readyPromise = new Promise(resolve => { readyResolve = resolve; });
|
|
43
42
|
mlxProcess = spawn('python3', [MLX_SCRIPT], {
|
|
44
43
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
45
44
|
env: { ...process.env, TOKENIZERS_PARALLELISM: 'false' },
|
|
@@ -54,24 +53,29 @@ function ensureProcess() {
|
|
|
54
53
|
continue;
|
|
55
54
|
try {
|
|
56
55
|
const msg = JSON.parse(line);
|
|
56
|
+
// Startup ready message — NOT a response to any request
|
|
57
57
|
if (msg.status === 'ready' && !ready) {
|
|
58
58
|
ready = true;
|
|
59
|
-
console.error(`Code Mapper: MLX embedder ready (${msg.device}, loaded in ${msg.load_ms}ms)`);
|
|
59
|
+
console.error(`Code Mapper: MLX embedder ready (${msg.device ?? 'unknown'}, loaded in ${msg.load_ms ?? '?'}ms)`);
|
|
60
|
+
readyResolve?.();
|
|
61
|
+
readyResolve = null;
|
|
62
|
+
continue; // Don't dispatch to response queue
|
|
60
63
|
}
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
// Response to a queued request — dispatch FIFO
|
|
65
|
+
const pending = responseQueue.shift();
|
|
66
|
+
if (pending) {
|
|
67
|
+
pending.resolve(msg);
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
console.error(`Code Mapper: MLX embedder unexpected message (no pending request): ${line.slice(0, 100)}`);
|
|
66
71
|
}
|
|
67
72
|
}
|
|
68
73
|
catch {
|
|
69
|
-
// Non-JSON output — ignore
|
|
74
|
+
// Non-JSON output — ignore (Python progress bars, etc.)
|
|
70
75
|
}
|
|
71
76
|
}
|
|
72
77
|
});
|
|
73
78
|
mlxProcess.stderr.on('data', (chunk) => {
|
|
74
|
-
// Forward stderr for debugging
|
|
75
79
|
const msg = chunk.toString().trim();
|
|
76
80
|
if (msg)
|
|
77
81
|
console.error(`[mlx-embedder] ${msg}`);
|
|
@@ -79,49 +83,60 @@ function ensureProcess() {
|
|
|
79
83
|
mlxProcess.on('exit', (code) => {
|
|
80
84
|
ready = false;
|
|
81
85
|
mlxProcess = null;
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
reject(new Error(`MLX embedder exited with code ${code}`));
|
|
86
|
+
// Reject all pending requests
|
|
87
|
+
const err = new Error(`MLX embedder exited with code ${code}`);
|
|
88
|
+
for (const pending of responseQueue) {
|
|
89
|
+
pending.reject(err);
|
|
87
90
|
}
|
|
91
|
+
responseQueue.length = 0;
|
|
92
|
+
// Also resolve readyPromise so init doesn't hang
|
|
93
|
+
readyResolve?.();
|
|
94
|
+
readyResolve = null;
|
|
88
95
|
});
|
|
89
96
|
return mlxProcess;
|
|
90
97
|
}
|
|
98
|
+
/**
|
|
99
|
+
* Send a request and wait for its response.
|
|
100
|
+
*
|
|
101
|
+
* Requests are queued FIFO — Python processes them in order and sends
|
|
102
|
+
* responses in the same order. Each caller gets exactly its own response.
|
|
103
|
+
*/
|
|
91
104
|
function sendAndReceive(request) {
|
|
92
105
|
return new Promise((resolve, reject) => {
|
|
93
106
|
const proc = ensureProcess();
|
|
94
|
-
|
|
95
|
-
pendingReject = reject;
|
|
107
|
+
responseQueue.push({ resolve, reject });
|
|
96
108
|
proc.stdin.write(JSON.stringify(request) + '\n');
|
|
97
109
|
});
|
|
98
110
|
}
|
|
111
|
+
// ---------------------------------------------------------------------------
|
|
112
|
+
// Public API
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
99
114
|
/**
|
|
100
|
-
* Initialize the MLX embedder (spawns Python subprocess,
|
|
115
|
+
* Initialize the MLX embedder (spawns Python subprocess, waits for model load)
|
|
101
116
|
*/
|
|
102
117
|
export const initEmbedder = async (_onProgress, _config = {}) => {
|
|
103
118
|
if (ready)
|
|
104
119
|
return;
|
|
105
120
|
ensureProcess();
|
|
106
|
-
// Wait for the "ready" message from
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
121
|
+
// Wait for the automatic "ready" message from Python (model loaded)
|
|
122
|
+
// No ping needed — Python sends ready on its own after loading the model
|
|
123
|
+
await readyPromise;
|
|
124
|
+
if (!ready) {
|
|
125
|
+
throw new Error('MLX embedder failed to start — process exited before ready');
|
|
110
126
|
}
|
|
111
|
-
return msg;
|
|
112
127
|
};
|
|
113
|
-
/** Check if the embedder is initialized and ready */
|
|
114
|
-
export const isEmbedderReady = () => ready;
|
|
115
128
|
/** Get the embedder instance — not applicable for MLX, returns null */
|
|
116
129
|
export const getEmbedder = () => {
|
|
117
130
|
if (!ready)
|
|
118
131
|
throw new Error('MLX embedder not initialized. Call initEmbedder() first.');
|
|
119
|
-
return null;
|
|
132
|
+
return null;
|
|
120
133
|
};
|
|
121
134
|
/**
|
|
122
135
|
* Embed a single text string
|
|
123
136
|
*/
|
|
124
137
|
export const embedText = async (text) => {
|
|
138
|
+
if (!ready)
|
|
139
|
+
await initEmbedder();
|
|
125
140
|
const result = await sendAndReceive({
|
|
126
141
|
texts: [text],
|
|
127
142
|
task: 'nl2code',
|
|
@@ -133,11 +148,21 @@ export const embedText = async (text) => {
|
|
|
133
148
|
return new Float32Array(result.embeddings[0]);
|
|
134
149
|
};
|
|
135
150
|
/**
|
|
136
|
-
* Embed multiple texts in
|
|
151
|
+
* Embed multiple texts in batches.
|
|
152
|
+
*
|
|
153
|
+
* Sends all texts to Python in a single request — Python's internal
|
|
154
|
+
* length-tiered batching optimizes GPU utilization, so no additional
|
|
155
|
+
* chunking is performed at the Node level.
|
|
137
156
|
*/
|
|
138
157
|
export const embedBatch = async (texts) => {
|
|
139
158
|
if (texts.length === 0)
|
|
140
159
|
return [];
|
|
160
|
+
if (!ready)
|
|
161
|
+
await initEmbedder();
|
|
162
|
+
// Send all texts to Python in one call — Python does optimal length-tiered
|
|
163
|
+
// batching internally for Metal GPU. No need to double-batch at the Node level.
|
|
164
|
+
console.error(`Code Mapper: embedBatch sending ${texts.length} texts to MLX...`);
|
|
165
|
+
const t0 = Date.now();
|
|
141
166
|
const result = await sendAndReceive({
|
|
142
167
|
texts,
|
|
143
168
|
task: 'nl2code',
|
|
@@ -146,9 +171,35 @@ export const embedBatch = async (texts) => {
|
|
|
146
171
|
});
|
|
147
172
|
if (result.error)
|
|
148
173
|
throw new Error(`Batch embedding failed: ${result.error}`);
|
|
174
|
+
if (!result.embeddings || !Array.isArray(result.embeddings)) {
|
|
175
|
+
throw new Error(`Batch embedding returned invalid response: ${JSON.stringify(result).slice(0, 200)}`);
|
|
176
|
+
}
|
|
177
|
+
const elapsed = Date.now() - t0;
|
|
178
|
+
console.error(`Code Mapper: embedBatch complete — ${result.embeddings.length} embeddings in ${elapsed}ms (${result.ms ?? '?'}ms inference)`);
|
|
149
179
|
return result.embeddings.map((e) => new Float32Array(e));
|
|
150
180
|
};
|
|
151
|
-
/**
|
|
181
|
+
/**
|
|
182
|
+
* Embed a query text for semantic search (cached, uses "query" prompt type)
|
|
183
|
+
*/
|
|
184
|
+
export const embedQuery = async (query) => {
|
|
185
|
+
const cached = queryEmbeddingCache.get(query);
|
|
186
|
+
if (cached)
|
|
187
|
+
return cached;
|
|
188
|
+
if (!ready)
|
|
189
|
+
await initEmbedder();
|
|
190
|
+
const result = await sendAndReceive({
|
|
191
|
+
texts: [query],
|
|
192
|
+
task: 'nl2code',
|
|
193
|
+
type: 'query',
|
|
194
|
+
dims: DEFAULT_EMBEDDING_CONFIG.dimensions,
|
|
195
|
+
});
|
|
196
|
+
if (result.error)
|
|
197
|
+
throw new Error(`Query embedding failed: ${result.error}`);
|
|
198
|
+
const embedding = result.embeddings[0];
|
|
199
|
+
queryEmbeddingCache.set(query, embedding);
|
|
200
|
+
return embedding;
|
|
201
|
+
};
|
|
202
|
+
/** Convert Float32Array to number[] for database storage */
|
|
152
203
|
export const embeddingToArray = (embedding) => {
|
|
153
204
|
return Array.from(embedding);
|
|
154
205
|
};
|
|
@@ -157,7 +208,6 @@ export const disposeEmbedder = async () => {
|
|
|
157
208
|
if (mlxProcess && !mlxProcess.killed) {
|
|
158
209
|
try {
|
|
159
210
|
mlxProcess.stdin.write(JSON.stringify({ cmd: 'quit' }) + '\n');
|
|
160
|
-
// Give it a moment to exit gracefully
|
|
161
211
|
await new Promise(resolve => setTimeout(resolve, 500));
|
|
162
212
|
}
|
|
163
213
|
catch { }
|
|
@@ -168,4 +218,8 @@ export const disposeEmbedder = async () => {
|
|
|
168
218
|
mlxProcess = null;
|
|
169
219
|
}
|
|
170
220
|
ready = false;
|
|
221
|
+
readyPromise = null;
|
|
222
|
+
readyResolve = null;
|
|
223
|
+
responseQueue.length = 0;
|
|
224
|
+
queryEmbeddingCache.clear();
|
|
171
225
|
};
|
|
@@ -1,41 +1,67 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @file embedding-pipeline.ts
|
|
3
3
|
* @description Orchestrates the background embedding process:
|
|
4
|
-
* 1) Query embeddable nodes from
|
|
4
|
+
* 1) Query embeddable nodes from SQLite
|
|
5
5
|
* 2) Generate text representations
|
|
6
6
|
* 3) Batch embed using transformers.js
|
|
7
|
-
* 4) Store embeddings in
|
|
8
|
-
* 5)
|
|
7
|
+
* 4) Store embeddings in SQLite
|
|
8
|
+
* 5) Vector search via brute-force cosine similarity in adapter.ts
|
|
9
9
|
*/
|
|
10
10
|
import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
|
|
11
|
+
import type Database from 'better-sqlite3';
|
|
11
12
|
/** Progress callback type */
|
|
12
13
|
export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
|
|
14
|
+
/** Graph context for a node: callers, callees, and community module */
|
|
15
|
+
export interface GraphContext {
|
|
16
|
+
callers: string[];
|
|
17
|
+
callees: string[];
|
|
18
|
+
module: string;
|
|
19
|
+
}
|
|
13
20
|
/**
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
21
|
+
* Fetch graph context (callers, callees, community module) for a set of nodes.
|
|
22
|
+
*
|
|
23
|
+
* This enrichment adds relationship context so that embedding text like
|
|
24
|
+
* "import resolution pipeline" matches `processImports` because its caller
|
|
25
|
+
* "runPipelineFromRepo" contains "pipeline".
|
|
26
|
+
*
|
|
27
|
+
* Reusable by both the full analyze pipeline and incremental refresh.
|
|
28
|
+
*
|
|
29
|
+
* @param db - Open SQLite database instance
|
|
30
|
+
* @param nodes - Nodes to fetch context for (must have `id` field)
|
|
31
|
+
* @returns Map from node ID to graph context
|
|
32
|
+
*/
|
|
33
|
+
export declare function fetchGraphContext(db: Database.Database, nodes: ReadonlyArray<{
|
|
34
|
+
id: string;
|
|
35
|
+
}>): Map<string, GraphContext>;
|
|
36
|
+
/**
|
|
37
|
+
* Enrich embedding text with graph context (callers, callees, module).
|
|
38
|
+
*
|
|
39
|
+
* Inserts context lines (Module, Called by, Calls) after the header
|
|
40
|
+
* section of the generated text, before the code snippet.
|
|
41
|
+
*
|
|
42
|
+
* @param text - Base embedding text from generateEmbeddingText
|
|
43
|
+
* @param ctx - Graph context for this node
|
|
44
|
+
* @returns Enriched text
|
|
45
|
+
*/
|
|
46
|
+
export declare function enrichTextWithGraphContext(text: string, ctx: GraphContext): string;
|
|
47
|
+
/**
|
|
48
|
+
* Run the full embedding pipeline (load model, embed nodes, store in SQLite)
|
|
49
|
+
* @param db - Open SQLite database instance
|
|
17
50
|
* @param onProgress - Progress callback
|
|
18
51
|
* @param config - Configuration override
|
|
19
52
|
* @param skipNodeIds - Node IDs that already have embeddings (incremental mode)
|
|
20
53
|
*/
|
|
21
|
-
export declare
|
|
54
|
+
export declare function runEmbeddingPipeline(db: Database.Database, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>): Promise<void>;
|
|
22
55
|
/**
|
|
23
|
-
*
|
|
24
|
-
* @param executeQuery - Execute Cypher queries
|
|
25
|
-
* @param query - Search query text
|
|
26
|
-
* @param k - Number of results (default: 10)
|
|
27
|
-
* @param maxDistance - Maximum cosine distance threshold (default: 0.5)
|
|
28
|
-
* @returns Search results ordered by relevance
|
|
29
|
-
*/
|
|
30
|
-
export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
|
|
31
|
-
/**
|
|
32
|
-
* Semantic search with flattened results (graph expansion placeholder)
|
|
56
|
+
* Semantic vector search against a SQLite database.
|
|
33
57
|
*
|
|
34
|
-
*
|
|
58
|
+
* Uses brute-force cosine similarity via adapter.searchVector, then
|
|
59
|
+
* enriches results with node metadata. This mirrors the pattern in
|
|
60
|
+
* local-backend.ts but as a standalone function for hybrid search.
|
|
35
61
|
*
|
|
36
|
-
* @param
|
|
62
|
+
* @param db - Open SQLite database instance
|
|
37
63
|
* @param query - Search query text
|
|
38
|
-
* @param k - Number of
|
|
39
|
-
* @param
|
|
64
|
+
* @param k - Number of results (default: 10)
|
|
65
|
+
* (No maxDistance parameter is exposed by this signature — NOTE(review): the cosine distance threshold presumably comes from types.ts; confirm.)
|
|
40
66
|
*/
|
|
41
|
-
export declare
|
|
67
|
+
export declare function semanticSearchSqlite(db: Database.Database, query: string, k?: number): Promise<SemanticSearchResult[]>;
|