gitnexus 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +181 -0
- package/dist/cli/ai-context.d.ts +21 -0
- package/dist/cli/ai-context.js +219 -0
- package/dist/cli/analyze.d.ts +10 -0
- package/dist/cli/analyze.js +118 -0
- package/dist/cli/clean.d.ts +8 -0
- package/dist/cli/clean.js +29 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +42 -0
- package/dist/cli/list.d.ts +6 -0
- package/dist/cli/list.js +27 -0
- package/dist/cli/mcp.d.ts +7 -0
- package/dist/cli/mcp.js +85 -0
- package/dist/cli/serve.d.ts +3 -0
- package/dist/cli/serve.js +5 -0
- package/dist/cli/status.d.ts +6 -0
- package/dist/cli/status.js +27 -0
- package/dist/config/ignore-service.d.ts +1 -0
- package/dist/config/ignore-service.js +208 -0
- package/dist/config/supported-languages.d.ts +11 -0
- package/dist/config/supported-languages.js +15 -0
- package/dist/core/embeddings/embedder.d.ts +60 -0
- package/dist/core/embeddings/embedder.js +205 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +50 -0
- package/dist/core/embeddings/embedding-pipeline.js +321 -0
- package/dist/core/embeddings/index.d.ts +9 -0
- package/dist/core/embeddings/index.js +9 -0
- package/dist/core/embeddings/text-generator.d.ts +24 -0
- package/dist/core/embeddings/text-generator.js +182 -0
- package/dist/core/embeddings/types.d.ts +87 -0
- package/dist/core/embeddings/types.js +32 -0
- package/dist/core/graph/graph.d.ts +2 -0
- package/dist/core/graph/graph.js +61 -0
- package/dist/core/graph/types.d.ts +50 -0
- package/dist/core/graph/types.js +1 -0
- package/dist/core/ingestion/ast-cache.d.ts +11 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +8 -0
- package/dist/core/ingestion/call-processor.js +269 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
- package/dist/core/ingestion/cluster-enricher.js +170 -0
- package/dist/core/ingestion/community-processor.d.ts +39 -0
- package/dist/core/ingestion/community-processor.js +269 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
- package/dist/core/ingestion/entry-point-scoring.js +235 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +5 -0
- package/dist/core/ingestion/filesystem-walker.js +26 -0
- package/dist/core/ingestion/framework-detection.d.ts +38 -0
- package/dist/core/ingestion/framework-detection.js +183 -0
- package/dist/core/ingestion/heritage-processor.d.ts +14 -0
- package/dist/core/ingestion/heritage-processor.js +134 -0
- package/dist/core/ingestion/import-processor.d.ts +8 -0
- package/dist/core/ingestion/import-processor.js +490 -0
- package/dist/core/ingestion/parsing-processor.d.ts +8 -0
- package/dist/core/ingestion/parsing-processor.js +249 -0
- package/dist/core/ingestion/pipeline.d.ts +2 -0
- package/dist/core/ingestion/pipeline.js +228 -0
- package/dist/core/ingestion/process-processor.d.ts +51 -0
- package/dist/core/ingestion/process-processor.js +278 -0
- package/dist/core/ingestion/structure-processor.d.ts +2 -0
- package/dist/core/ingestion/structure-processor.js +36 -0
- package/dist/core/ingestion/symbol-table.d.ts +33 -0
- package/dist/core/ingestion/symbol-table.js +38 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -0
- package/dist/core/ingestion/tree-sitter-queries.js +319 -0
- package/dist/core/ingestion/utils.d.ts +10 -0
- package/dist/core/ingestion/utils.js +44 -0
- package/dist/core/kuzu/csv-generator.d.ts +22 -0
- package/dist/core/kuzu/csv-generator.js +272 -0
- package/dist/core/kuzu/kuzu-adapter.d.ts +81 -0
- package/dist/core/kuzu/kuzu-adapter.js +568 -0
- package/dist/core/kuzu/schema.d.ts +53 -0
- package/dist/core/kuzu/schema.js +380 -0
- package/dist/core/search/bm25-index.d.ts +22 -0
- package/dist/core/search/bm25-index.js +52 -0
- package/dist/core/search/hybrid-search.d.ts +49 -0
- package/dist/core/search/hybrid-search.js +118 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
- package/dist/core/tree-sitter/parser-loader.js +42 -0
- package/dist/lib/utils.d.ts +1 -0
- package/dist/lib/utils.js +3 -0
- package/dist/mcp/core/embedder.d.ts +27 -0
- package/dist/mcp/core/embedder.js +93 -0
- package/dist/mcp/core/kuzu-adapter.d.ts +23 -0
- package/dist/mcp/core/kuzu-adapter.js +62 -0
- package/dist/mcp/local/local-backend.d.ts +73 -0
- package/dist/mcp/local/local-backend.js +752 -0
- package/dist/mcp/resources.d.ts +31 -0
- package/dist/mcp/resources.js +279 -0
- package/dist/mcp/server.d.ts +12 -0
- package/dist/mcp/server.js +130 -0
- package/dist/mcp/staleness.d.ts +15 -0
- package/dist/mcp/staleness.js +29 -0
- package/dist/mcp/tools.d.ts +24 -0
- package/dist/mcp/tools.js +160 -0
- package/dist/server/api.d.ts +6 -0
- package/dist/server/api.js +156 -0
- package/dist/storage/git.d.ts +7 -0
- package/dist/storage/git.js +39 -0
- package/dist/storage/repo-manager.d.ts +61 -0
- package/dist/storage/repo-manager.js +106 -0
- package/dist/types/pipeline.d.ts +28 -0
- package/dist/types/pipeline.js +16 -0
- package/package.json +80 -0
- package/skills/debugging.md +104 -0
- package/skills/exploring.md +112 -0
- package/skills/impact-analysis.md +114 -0
- package/skills/refactoring.md +119 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KuzuDB Schema Definitions
|
|
3
|
+
*
|
|
4
|
+
* Hybrid Schema:
|
|
5
|
+
* - Separate node tables for each code element type (File, Function, Class, etc.)
|
|
6
|
+
* - Single CodeRelation table with 'type' property for all relationships
|
|
7
|
+
*
|
|
8
|
+
* This allows LLMs to write natural Cypher queries like:
|
|
9
|
+
* MATCH (f:Function)-[r:CodeRelation {type: 'CALLS'}]->(g:Function) RETURN f, g
|
|
10
|
+
*/
|
|
11
|
+
export declare const NODE_TABLES: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module"];
|
|
12
|
+
export type NodeTableName = typeof NODE_TABLES[number];
|
|
13
|
+
export declare const REL_TABLE_NAME = "CodeRelation";
|
|
14
|
+
export declare const REL_TYPES: readonly ["CONTAINS", "DEFINES", "IMPORTS", "CALLS", "EXTENDS", "IMPLEMENTS", "MEMBER_OF", "STEP_IN_PROCESS"];
|
|
15
|
+
export type RelType = typeof REL_TYPES[number];
|
|
16
|
+
export declare const EMBEDDING_TABLE_NAME = "CodeEmbedding";
|
|
17
|
+
export declare const FILE_SCHEMA = "\nCREATE NODE TABLE File (\n id STRING,\n name STRING,\n filePath STRING,\n content STRING,\n PRIMARY KEY (id)\n)";
|
|
18
|
+
export declare const FOLDER_SCHEMA = "\nCREATE NODE TABLE Folder (\n id STRING,\n name STRING,\n filePath STRING,\n PRIMARY KEY (id)\n)";
|
|
19
|
+
export declare const FUNCTION_SCHEMA = "\nCREATE NODE TABLE Function (\n id STRING,\n name STRING,\n filePath STRING,\n startLine INT64,\n endLine INT64,\n isExported BOOLEAN,\n content STRING,\n PRIMARY KEY (id)\n)";
|
|
20
|
+
export declare const CLASS_SCHEMA = "\nCREATE NODE TABLE Class (\n id STRING,\n name STRING,\n filePath STRING,\n startLine INT64,\n endLine INT64,\n isExported BOOLEAN,\n content STRING,\n PRIMARY KEY (id)\n)";
|
|
21
|
+
export declare const INTERFACE_SCHEMA = "\nCREATE NODE TABLE Interface (\n id STRING,\n name STRING,\n filePath STRING,\n startLine INT64,\n endLine INT64,\n isExported BOOLEAN,\n content STRING,\n PRIMARY KEY (id)\n)";
|
|
22
|
+
export declare const METHOD_SCHEMA = "\nCREATE NODE TABLE Method (\n id STRING,\n name STRING,\n filePath STRING,\n startLine INT64,\n endLine INT64,\n isExported BOOLEAN,\n content STRING,\n PRIMARY KEY (id)\n)";
|
|
23
|
+
export declare const CODE_ELEMENT_SCHEMA = "\nCREATE NODE TABLE CodeElement (\n id STRING,\n name STRING,\n filePath STRING,\n startLine INT64,\n endLine INT64,\n isExported BOOLEAN,\n content STRING,\n PRIMARY KEY (id)\n)";
|
|
24
|
+
export declare const COMMUNITY_SCHEMA = "\nCREATE NODE TABLE Community (\n id STRING,\n label STRING,\n heuristicLabel STRING,\n keywords STRING[],\n description STRING,\n enrichedBy STRING,\n cohesion DOUBLE,\n symbolCount INT32,\n PRIMARY KEY (id)\n)";
|
|
25
|
+
export declare const PROCESS_SCHEMA = "\nCREATE NODE TABLE Process (\n id STRING,\n label STRING,\n heuristicLabel STRING,\n processType STRING,\n stepCount INT32,\n communities STRING[],\n entryPointId STRING,\n terminalId STRING,\n PRIMARY KEY (id)\n)";
|
|
26
|
+
export declare const STRUCT_SCHEMA: string;
|
|
27
|
+
export declare const ENUM_SCHEMA: string;
|
|
28
|
+
export declare const MACRO_SCHEMA: string;
|
|
29
|
+
export declare const TYPEDEF_SCHEMA: string;
|
|
30
|
+
export declare const UNION_SCHEMA: string;
|
|
31
|
+
export declare const NAMESPACE_SCHEMA: string;
|
|
32
|
+
export declare const TRAIT_SCHEMA: string;
|
|
33
|
+
export declare const IMPL_SCHEMA: string;
|
|
34
|
+
export declare const TYPE_ALIAS_SCHEMA: string;
|
|
35
|
+
export declare const CONST_SCHEMA: string;
|
|
36
|
+
export declare const STATIC_SCHEMA: string;
|
|
37
|
+
export declare const PROPERTY_SCHEMA: string;
|
|
38
|
+
export declare const RECORD_SCHEMA: string;
|
|
39
|
+
export declare const DELEGATE_SCHEMA: string;
|
|
40
|
+
export declare const ANNOTATION_SCHEMA: string;
|
|
41
|
+
export declare const CONSTRUCTOR_SCHEMA: string;
|
|
42
|
+
export declare const TEMPLATE_SCHEMA: string;
|
|
43
|
+
export declare const MODULE_SCHEMA: string;
|
|
44
|
+
export declare const RELATION_SCHEMA = "\nCREATE REL TABLE CodeRelation (\n FROM File TO File,\n FROM File TO Folder,\n FROM File TO Function,\n FROM File TO Class,\n FROM File TO Interface,\n FROM File TO Method,\n FROM File TO CodeElement,\n FROM File TO `Struct`,\n FROM File TO `Enum`,\n FROM File TO `Macro`,\n FROM File TO `Typedef`,\n FROM File TO `Union`,\n FROM File TO `Namespace`,\n FROM File TO `Trait`,\n FROM File TO `Impl`,\n FROM File TO `TypeAlias`,\n FROM File TO `Const`,\n FROM File TO `Static`,\n FROM File TO `Property`,\n FROM File TO `Record`,\n FROM File TO `Delegate`,\n FROM File TO `Annotation`,\n FROM File TO `Constructor`,\n FROM File TO `Template`,\n FROM File TO `Module`,\n FROM Folder TO Folder,\n FROM Folder TO File,\n FROM Function TO Function,\n FROM Function TO Method,\n FROM Function TO Class,\n FROM Function TO Community,\n FROM Function TO `Macro`,\n FROM Function TO `Struct`,\n FROM Function TO `Template`,\n FROM Function TO `Enum`,\n FROM Function TO `Namespace`,\n FROM Function TO `TypeAlias`,\n FROM Function TO `Module`,\n FROM Function TO `Impl`,\n FROM Function TO Interface,\n FROM Function TO `Constructor`,\n FROM Class TO Method,\n FROM Class TO Function,\n FROM Class TO Class,\n FROM Class TO Interface,\n FROM Class TO Community,\n FROM Class TO `Template`,\n FROM Class TO `TypeAlias`,\n FROM Class TO `Struct`,\n FROM Class TO `Enum`,\n FROM Class TO `Constructor`,\n FROM Method TO Function,\n FROM Method TO Method,\n FROM Method TO Class,\n FROM Method TO Community,\n FROM Method TO `Template`,\n FROM Method TO `Struct`,\n FROM Method TO `TypeAlias`,\n FROM Method TO `Enum`,\n FROM Method TO `Macro`,\n FROM Method TO `Namespace`,\n FROM Method TO `Module`,\n FROM Method TO `Impl`,\n FROM Method TO Interface,\n FROM Method TO `Constructor`,\n FROM `Template` TO `Template`,\n FROM `Template` TO Function,\n FROM `Template` TO Method,\n FROM `Template` TO Class,\n FROM `Template` TO `Struct`,\n FROM `Template` TO 
`TypeAlias`,\n FROM `Template` TO `Enum`,\n FROM `Template` TO `Macro`,\n FROM `Template` TO Interface,\n FROM `Template` TO `Constructor`,\n FROM `Module` TO `Module`,\n FROM CodeElement TO Community,\n FROM Interface TO Community,\n FROM Interface TO Function,\n FROM Interface TO Method,\n FROM Interface TO Class,\n FROM Interface TO Interface,\n FROM Interface TO `TypeAlias`,\n FROM Interface TO `Struct`,\n FROM Interface TO `Constructor`,\n FROM `Struct` TO Community,\n FROM `Struct` TO `Trait`,\n FROM `Struct` TO Function,\n FROM `Struct` TO Method,\n FROM `Enum` TO Community,\n FROM `Macro` TO Community,\n FROM `Macro` TO Function,\n FROM `Macro` TO Method,\n FROM `Module` TO Function,\n FROM `Module` TO Method,\n FROM `Typedef` TO Community,\n FROM `Union` TO Community,\n FROM `Namespace` TO Community,\n FROM `Trait` TO Community,\n FROM `Impl` TO Community,\n FROM `Impl` TO `Trait`,\n FROM `TypeAlias` TO Community,\n FROM `Const` TO Community,\n FROM `Static` TO Community,\n FROM `Property` TO Community,\n FROM `Record` TO Community,\n FROM `Delegate` TO Community,\n FROM `Annotation` TO Community,\n FROM `Constructor` TO Community,\n FROM `Constructor` TO Interface,\n FROM `Constructor` TO Class,\n FROM `Constructor` TO Method,\n FROM `Constructor` TO Function,\n FROM `Constructor` TO `Constructor`,\n FROM `Constructor` TO `Struct`,\n FROM `Constructor` TO `Macro`,\n FROM `Constructor` TO `Template`,\n FROM `Constructor` TO `TypeAlias`,\n FROM `Constructor` TO `Enum`,\n FROM `Constructor` TO `Impl`,\n FROM `Constructor` TO `Namespace`,\n FROM `Template` TO Community,\n FROM `Module` TO Community,\n FROM Function TO Process,\n FROM Method TO Process,\n FROM Class TO Process,\n FROM Interface TO Process,\n FROM `Struct` TO Process,\n FROM `Constructor` TO Process,\n FROM `Module` TO Process,\n FROM `Macro` TO Process,\n FROM `Impl` TO Process,\n FROM `Typedef` TO Process,\n FROM `TypeAlias` TO Process,\n FROM `Enum` TO Process,\n FROM `Union` TO Process,\n 
FROM `Namespace` TO Process,\n FROM `Trait` TO Process,\n FROM `Const` TO Process,\n FROM `Static` TO Process,\n FROM `Property` TO Process,\n FROM `Record` TO Process,\n FROM `Delegate` TO Process,\n FROM `Annotation` TO Process,\n FROM `Template` TO Process,\n FROM CodeElement TO Process,\n type STRING,\n confidence DOUBLE,\n reason STRING,\n step INT32\n)";
|
|
45
|
+
export declare const EMBEDDING_SCHEMA = "\nCREATE NODE TABLE CodeEmbedding (\n nodeId STRING,\n embedding FLOAT[384],\n PRIMARY KEY (nodeId)\n)";
|
|
46
|
+
/**
|
|
47
|
+
* Create vector index for semantic search
|
|
48
|
+
* Uses HNSW (Hierarchical Navigable Small World) algorithm with cosine similarity
|
|
49
|
+
*/
|
|
50
|
+
export declare const CREATE_VECTOR_INDEX_QUERY = "\nCALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')\n";
|
|
51
|
+
/** CREATE NODE TABLE statements for the graph node tables (the embedding table is not part of this list). */
export declare const NODE_SCHEMA_QUERIES: string[];
|
|
52
|
+
/** Relationship-table statements; the implementation lists only RELATION_SCHEMA. */
export declare const REL_SCHEMA_QUERIES: string[];
|
|
53
|
+
/** All schema statements in order: NODE_SCHEMA_QUERIES, then REL_SCHEMA_QUERIES, then EMBEDDING_SCHEMA. */
export declare const SCHEMA_QUERIES: string[];
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KuzuDB Schema Definitions
|
|
3
|
+
*
|
|
4
|
+
* Hybrid Schema:
|
|
5
|
+
* - Separate node tables for each code element type (File, Function, Class, etc.)
|
|
6
|
+
* - Single CodeRelation table with 'type' property for all relationships
|
|
7
|
+
*
|
|
8
|
+
* This allows LLMs to write natural Cypher queries like:
|
|
9
|
+
* MATCH (f:Function)-[r:CodeRelation {type: 'CALLS'}]->(g:Function) RETURN f, g
|
|
10
|
+
*/
|
|
11
|
+
// ============================================================================
|
|
12
|
+
// NODE TABLE NAMES
|
|
13
|
+
// ============================================================================
|
|
14
|
+
/**
 * Names of every node table in the graph schema.
 *
 * The array is frozen so the runtime value honours the `readonly` tuple type
 * published in the type declarations; callers that need a mutable list must
 * copy it first (e.g. `[...NODE_TABLES]`).
 */
export const NODE_TABLES = Object.freeze([
    'File', 'Folder', 'Function', 'Class', 'Interface', 'Method', 'CodeElement', 'Community', 'Process',
    // Multi-language support
    'Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
    'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module',
]);
|
|
20
|
+
// ============================================================================
|
|
21
|
+
// RELATION TABLE
|
|
22
|
+
// ============================================================================
|
|
23
|
+
/** Name of the single relationship table; interpolated into RELATION_SCHEMA's CREATE REL TABLE statement. */
export const REL_TABLE_NAME = 'CodeRelation';
|
|
24
|
+
// Valid relation types
|
|
25
|
+
/**
 * Relation kinds stored in the `type` property of the relation table.
 * Frozen so the runtime value honours the `readonly` tuple type published in
 * the type declarations; copy it before modifying.
 */
export const REL_TYPES = Object.freeze(['CONTAINS', 'DEFINES', 'IMPORTS', 'CALLS', 'EXTENDS', 'IMPLEMENTS', 'MEMBER_OF', 'STEP_IN_PROCESS']);
|
|
26
|
+
// ============================================================================
|
|
27
|
+
// EMBEDDING TABLE
|
|
28
|
+
// ============================================================================
|
|
29
|
+
/** Name of the embedding node table; interpolated into EMBEDDING_SCHEMA and CREATE_VECTOR_INDEX_QUERY. */
export const EMBEDDING_TABLE_NAME = 'CodeEmbedding';
|
|
30
|
+
// ============================================================================
|
|
31
|
+
// NODE TABLE SCHEMAS
|
|
32
|
+
// ============================================================================
|
|
33
|
+
export const FILE_SCHEMA = `
|
|
34
|
+
CREATE NODE TABLE File (
|
|
35
|
+
id STRING,
|
|
36
|
+
name STRING,
|
|
37
|
+
filePath STRING,
|
|
38
|
+
content STRING,
|
|
39
|
+
PRIMARY KEY (id)
|
|
40
|
+
)`;
|
|
41
|
+
export const FOLDER_SCHEMA = `
|
|
42
|
+
CREATE NODE TABLE Folder (
|
|
43
|
+
id STRING,
|
|
44
|
+
name STRING,
|
|
45
|
+
filePath STRING,
|
|
46
|
+
PRIMARY KEY (id)
|
|
47
|
+
)`;
|
|
48
|
+
export const FUNCTION_SCHEMA = `
|
|
49
|
+
CREATE NODE TABLE Function (
|
|
50
|
+
id STRING,
|
|
51
|
+
name STRING,
|
|
52
|
+
filePath STRING,
|
|
53
|
+
startLine INT64,
|
|
54
|
+
endLine INT64,
|
|
55
|
+
isExported BOOLEAN,
|
|
56
|
+
content STRING,
|
|
57
|
+
PRIMARY KEY (id)
|
|
58
|
+
)`;
|
|
59
|
+
export const CLASS_SCHEMA = `
|
|
60
|
+
CREATE NODE TABLE Class (
|
|
61
|
+
id STRING,
|
|
62
|
+
name STRING,
|
|
63
|
+
filePath STRING,
|
|
64
|
+
startLine INT64,
|
|
65
|
+
endLine INT64,
|
|
66
|
+
isExported BOOLEAN,
|
|
67
|
+
content STRING,
|
|
68
|
+
PRIMARY KEY (id)
|
|
69
|
+
)`;
|
|
70
|
+
export const INTERFACE_SCHEMA = `
|
|
71
|
+
CREATE NODE TABLE Interface (
|
|
72
|
+
id STRING,
|
|
73
|
+
name STRING,
|
|
74
|
+
filePath STRING,
|
|
75
|
+
startLine INT64,
|
|
76
|
+
endLine INT64,
|
|
77
|
+
isExported BOOLEAN,
|
|
78
|
+
content STRING,
|
|
79
|
+
PRIMARY KEY (id)
|
|
80
|
+
)`;
|
|
81
|
+
export const METHOD_SCHEMA = `
|
|
82
|
+
CREATE NODE TABLE Method (
|
|
83
|
+
id STRING,
|
|
84
|
+
name STRING,
|
|
85
|
+
filePath STRING,
|
|
86
|
+
startLine INT64,
|
|
87
|
+
endLine INT64,
|
|
88
|
+
isExported BOOLEAN,
|
|
89
|
+
content STRING,
|
|
90
|
+
PRIMARY KEY (id)
|
|
91
|
+
)`;
|
|
92
|
+
export const CODE_ELEMENT_SCHEMA = `
|
|
93
|
+
CREATE NODE TABLE CodeElement (
|
|
94
|
+
id STRING,
|
|
95
|
+
name STRING,
|
|
96
|
+
filePath STRING,
|
|
97
|
+
startLine INT64,
|
|
98
|
+
endLine INT64,
|
|
99
|
+
isExported BOOLEAN,
|
|
100
|
+
content STRING,
|
|
101
|
+
PRIMARY KEY (id)
|
|
102
|
+
)`;
|
|
103
|
+
// ============================================================================
|
|
104
|
+
// COMMUNITY NODE TABLE (for Leiden algorithm clusters)
|
|
105
|
+
// ============================================================================
|
|
106
|
+
export const COMMUNITY_SCHEMA = `
|
|
107
|
+
CREATE NODE TABLE Community (
|
|
108
|
+
id STRING,
|
|
109
|
+
label STRING,
|
|
110
|
+
heuristicLabel STRING,
|
|
111
|
+
keywords STRING[],
|
|
112
|
+
description STRING,
|
|
113
|
+
enrichedBy STRING,
|
|
114
|
+
cohesion DOUBLE,
|
|
115
|
+
symbolCount INT32,
|
|
116
|
+
PRIMARY KEY (id)
|
|
117
|
+
)`;
|
|
118
|
+
// ============================================================================
|
|
119
|
+
// PROCESS NODE TABLE (for execution flow detection)
|
|
120
|
+
// ============================================================================
|
|
121
|
+
export const PROCESS_SCHEMA = `
|
|
122
|
+
CREATE NODE TABLE Process (
|
|
123
|
+
id STRING,
|
|
124
|
+
label STRING,
|
|
125
|
+
heuristicLabel STRING,
|
|
126
|
+
processType STRING,
|
|
127
|
+
stepCount INT32,
|
|
128
|
+
communities STRING[],
|
|
129
|
+
entryPointId STRING,
|
|
130
|
+
terminalId STRING,
|
|
131
|
+
PRIMARY KEY (id)
|
|
132
|
+
)`;
|
|
133
|
+
// ============================================================================
|
|
134
|
+
// MULTI-LANGUAGE NODE TABLE SCHEMAS
|
|
135
|
+
// ============================================================================
|
|
136
|
+
// Generic code element with startLine/endLine for C, C++, Rust, Go, Java, C#
|
|
137
|
+
/**
 * Builds the CREATE NODE TABLE statement shared by the multi-language node tables.
 *
 * Columns: id (primary key), name, filePath, startLine, endLine, content.
 * Unlike the Function/Class/Interface/Method/CodeElement tables, there is no
 * isExported column. The table name is emitted inside backticks —
 * NOTE(review): presumably to avoid clashes with reserved words; confirm.
 *
 * @param {string} name - Node table name to create.
 * @returns {string} The DDL statement for that table.
 */
const CODE_ELEMENT_BASE = (name) => `
|
|
138
|
+
CREATE NODE TABLE \`${name}\` (
|
|
139
|
+
id STRING,
|
|
140
|
+
name STRING,
|
|
141
|
+
filePath STRING,
|
|
142
|
+
startLine INT64,
|
|
143
|
+
endLine INT64,
|
|
144
|
+
content STRING,
|
|
145
|
+
PRIMARY KEY (id)
|
|
146
|
+
)`;
|
|
147
|
+
// Multi-language node tables: each statement is produced by CODE_ELEMENT_BASE,
// so these schemas differ only in the table name passed in.
export const STRUCT_SCHEMA = CODE_ELEMENT_BASE('Struct');
|
|
148
|
+
export const ENUM_SCHEMA = CODE_ELEMENT_BASE('Enum');
|
|
149
|
+
export const MACRO_SCHEMA = CODE_ELEMENT_BASE('Macro');
|
|
150
|
+
export const TYPEDEF_SCHEMA = CODE_ELEMENT_BASE('Typedef');
|
|
151
|
+
export const UNION_SCHEMA = CODE_ELEMENT_BASE('Union');
|
|
152
|
+
export const NAMESPACE_SCHEMA = CODE_ELEMENT_BASE('Namespace');
|
|
153
|
+
export const TRAIT_SCHEMA = CODE_ELEMENT_BASE('Trait');
|
|
154
|
+
export const IMPL_SCHEMA = CODE_ELEMENT_BASE('Impl');
|
|
155
|
+
export const TYPE_ALIAS_SCHEMA = CODE_ELEMENT_BASE('TypeAlias');
|
|
156
|
+
export const CONST_SCHEMA = CODE_ELEMENT_BASE('Const');
|
|
157
|
+
export const STATIC_SCHEMA = CODE_ELEMENT_BASE('Static');
|
|
158
|
+
export const PROPERTY_SCHEMA = CODE_ELEMENT_BASE('Property');
|
|
159
|
+
export const RECORD_SCHEMA = CODE_ELEMENT_BASE('Record');
|
|
160
|
+
export const DELEGATE_SCHEMA = CODE_ELEMENT_BASE('Delegate');
|
|
161
|
+
export const ANNOTATION_SCHEMA = CODE_ELEMENT_BASE('Annotation');
|
|
162
|
+
export const CONSTRUCTOR_SCHEMA = CODE_ELEMENT_BASE('Constructor');
|
|
163
|
+
export const TEMPLATE_SCHEMA = CODE_ELEMENT_BASE('Template');
|
|
164
|
+
export const MODULE_SCHEMA = CODE_ELEMENT_BASE('Module');
|
|
165
|
+
// ============================================================================
|
|
166
|
+
// RELATION TABLE SCHEMA
|
|
167
|
+
// Single table with 'type' property - connects only the FROM/TO node-table pairs listed below
|
|
168
|
+
// ============================================================================
|
|
169
|
+
export const RELATION_SCHEMA = `
|
|
170
|
+
CREATE REL TABLE ${REL_TABLE_NAME} (
|
|
171
|
+
FROM File TO File,
|
|
172
|
+
FROM File TO Folder,
|
|
173
|
+
FROM File TO Function,
|
|
174
|
+
FROM File TO Class,
|
|
175
|
+
FROM File TO Interface,
|
|
176
|
+
FROM File TO Method,
|
|
177
|
+
FROM File TO CodeElement,
|
|
178
|
+
FROM File TO \`Struct\`,
|
|
179
|
+
FROM File TO \`Enum\`,
|
|
180
|
+
FROM File TO \`Macro\`,
|
|
181
|
+
FROM File TO \`Typedef\`,
|
|
182
|
+
FROM File TO \`Union\`,
|
|
183
|
+
FROM File TO \`Namespace\`,
|
|
184
|
+
FROM File TO \`Trait\`,
|
|
185
|
+
FROM File TO \`Impl\`,
|
|
186
|
+
FROM File TO \`TypeAlias\`,
|
|
187
|
+
FROM File TO \`Const\`,
|
|
188
|
+
FROM File TO \`Static\`,
|
|
189
|
+
FROM File TO \`Property\`,
|
|
190
|
+
FROM File TO \`Record\`,
|
|
191
|
+
FROM File TO \`Delegate\`,
|
|
192
|
+
FROM File TO \`Annotation\`,
|
|
193
|
+
FROM File TO \`Constructor\`,
|
|
194
|
+
FROM File TO \`Template\`,
|
|
195
|
+
FROM File TO \`Module\`,
|
|
196
|
+
FROM Folder TO Folder,
|
|
197
|
+
FROM Folder TO File,
|
|
198
|
+
FROM Function TO Function,
|
|
199
|
+
FROM Function TO Method,
|
|
200
|
+
FROM Function TO Class,
|
|
201
|
+
FROM Function TO Community,
|
|
202
|
+
FROM Function TO \`Macro\`,
|
|
203
|
+
FROM Function TO \`Struct\`,
|
|
204
|
+
FROM Function TO \`Template\`,
|
|
205
|
+
FROM Function TO \`Enum\`,
|
|
206
|
+
FROM Function TO \`Namespace\`,
|
|
207
|
+
FROM Function TO \`TypeAlias\`,
|
|
208
|
+
FROM Function TO \`Module\`,
|
|
209
|
+
FROM Function TO \`Impl\`,
|
|
210
|
+
FROM Function TO Interface,
|
|
211
|
+
FROM Function TO \`Constructor\`,
|
|
212
|
+
FROM Class TO Method,
|
|
213
|
+
FROM Class TO Function,
|
|
214
|
+
FROM Class TO Class,
|
|
215
|
+
FROM Class TO Interface,
|
|
216
|
+
FROM Class TO Community,
|
|
217
|
+
FROM Class TO \`Template\`,
|
|
218
|
+
FROM Class TO \`TypeAlias\`,
|
|
219
|
+
FROM Class TO \`Struct\`,
|
|
220
|
+
FROM Class TO \`Enum\`,
|
|
221
|
+
FROM Class TO \`Constructor\`,
|
|
222
|
+
FROM Method TO Function,
|
|
223
|
+
FROM Method TO Method,
|
|
224
|
+
FROM Method TO Class,
|
|
225
|
+
FROM Method TO Community,
|
|
226
|
+
FROM Method TO \`Template\`,
|
|
227
|
+
FROM Method TO \`Struct\`,
|
|
228
|
+
FROM Method TO \`TypeAlias\`,
|
|
229
|
+
FROM Method TO \`Enum\`,
|
|
230
|
+
FROM Method TO \`Macro\`,
|
|
231
|
+
FROM Method TO \`Namespace\`,
|
|
232
|
+
FROM Method TO \`Module\`,
|
|
233
|
+
FROM Method TO \`Impl\`,
|
|
234
|
+
FROM Method TO Interface,
|
|
235
|
+
FROM Method TO \`Constructor\`,
|
|
236
|
+
FROM \`Template\` TO \`Template\`,
|
|
237
|
+
FROM \`Template\` TO Function,
|
|
238
|
+
FROM \`Template\` TO Method,
|
|
239
|
+
FROM \`Template\` TO Class,
|
|
240
|
+
FROM \`Template\` TO \`Struct\`,
|
|
241
|
+
FROM \`Template\` TO \`TypeAlias\`,
|
|
242
|
+
FROM \`Template\` TO \`Enum\`,
|
|
243
|
+
FROM \`Template\` TO \`Macro\`,
|
|
244
|
+
FROM \`Template\` TO Interface,
|
|
245
|
+
FROM \`Template\` TO \`Constructor\`,
|
|
246
|
+
FROM \`Module\` TO \`Module\`,
|
|
247
|
+
FROM CodeElement TO Community,
|
|
248
|
+
FROM Interface TO Community,
|
|
249
|
+
FROM Interface TO Function,
|
|
250
|
+
FROM Interface TO Method,
|
|
251
|
+
FROM Interface TO Class,
|
|
252
|
+
FROM Interface TO Interface,
|
|
253
|
+
FROM Interface TO \`TypeAlias\`,
|
|
254
|
+
FROM Interface TO \`Struct\`,
|
|
255
|
+
FROM Interface TO \`Constructor\`,
|
|
256
|
+
FROM \`Struct\` TO Community,
|
|
257
|
+
FROM \`Struct\` TO \`Trait\`,
|
|
258
|
+
FROM \`Struct\` TO Function,
|
|
259
|
+
FROM \`Struct\` TO Method,
|
|
260
|
+
FROM \`Enum\` TO Community,
|
|
261
|
+
FROM \`Macro\` TO Community,
|
|
262
|
+
FROM \`Macro\` TO Function,
|
|
263
|
+
FROM \`Macro\` TO Method,
|
|
264
|
+
FROM \`Module\` TO Function,
|
|
265
|
+
FROM \`Module\` TO Method,
|
|
266
|
+
FROM \`Typedef\` TO Community,
|
|
267
|
+
FROM \`Union\` TO Community,
|
|
268
|
+
FROM \`Namespace\` TO Community,
|
|
269
|
+
FROM \`Trait\` TO Community,
|
|
270
|
+
FROM \`Impl\` TO Community,
|
|
271
|
+
FROM \`Impl\` TO \`Trait\`,
|
|
272
|
+
FROM \`TypeAlias\` TO Community,
|
|
273
|
+
FROM \`Const\` TO Community,
|
|
274
|
+
FROM \`Static\` TO Community,
|
|
275
|
+
FROM \`Property\` TO Community,
|
|
276
|
+
FROM \`Record\` TO Community,
|
|
277
|
+
FROM \`Delegate\` TO Community,
|
|
278
|
+
FROM \`Annotation\` TO Community,
|
|
279
|
+
FROM \`Constructor\` TO Community,
|
|
280
|
+
FROM \`Constructor\` TO Interface,
|
|
281
|
+
FROM \`Constructor\` TO Class,
|
|
282
|
+
FROM \`Constructor\` TO Method,
|
|
283
|
+
FROM \`Constructor\` TO Function,
|
|
284
|
+
FROM \`Constructor\` TO \`Constructor\`,
|
|
285
|
+
FROM \`Constructor\` TO \`Struct\`,
|
|
286
|
+
FROM \`Constructor\` TO \`Macro\`,
|
|
287
|
+
FROM \`Constructor\` TO \`Template\`,
|
|
288
|
+
FROM \`Constructor\` TO \`TypeAlias\`,
|
|
289
|
+
FROM \`Constructor\` TO \`Enum\`,
|
|
290
|
+
FROM \`Constructor\` TO \`Impl\`,
|
|
291
|
+
FROM \`Constructor\` TO \`Namespace\`,
|
|
292
|
+
FROM \`Template\` TO Community,
|
|
293
|
+
FROM \`Module\` TO Community,
|
|
294
|
+
FROM Function TO Process,
|
|
295
|
+
FROM Method TO Process,
|
|
296
|
+
FROM Class TO Process,
|
|
297
|
+
FROM Interface TO Process,
|
|
298
|
+
FROM \`Struct\` TO Process,
|
|
299
|
+
FROM \`Constructor\` TO Process,
|
|
300
|
+
FROM \`Module\` TO Process,
|
|
301
|
+
FROM \`Macro\` TO Process,
|
|
302
|
+
FROM \`Impl\` TO Process,
|
|
303
|
+
FROM \`Typedef\` TO Process,
|
|
304
|
+
FROM \`TypeAlias\` TO Process,
|
|
305
|
+
FROM \`Enum\` TO Process,
|
|
306
|
+
FROM \`Union\` TO Process,
|
|
307
|
+
FROM \`Namespace\` TO Process,
|
|
308
|
+
FROM \`Trait\` TO Process,
|
|
309
|
+
FROM \`Const\` TO Process,
|
|
310
|
+
FROM \`Static\` TO Process,
|
|
311
|
+
FROM \`Property\` TO Process,
|
|
312
|
+
FROM \`Record\` TO Process,
|
|
313
|
+
FROM \`Delegate\` TO Process,
|
|
314
|
+
FROM \`Annotation\` TO Process,
|
|
315
|
+
FROM \`Template\` TO Process,
|
|
316
|
+
FROM CodeElement TO Process,
|
|
317
|
+
type STRING,
|
|
318
|
+
confidence DOUBLE,
|
|
319
|
+
reason STRING,
|
|
320
|
+
step INT32
|
|
321
|
+
)`;
|
|
322
|
+
// ============================================================================
|
|
323
|
+
// EMBEDDING TABLE SCHEMA
|
|
324
|
+
// Separate table for vector storage to avoid copy-on-write overhead
|
|
325
|
+
// ============================================================================
|
|
326
|
+
/**
 * DDL for the embedding node table: one row per `nodeId` (primary key) with a
 * fixed-size FLOAT[384] vector in `embedding`.
 * NOTE(review): 384 presumably has to equal the embedding model's output
 * dimension — confirm against the embedder before changing either side.
 */
export const EMBEDDING_SCHEMA = `
|
|
327
|
+
CREATE NODE TABLE ${EMBEDDING_TABLE_NAME} (
|
|
328
|
+
nodeId STRING,
|
|
329
|
+
embedding FLOAT[384],
|
|
330
|
+
PRIMARY KEY (nodeId)
|
|
331
|
+
)`;
|
|
332
|
+
/**
|
|
333
|
+
* Create vector index for semantic search
|
|
334
|
+
* Uses HNSW (Hierarchical Navigable Small World) algorithm with cosine similarity
|
|
335
|
+
*/
|
|
336
|
+
export const CREATE_VECTOR_INDEX_QUERY = `
|
|
337
|
+
CALL CREATE_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', 'code_embedding_idx', 'embedding', metric := 'cosine')
|
|
338
|
+
`;
|
|
339
|
+
// ============================================================================
|
|
340
|
+
// ALL SCHEMA QUERIES IN ORDER
|
|
341
|
+
// Node tables must be created before relationship tables that reference them
|
|
342
|
+
// ============================================================================
|
|
343
|
+
/**
 * CREATE NODE TABLE statements for every graph node table, in creation order:
 * the core tables first, then the multi-language tables. The embedding table
 * is not part of this list.
 */
export const NODE_SCHEMA_QUERIES = [
    // Core tables
    FILE_SCHEMA, FOLDER_SCHEMA, FUNCTION_SCHEMA, CLASS_SCHEMA, INTERFACE_SCHEMA,
    METHOD_SCHEMA, CODE_ELEMENT_SCHEMA, COMMUNITY_SCHEMA, PROCESS_SCHEMA,
    // Multi-language support
    STRUCT_SCHEMA, ENUM_SCHEMA, MACRO_SCHEMA, TYPEDEF_SCHEMA, UNION_SCHEMA, NAMESPACE_SCHEMA,
    TRAIT_SCHEMA, IMPL_SCHEMA, TYPE_ALIAS_SCHEMA, CONST_SCHEMA, STATIC_SCHEMA, PROPERTY_SCHEMA,
    RECORD_SCHEMA, DELEGATE_SCHEMA, ANNOTATION_SCHEMA, CONSTRUCTOR_SCHEMA, TEMPLATE_SCHEMA, MODULE_SCHEMA,
];
|
|
373
|
+
/** Relationship-table statements; currently just the single relation table definition. */
export const REL_SCHEMA_QUERIES = [RELATION_SCHEMA];
|
|
376
|
+
/**
 * Every schema statement in execution order: node tables, then the relation
 * table that references them, and finally the embedding table.
 */
export const SCHEMA_QUERIES = NODE_SCHEMA_QUERIES.concat(REL_SCHEMA_QUERIES, [EMBEDDING_SCHEMA]);
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Full-Text Search via KuzuDB FTS
|
|
3
|
+
*
|
|
4
|
+
* Uses KuzuDB's built-in full-text search indexes for keyword-based search.
|
|
5
|
+
* Always reads from the database (no cached state to drift).
|
|
6
|
+
*/
|
|
7
|
+
/** One merged full-text search hit; hits from different node tables are combined per file path. */
export interface BM25SearchResult {
|
|
8
|
+
/** File path the hit was merged under. */
filePath: string;
|
|
9
|
+
/** Sum of the FTS scores of all matching rows that share this filePath. */
score: number;
|
|
10
|
+
/** 1-based position after sorting by score, highest first. */
rank: number;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Search using KuzuDB's built-in FTS (always fresh, reads from disk)
|
|
14
|
+
*
|
|
15
|
+
* Queries multiple node tables (File, Function, Class, Method) in parallel
|
|
16
|
+
* and merges results by filePath, summing scores for the same file.
|
|
17
|
+
*
|
|
18
|
+
* @param query - Search query string
|
|
19
|
+
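* @param limit - Maximum number of merged results to return; the same value caps each per-table FTS query (implementation default: 20)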
|
|
20
|
+
* @returns Ranked search results from FTS indexes
|
|
21
|
+
*/
|
|
22
|
+
export declare const searchFTSFromKuzu: (query: string, limit?: number) => Promise<BM25SearchResult[]>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Full-Text Search via KuzuDB FTS
|
|
3
|
+
*
|
|
4
|
+
* Uses KuzuDB's built-in full-text search indexes for keyword-based search.
|
|
5
|
+
* Always reads from the database (no cached state to drift).
|
|
6
|
+
*/
|
|
7
|
+
import { queryFTS } from '../kuzu/kuzu-adapter.js';
|
|
8
|
+
/**
 * Keyword search backed by KuzuDB's full-text indexes.
 *
 * Runs one FTS query per searchable node table concurrently, folds the hits
 * into a per-file score total, and returns the top files with 1-based ranks.
 * A table whose query rejects simply contributes no hits.
 *
 * @param query - Search query string
 * @param limit - Maximum results (per table query and for the merged list)
 * @returns Ranked search results from FTS indexes
 */
export const searchFTSFromKuzu = async (query, limit = 20) => {
    // [node table, FTS index] pairs; the order here is the order in which
    // per-table hits are folded into the totals below.
    const targets = [
        ['File', 'file_fts'],
        ['Function', 'function_fts'],
        ['Class', 'class_fts'],
        ['Method', 'method_fts'],
    ];
    const hitsPerTable = await Promise.all(targets.map(([table, indexName]) => queryFTS(table, indexName, query, limit, false).catch(() => [])));
    // filePath -> summed score across all tables
    const totals = new Map();
    for (const hits of hitsPerTable) {
        for (const hit of hits) {
            const path = hit.filePath;
            if (totals.has(path)) {
                totals.set(path, totals.get(path) + hit.score);
            }
            else {
                totals.set(path, hit.score);
            }
        }
    }
    // Highest total first, truncated to the requested size, then ranked.
    const top = Array.from(totals.entries())
        .sort((left, right) => right[1] - left[1])
        .slice(0, limit);
    return top.map(([filePath, score], position) => ({
        filePath,
        score,
        rank: position + 1,
    }));
};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Search with Reciprocal Rank Fusion (RRF)
|
|
3
|
+
*
|
|
4
|
+
* Combines BM25 (keyword) and semantic (embedding) search results.
|
|
5
|
+
* Uses RRF to merge rankings without needing score normalization.
|
|
6
|
+
*
|
|
7
|
+
* This is the same approach used by Elasticsearch, Pinecone, and other
|
|
8
|
+
* production search systems.
|
|
9
|
+
*/
|
|
10
|
+
import { type BM25SearchResult } from './bm25-index.js';
|
|
11
|
+
import type { SemanticSearchResult } from '../embeddings/types.js';
|
|
12
|
+
/**
 * One file-level entry in the fused (BM25 + semantic) result list.
 */
export interface HybridSearchResult {
    /** Path of the matched file; the key results are merged on. */
    filePath: string;
    /** Fused relevance: sum of the reciprocal-rank terms contributed for this file. */
    score: number;
    /** 1-based position after sorting by fused score, highest first. */
    rank: number;
    /** Which search methods returned this file. */
    sources: ('bm25' | 'semantic')[];
    /** Node id copied from a semantic hit for this file, when there is one. */
    nodeId?: string;
    /** Symbol name copied from a semantic hit, when there is one. */
    name?: string;
    /** Node label copied from a semantic hit, when there is one. */
    label?: string;
    /** Start line copied from a semantic hit, when there is one. */
    startLine?: number;
    /** End line copied from a semantic hit, when there is one. */
    endLine?: number;
    /** Score of the BM25 result for this file, when BM25 returned it. */
    bm25Score?: number;
    /** Computed as `1 - distance` of a semantic hit, when semantic search returned this file. */
    semanticScore?: number;
}
|
|
25
|
+
/**
 * Perform hybrid search combining BM25 and semantic results
 *
 * Each input list is treated as already ranked: the entry at index i
 * contributes 1 / (60 + i + 1) to the fused score of its file.
 *
 * @param bm25Results - Results from BM25 keyword search
 * @param semanticResults - Results from semantic/embedding search
 * @param limit - Maximum results to return (the implementation defaults to 10)
 * @returns Merged and re-ranked results, highest fused score first
 */
export declare const mergeWithRRF: (bm25Results: BM25SearchResult[], semanticResults: SemanticSearchResult[], limit?: number) => HybridSearchResult[];
|
|
34
|
+
/**
 * Check if hybrid search is available
 * KuzuDB FTS is always available once the database is initialized.
 * Semantic search is optional - hybrid works with just FTS if embeddings aren't ready.
 *
 * @returns The current implementation returns `true` unconditionally
 */
export declare const isHybridSearchReady: () => boolean;
|
|
40
|
+
/**
 * Format hybrid results for LLM consumption
 *
 * @param results - Fused results, in the order they should be listed
 * @returns A numbered plain-text listing (label/name, file path, contributing
 *   sources, relevance to four decimals), or 'No results found.' when the
 *   list is empty
 */
export declare const formatHybridResults: (results: HybridSearchResult[]) => string;
|
|
44
|
+
/**
 * Execute BM25 + semantic search and merge with RRF.
 * Uses KuzuDB FTS for always-fresh BM25 results (no cached data).
 * The semanticSearch function is injected to keep this module environment-agnostic.
 *
 * @param query - Search query string, passed to both searches
 * @param limit - Result cap, passed to both searches and to the merge
 * @param executeQuery - Cypher executor; forwarded to semanticSearch as its first argument
 * @param semanticSearch - Injected semantic search, called as (executeQuery, query, limit)
 * @returns Fused results produced by mergeWithRRF
 */
export declare const hybridSearch: (query: string, limit: number, executeQuery: (cypher: string) => Promise<any[]>, semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number) => Promise<SemanticSearchResult[]>) => Promise<HybridSearchResult[]>;
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Search with Reciprocal Rank Fusion (RRF)
|
|
3
|
+
*
|
|
4
|
+
* Combines BM25 (keyword) and semantic (embedding) search results.
|
|
5
|
+
* Uses RRF to merge rankings without needing score normalization.
|
|
6
|
+
*
|
|
7
|
+
* This is the same approach used by Elasticsearch, Pinecone, and other
|
|
8
|
+
* production search systems.
|
|
9
|
+
*/
|
|
10
|
+
import { searchFTSFromKuzu } from './bm25-index.js';
|
|
11
|
+
/**
 * RRF constant - standard value used in the literature
 * Higher values give more weight to lower-ranked results
 */
const RRF_K = 60;
/**
 * Fuse BM25 and semantic result lists with Reciprocal Rank Fusion.
 *
 * Both inputs are treated as already ranked (index 0 = best). Every entry
 * contributes 1 / (RRF_K + rank) to the fused score of its file, so a file
 * that appears several times keeps accumulating score.
 *
 * When a file appears more than once within the same list:
 *  - `sources` lists each search method at most once, and
 *  - `bm25Score`, `semanticScore` and the node metadata (nodeId, name, label,
 *    startLine, endLine) come from the best-ranked hit, never from a later,
 *    weaker one.
 *
 * @param bm25Results - Results from BM25 keyword search, best first
 * @param semanticResults - Results from semantic/embedding search, best first
 * @param limit - Maximum results to return
 * @returns Merged results sorted by fused score (descending) with 1-based ranks
 */
export const mergeWithRRF = (bm25Results, semanticResults, limit = 10) => {
    const merged = new Map();
    // Process BM25 results
    bm25Results.forEach((r, i) => {
        const rrfScore = 1 / (RRF_K + i + 1); // i+1 because rank starts at 1
        const existing = merged.get(r.filePath);
        if (existing) {
            // Same file listed again: add its contribution but keep the
            // better-ranked entry's bm25Score instead of overwriting it.
            existing.score += rrfScore;
            return;
        }
        merged.set(r.filePath, {
            filePath: r.filePath,
            score: rrfScore,
            rank: 0, // Will be set after sorting
            sources: ['bm25'],
            bm25Score: r.score,
        });
    });
    // Process semantic results and merge
    semanticResults.forEach((r, i) => {
        const rrfScore = 1 / (RRF_K + i + 1);
        const existing = merged.get(r.filePath);
        if (!existing) {
            // Only found by semantic (so far)
            merged.set(r.filePath, {
                filePath: r.filePath,
                score: rrfScore,
                rank: 0,
                sources: ['semantic'],
                semanticScore: 1 - r.distance,
                nodeId: r.nodeId,
                name: r.name,
                label: r.label,
                startLine: r.startLine,
                endLine: r.endLine,
            });
            return;
        }
        existing.score += rrfScore;
        if (existing.sources.includes('semantic')) {
            // A better-ranked semantic hit for this file was already recorded;
            // do not duplicate the source tag or replace its metadata.
            return;
        }
        // First semantic hit for a file that BM25 also found
        existing.sources.push('semantic');
        existing.semanticScore = 1 - r.distance;
        existing.nodeId = r.nodeId;
        existing.name = r.name;
        existing.label = r.label;
        existing.startLine = r.startLine;
        existing.endLine = r.endLine;
    });
    // Sort by RRF score descending
    const sorted = Array.from(merged.values())
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);
    // Assign final ranks
    sorted.forEach((r, i) => {
        r.rank = i + 1;
    });
    return sorted;
};
|
|
81
|
+
/**
 * Report whether hybrid search can be used.
 *
 * Always answers `true`: the keyword half relies on KuzuDB FTS, which is
 * available whenever the database is open, and the semantic half is optional.
 *
 * @returns {boolean} Always `true`
 */
export const isHybridSearchReady = () => true;
|
|
89
|
+
/**
 * Format hybrid results for LLM consumption.
 *
 * Each result becomes a four-line entry: a numbered heading with the label
 * and name, the file path with an optional line range, the search methods
 * that found it, and the relevance score to four decimals. Entries are
 * separated by a blank line under a "Found N results:" header.
 *
 * The line range is shown whenever `startLine` is set, including 0 (a plain
 * truthiness test would hide a symbol that starts on line 0). If `endLine`
 * is missing, only the start line is shown rather than "-undefined".
 *
 * @param results - Fused results, in display order
 * @returns The formatted listing, or 'No results found.' for an empty list
 */
export const formatHybridResults = (results) => {
    if (results.length === 0) {
        return 'No results found.';
    }
    const formatted = results.map((r, i) => {
        const sources = r.sources.join(' + ');
        let location = '';
        if (r.startLine != null) {
            location = r.endLine != null
                ? ` (lines ${r.startLine}-${r.endLine})`
                : ` (line ${r.startLine})`;
        }
        const label = r.label ? `${r.label}: ` : 'File: ';
        // Fall back to the file's base name when there is no symbol name.
        const name = r.name || r.filePath.split('/').pop() || r.filePath;
        // Joined explicitly so the entry layout does not depend on source indentation.
        return [
            `[${i + 1}] ${label}${name}`,
            `File: ${r.filePath}${location}`,
            `Found by: ${sources}`,
            `Relevance: ${r.score.toFixed(4)}`,
        ].join('\n');
    });
    return `Found ${results.length} results:\n\n${formatted.join('\n\n')}`;
};
|
|
108
|
+
/**
 * Run keyword and semantic search for one query and fuse the two rankings.
 *
 * The keyword side always goes through KuzuDB FTS (nothing is cached); the
 * semantic side is supplied by the caller so this module stays
 * environment-agnostic. The two searches run one after the other.
 *
 * NOTE(review): a rejection from `semanticSearch` propagates to the caller -
 * confirm that it resolves to an empty list when embeddings are not ready.
 *
 * @param query - Search query string
 * @param limit - Result cap passed to both searches and to the merge
 * @param executeQuery - Cypher executor forwarded to `semanticSearch`
 * @param semanticSearch - Injected semantic search, called as (executeQuery, query, limit)
 * @returns Fused results from mergeWithRRF
 */
export const hybridSearch = async (query, limit, executeQuery, semanticSearch) => {
    // Keyword hits first, straight from the database
    const keywordHits = await searchFTSFromKuzu(query, limit);
    // Then embedding hits from the injected search function
    const embeddingHits = await semanticSearch(executeQuery, query, limit);
    const fused = mergeWithRRF(keywordHits, embeddingHits, limit);
    return fused;
};
|