gitnexus 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +181 -0
- package/dist/cli/ai-context.d.ts +21 -0
- package/dist/cli/ai-context.js +219 -0
- package/dist/cli/analyze.d.ts +10 -0
- package/dist/cli/analyze.js +118 -0
- package/dist/cli/clean.d.ts +8 -0
- package/dist/cli/clean.js +29 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +42 -0
- package/dist/cli/list.d.ts +6 -0
- package/dist/cli/list.js +27 -0
- package/dist/cli/mcp.d.ts +7 -0
- package/dist/cli/mcp.js +85 -0
- package/dist/cli/serve.d.ts +3 -0
- package/dist/cli/serve.js +5 -0
- package/dist/cli/status.d.ts +6 -0
- package/dist/cli/status.js +27 -0
- package/dist/config/ignore-service.d.ts +1 -0
- package/dist/config/ignore-service.js +208 -0
- package/dist/config/supported-languages.d.ts +11 -0
- package/dist/config/supported-languages.js +15 -0
- package/dist/core/embeddings/embedder.d.ts +60 -0
- package/dist/core/embeddings/embedder.js +205 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +50 -0
- package/dist/core/embeddings/embedding-pipeline.js +321 -0
- package/dist/core/embeddings/index.d.ts +9 -0
- package/dist/core/embeddings/index.js +9 -0
- package/dist/core/embeddings/text-generator.d.ts +24 -0
- package/dist/core/embeddings/text-generator.js +182 -0
- package/dist/core/embeddings/types.d.ts +87 -0
- package/dist/core/embeddings/types.js +32 -0
- package/dist/core/graph/graph.d.ts +2 -0
- package/dist/core/graph/graph.js +61 -0
- package/dist/core/graph/types.d.ts +50 -0
- package/dist/core/graph/types.js +1 -0
- package/dist/core/ingestion/ast-cache.d.ts +11 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +8 -0
- package/dist/core/ingestion/call-processor.js +269 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
- package/dist/core/ingestion/cluster-enricher.js +170 -0
- package/dist/core/ingestion/community-processor.d.ts +39 -0
- package/dist/core/ingestion/community-processor.js +269 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
- package/dist/core/ingestion/entry-point-scoring.js +235 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +5 -0
- package/dist/core/ingestion/filesystem-walker.js +26 -0
- package/dist/core/ingestion/framework-detection.d.ts +38 -0
- package/dist/core/ingestion/framework-detection.js +183 -0
- package/dist/core/ingestion/heritage-processor.d.ts +14 -0
- package/dist/core/ingestion/heritage-processor.js +134 -0
- package/dist/core/ingestion/import-processor.d.ts +8 -0
- package/dist/core/ingestion/import-processor.js +490 -0
- package/dist/core/ingestion/parsing-processor.d.ts +8 -0
- package/dist/core/ingestion/parsing-processor.js +249 -0
- package/dist/core/ingestion/pipeline.d.ts +2 -0
- package/dist/core/ingestion/pipeline.js +228 -0
- package/dist/core/ingestion/process-processor.d.ts +51 -0
- package/dist/core/ingestion/process-processor.js +278 -0
- package/dist/core/ingestion/structure-processor.d.ts +2 -0
- package/dist/core/ingestion/structure-processor.js +36 -0
- package/dist/core/ingestion/symbol-table.d.ts +33 -0
- package/dist/core/ingestion/symbol-table.js +38 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -0
- package/dist/core/ingestion/tree-sitter-queries.js +319 -0
- package/dist/core/ingestion/utils.d.ts +10 -0
- package/dist/core/ingestion/utils.js +44 -0
- package/dist/core/kuzu/csv-generator.d.ts +22 -0
- package/dist/core/kuzu/csv-generator.js +272 -0
- package/dist/core/kuzu/kuzu-adapter.d.ts +81 -0
- package/dist/core/kuzu/kuzu-adapter.js +568 -0
- package/dist/core/kuzu/schema.d.ts +53 -0
- package/dist/core/kuzu/schema.js +380 -0
- package/dist/core/search/bm25-index.d.ts +22 -0
- package/dist/core/search/bm25-index.js +52 -0
- package/dist/core/search/hybrid-search.d.ts +49 -0
- package/dist/core/search/hybrid-search.js +118 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
- package/dist/core/tree-sitter/parser-loader.js +42 -0
- package/dist/lib/utils.d.ts +1 -0
- package/dist/lib/utils.js +3 -0
- package/dist/mcp/core/embedder.d.ts +27 -0
- package/dist/mcp/core/embedder.js +93 -0
- package/dist/mcp/core/kuzu-adapter.d.ts +23 -0
- package/dist/mcp/core/kuzu-adapter.js +62 -0
- package/dist/mcp/local/local-backend.d.ts +73 -0
- package/dist/mcp/local/local-backend.js +752 -0
- package/dist/mcp/resources.d.ts +31 -0
- package/dist/mcp/resources.js +279 -0
- package/dist/mcp/server.d.ts +12 -0
- package/dist/mcp/server.js +130 -0
- package/dist/mcp/staleness.d.ts +15 -0
- package/dist/mcp/staleness.js +29 -0
- package/dist/mcp/tools.d.ts +24 -0
- package/dist/mcp/tools.js +160 -0
- package/dist/server/api.d.ts +6 -0
- package/dist/server/api.js +156 -0
- package/dist/storage/git.d.ts +7 -0
- package/dist/storage/git.js +39 -0
- package/dist/storage/repo-manager.d.ts +61 -0
- package/dist/storage/repo-manager.js +106 -0
- package/dist/types/pipeline.d.ts +28 -0
- package/dist/types/pipeline.js +16 -0
- package/package.json +80 -0
- package/skills/debugging.md +104 -0
- package/skills/exploring.md +112 -0
- package/skills/impact-analysis.md +114 -0
- package/skills/refactoring.md +119 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
2
|
+
/*
|
|
3
|
+
* Tree-sitter queries for extracting code definitions.
|
|
4
|
+
*
|
|
5
|
+
* Note: Different grammars (typescript vs tsx vs javascript) may have
|
|
6
|
+
* slightly different node types. These queries are designed to be
|
|
7
|
+
* compatible with the standard tree-sitter grammars.
|
|
8
|
+
*/
|
|
9
|
+
// TypeScript queries - works with tree-sitter-typescript
//
// Captures produced by the patterns in this string:
//   @definition.class / @definition.interface / @definition.function /
//   @definition.method, each with @name on the declared identifier;
//   @import with @import.source on the module specifier string;
//   @call with @call.name on a bare callee identifier or a member property;
//   @heritage (+ @heritage.class, @heritage.extends) for an extends clause;
//   @heritage.impl (+ @heritage.class, @heritage.implements) for an implements clause.
// Arrow functions and function expressions bound in a lexical_declaration are
// captured as @definition.function, both bare and under an export_statement.
export const TYPESCRIPT_QUERIES = `
(class_declaration
name: (type_identifier) @name) @definition.class

(interface_declaration
name: (type_identifier) @name) @definition.interface

(function_declaration
name: (identifier) @name) @definition.function

(method_definition
name: (property_identifier) @name) @definition.method

(lexical_declaration
(variable_declarator
name: (identifier) @name
value: (arrow_function))) @definition.function

(lexical_declaration
(variable_declarator
name: (identifier) @name
value: (function_expression))) @definition.function

(export_statement
declaration: (lexical_declaration
(variable_declarator
name: (identifier) @name
value: (arrow_function)))) @definition.function

(export_statement
declaration: (lexical_declaration
(variable_declarator
name: (identifier) @name
value: (function_expression)))) @definition.function

(import_statement
source: (string) @import.source) @import

(call_expression
function: (identifier) @call.name) @call

(call_expression
function: (member_expression
property: (property_identifier) @call.name)) @call

; Heritage queries - class extends
(class_declaration
name: (type_identifier) @heritage.class
(class_heritage
(extends_clause
value: (identifier) @heritage.extends))) @heritage

; Heritage queries - class implements interface
(class_declaration
name: (type_identifier) @heritage.class
(class_heritage
(implements_clause
(type_identifier) @heritage.implements))) @heritage.impl
`;
|
|
69
|
+
// JavaScript queries - works with tree-sitter-javascript
//
// Same capture vocabulary as the TypeScript query string, except that class
// names are matched as (identifier) rather than (type_identifier), there is no
// interface or implements pattern, and the parent class is captured directly
// under class_heritage.
export const JAVASCRIPT_QUERIES = `
(class_declaration
name: (identifier) @name) @definition.class

(function_declaration
name: (identifier) @name) @definition.function

(method_definition
name: (property_identifier) @name) @definition.method

(lexical_declaration
(variable_declarator
name: (identifier) @name
value: (arrow_function))) @definition.function

(lexical_declaration
(variable_declarator
name: (identifier) @name
value: (function_expression))) @definition.function

(export_statement
declaration: (lexical_declaration
(variable_declarator
name: (identifier) @name
value: (arrow_function)))) @definition.function

(export_statement
declaration: (lexical_declaration
(variable_declarator
name: (identifier) @name
value: (function_expression)))) @definition.function

(import_statement
source: (string) @import.source) @import

(call_expression
function: (identifier) @call.name) @call

(call_expression
function: (member_expression
property: (property_identifier) @call.name)) @call

; Heritage queries - class extends (JavaScript uses different AST than TypeScript)
; In tree-sitter-javascript, class_heritage directly contains the parent identifier
(class_declaration
name: (identifier) @heritage.class
(class_heritage
(identifier) @heritage.extends)) @heritage
`;
|
|
119
|
+
// Python queries - works with tree-sitter-python
//
// Captures class and function definitions (@definition.class,
// @definition.function), `import x` and `from x import ...` sources
// (@import.source, dotted_name only), calls on a bare identifier or an
// attribute (@call.name), and base classes listed as plain identifiers in the
// superclasses argument list (@heritage.extends).
export const PYTHON_QUERIES = `
(class_definition
name: (identifier) @name) @definition.class

(function_definition
name: (identifier) @name) @definition.function

(import_statement
name: (dotted_name) @import.source) @import

(import_from_statement
module_name: (dotted_name) @import.source) @import

(call
function: (identifier) @call.name) @call

(call
function: (attribute
attribute: (identifier) @call.name)) @call

; Heritage queries - Python class inheritance
(class_definition
name: (identifier) @heritage.class
superclasses: (argument_list
(identifier) @heritage.extends)) @heritage
`;
|
|
146
|
+
// Java queries - works with tree-sitter-java
//
// Captures type declarations (@definition.class / .interface / .enum /
// .annotation), methods and constructors, import declarations, method
// invocations, and extends / implements heritage.
// NOTE(review): the first method_invocation pattern has no `object:` constraint,
// so it looks like it also matches every call the second pattern matches —
// confirm whether consumers de-duplicate @call captures.
export const JAVA_QUERIES = `
; Classes, Interfaces, Enums, Annotations
(class_declaration name: (identifier) @name) @definition.class
(interface_declaration name: (identifier) @name) @definition.interface
(enum_declaration name: (identifier) @name) @definition.enum
(annotation_type_declaration name: (identifier) @name) @definition.annotation

; Methods & Constructors
(method_declaration name: (identifier) @name) @definition.method
(constructor_declaration name: (identifier) @name) @definition.constructor

; Imports - capture any import declaration child as source
(import_declaration (_) @import.source) @import

; Calls
(method_invocation name: (identifier) @call.name) @call
(method_invocation object: (_) name: (identifier) @call.name) @call

; Heritage - extends class
(class_declaration name: (identifier) @heritage.class
(superclass (type_identifier) @heritage.extends)) @heritage

; Heritage - implements interfaces
(class_declaration name: (identifier) @heritage.class
(super_interfaces (type_list (type_identifier) @heritage.implements))) @heritage.impl
`;
|
|
173
|
+
// C queries - works with tree-sitter-c
//
// Captures function definitions and function prototypes (both as
// @definition.function), named structs / unions / enums, typedefs, object-like
// and function-like macros (@definition.macro), #include paths
// (@import.source), and calls through an identifier or a field expression.
// This string contains no @heritage patterns.
export const C_QUERIES = `
; Functions
(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function
(declaration declarator: (function_declarator declarator: (identifier) @name)) @definition.function

; Structs, Unions, Enums, Typedefs
(struct_specifier name: (type_identifier) @name) @definition.struct
(union_specifier name: (type_identifier) @name) @definition.union
(enum_specifier name: (type_identifier) @name) @definition.enum
(type_definition declarator: (type_identifier) @name) @definition.typedef

; Macros
(preproc_function_def name: (identifier) @name) @definition.macro
(preproc_def name: (identifier) @name) @definition.macro

; Includes
(preproc_include path: (_) @import.source) @import

; Calls
(call_expression function: (identifier) @call.name) @call
(call_expression function: (field_expression field: (field_identifier) @call.name)) @call
`;
|
|
196
|
+
// Go queries - works with tree-sitter-go
//
// Captures functions, methods, type declarations, single and grouped import
// specs (@import.source on the path literal), and calls through an identifier
// or a selector expression. This string contains no @heritage patterns.
// NOTE(review): the last type_spec pattern has no `type:` constraint, so it
// looks like struct and interface declarations are also captured as
// @definition.type — confirm how consumers handle the overlap.
export const GO_QUERIES = `
; Functions & Methods
(function_declaration name: (identifier) @name) @definition.function
(method_declaration name: (field_identifier) @name) @definition.method

; Types
(type_declaration (type_spec name: (type_identifier) @name type: (struct_type))) @definition.struct
(type_declaration (type_spec name: (type_identifier) @name type: (interface_type))) @definition.interface
(type_declaration (type_spec name: (type_identifier) @name)) @definition.type

; Imports
(import_declaration (import_spec path: (interpreted_string_literal) @import.source)) @import
(import_declaration (import_spec_list (import_spec path: (interpreted_string_literal) @import.source))) @import

; Calls
(call_expression function: (identifier) @call.name) @call
(call_expression function: (selector_expression field: (field_identifier) @call.name)) @call
`;
|
|
215
|
+
// C++ queries - works with tree-sitter-cpp
//
// Captures classes, structs, namespaces, enums, free functions
// (@definition.function), functions declared with a qualified name
// (@definition.method), templated classes and functions
// (@definition.template), #include paths, calls through identifiers / fields /
// qualified names / template functions, and base classes with or without an
// access specifier (@heritage.extends).
export const CPP_QUERIES = `
; Classes, Structs, Namespaces
(class_specifier name: (type_identifier) @name) @definition.class
(struct_specifier name: (type_identifier) @name) @definition.struct
(namespace_definition name: (namespace_identifier) @name) @definition.namespace
(enum_specifier name: (type_identifier) @name) @definition.enum

; Functions & Methods
(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function
(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))) @definition.method

; Templates
(template_declaration (class_specifier name: (type_identifier) @name)) @definition.template
(template_declaration (function_definition declarator: (function_declarator declarator: (identifier) @name))) @definition.template

; Includes
(preproc_include path: (_) @import.source) @import

; Calls
(call_expression function: (identifier) @call.name) @call
(call_expression function: (field_expression field: (field_identifier) @call.name)) @call
(call_expression function: (qualified_identifier name: (identifier) @call.name)) @call
(call_expression function: (template_function name: (identifier) @call.name)) @call

; Heritage
(class_specifier name: (type_identifier) @heritage.class
(base_class_clause (type_identifier) @heritage.extends)) @heritage
(class_specifier name: (type_identifier) @heritage.class
(base_class_clause (access_specifier) (type_identifier) @heritage.extends)) @heritage
`;
|
|
246
|
+
// C# queries - works with tree-sitter-c-sharp
//
// Captures type declarations (class, interface, struct, enum, record,
// delegate), namespaces with simple or qualified names, methods, local
// functions, constructors, properties, using directives, invocations through
// an identifier or a member access, and base-list entries given as a plain or
// generic name (@heritage.extends).
export const CSHARP_QUERIES = `
; Types
(class_declaration name: (identifier) @name) @definition.class
(interface_declaration name: (identifier) @name) @definition.interface
(struct_declaration name: (identifier) @name) @definition.struct
(enum_declaration name: (identifier) @name) @definition.enum
(record_declaration name: (identifier) @name) @definition.record
(delegate_declaration name: (identifier) @name) @definition.delegate

; Namespaces
(namespace_declaration name: (identifier) @name) @definition.namespace
(namespace_declaration name: (qualified_name) @name) @definition.namespace

; Methods & Properties
(method_declaration name: (identifier) @name) @definition.method
(local_function_statement name: (identifier) @name) @definition.function
(constructor_declaration name: (identifier) @name) @definition.constructor
(property_declaration name: (identifier) @name) @definition.property

; Using
(using_directive (qualified_name) @import.source) @import
(using_directive (identifier) @import.source) @import

; Calls
(invocation_expression function: (identifier) @call.name) @call
(invocation_expression function: (member_access_expression name: (identifier) @call.name)) @call

; Heritage
(class_declaration name: (identifier) @heritage.class
(base_list (simple_base_type (identifier) @heritage.extends))) @heritage
(class_declaration name: (identifier) @heritage.class
(base_list (simple_base_type (generic_name (identifier) @heritage.extends)))) @heritage
`;
|
|
280
|
+
// Rust queries - works with tree-sitter-rust
//
// Captures functions, structs, enums, traits, impl blocks (named by their
// `type:` identifier), modules, type aliases, consts, statics, macro
// definitions, use declarations, and calls through identifiers / fields /
// scoped identifiers / generic functions. Trait implementations are captured
// as @heritage with @heritage.trait and @heritage.class (no @heritage.extends).
export const RUST_QUERIES = `
; Functions & Items
(function_item name: (identifier) @name) @definition.function
(struct_item name: (type_identifier) @name) @definition.struct
(enum_item name: (type_identifier) @name) @definition.enum
(trait_item name: (type_identifier) @name) @definition.trait
(impl_item type: (type_identifier) @name) @definition.impl
(mod_item name: (identifier) @name) @definition.module

; Type aliases, const, static, macros
(type_item name: (type_identifier) @name) @definition.type
(const_item name: (identifier) @name) @definition.const
(static_item name: (identifier) @name) @definition.static
(macro_definition name: (identifier) @name) @definition.macro

; Use statements
(use_declaration argument: (_) @import.source) @import

; Calls
(call_expression function: (identifier) @call.name) @call
(call_expression function: (field_expression field: (field_identifier) @call.name)) @call
(call_expression function: (scoped_identifier name: (identifier) @call.name)) @call
(call_expression function: (generic_function function: (identifier) @call.name)) @call

; Heritage (trait implementation)
(impl_item trait: (type_identifier) @heritage.trait type: (type_identifier) @heritage.class) @heritage
(impl_item trait: (generic_type type: (type_identifier) @heritage.trait) type: (type_identifier) @heritage.class) @heritage
`;
|
|
309
|
+
// Lookup table keyed by SupportedLanguages member; each value is the
// tree-sitter query string declared in this module for that language.
export const LANGUAGE_QUERIES = {
    [SupportedLanguages.TypeScript]: TYPESCRIPT_QUERIES,
    [SupportedLanguages.JavaScript]: JAVASCRIPT_QUERIES,
    [SupportedLanguages.Python]: PYTHON_QUERIES,
    [SupportedLanguages.Java]: JAVA_QUERIES,
    [SupportedLanguages.C]: C_QUERIES,
    [SupportedLanguages.Go]: GO_QUERIES,
    [SupportedLanguages.CPlusPlus]: CPP_QUERIES,
    [SupportedLanguages.CSharp]: CSHARP_QUERIES,
    [SupportedLanguages.Rust]: RUST_QUERIES,
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
2
|
+
/**
 * Yield control to the event loop so spinners/progress can render.
 * Call periodically in hot loops to prevent UI freezes.
 *
 * @returns A promise that resolves with no value.
 */
export declare const yieldToEventLoop: () => Promise<void>;
|
|
7
|
+
/**
 * Map file extension to SupportedLanguage enum
 *
 * @param filename - File name or path whose extension is inspected.
 * @returns The matching SupportedLanguages member, or `null` when none matches.
 */
export declare const getLanguageFromFilename: (filename: string) => SupportedLanguages | null;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
2
|
+
/**
 * Hand control back to the event loop so spinners and progress output get a
 * chance to render. Await it periodically inside hot loops to keep the UI
 * from freezing.
 *
 * @returns {Promise<void>} Resolves (with no value) from a `setImmediate` callback.
 */
export const yieldToEventLoop = () => new Promise((resume) => {
    setImmediate(resume);
});
|
|
7
|
+
/**
 * Resolve the SupportedLanguages member for a file from its extension.
 *
 * Only the text from the last `.` onward is considered, and the comparison is
 * case-sensitive. Anything that is not one of the listed extensions yields
 * `null`.
 *
 * @param {string} filename - File name or path to classify.
 * @returns {SupportedLanguages | null} The language, or `null` if unsupported.
 */
export const getLanguageFromFilename = (filename) => {
    const dotIndex = filename.lastIndexOf('.');
    const extension = dotIndex === -1 ? '' : filename.slice(dotIndex);
    switch (extension) {
        // TypeScript (including TSX)
        case '.tsx':
        case '.ts':
            return SupportedLanguages.TypeScript;
        // JavaScript (including JSX)
        case '.jsx':
        case '.js':
            return SupportedLanguages.JavaScript;
        case '.py':
            return SupportedLanguages.Python;
        case '.java':
            return SupportedLanguages.Java;
        // C (source and headers)
        case '.c':
        case '.h':
            return SupportedLanguages.C;
        // C++ (all common extensions)
        case '.cpp':
        case '.cc':
        case '.cxx':
        case '.hpp':
        case '.hxx':
        case '.hh':
            return SupportedLanguages.CPlusPlus;
        case '.cs':
            return SupportedLanguages.CSharp;
        case '.go':
            return SupportedLanguages.Go;
        case '.rs':
            return SupportedLanguages.Rust;
        default:
            return null;
    }
};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV Generator for KuzuDB Hybrid Schema
|
|
3
|
+
*
|
|
4
|
+
* Generates separate CSV files for each node table and one relation CSV.
|
|
5
|
+
* This enables efficient bulk loading via COPY FROM for hybrid schema.
|
|
6
|
+
*
|
|
7
|
+
* RFC 4180 Compliant:
|
|
8
|
+
* - Fields containing commas, double quotes, or newlines are enclosed in double quotes
|
|
9
|
+
* - Double quotes within fields are escaped by doubling them ("")
|
|
10
|
+
* - All fields are consistently quoted for safety with code content
|
|
11
|
+
*/
|
|
12
|
+
import { KnowledgeGraph } from '../graph/types.js';
|
|
13
|
+
import { NodeTableName } from './schema.js';
|
|
14
|
+
/** CSV payloads returned by generateAllCSVs. */
export interface CSVData {
    /** CSV content keyed by node table name. */
    nodes: Map<NodeTableName, string>;
    /** CSV content of the single relation file. */
    relCSV: string;
}
|
|
18
|
+
/**
 * Generate all CSV data for hybrid schema bulk loading
 * Returns Maps of node table name -> CSV content, and single relation CSV
 *
 * @param graph - Knowledge graph to serialize.
 * @param fileContents - Map of string keys to file text; presumably keyed by
 *   each node's filePath — confirm against the implementation.
 * @returns The per-table node CSVs and the relation CSV.
 */
export declare const generateAllCSVs: (graph: KnowledgeGraph, fileContents: Map<string, string>) => CSVData;
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV Generator for KuzuDB Hybrid Schema
|
|
3
|
+
*
|
|
4
|
+
* Generates separate CSV files for each node table and one relation CSV.
|
|
5
|
+
* This enables efficient bulk loading via COPY FROM for hybrid schema.
|
|
6
|
+
*
|
|
7
|
+
* RFC 4180 Compliant:
|
|
8
|
+
* - Fields containing commas, double quotes, or newlines are enclosed in double quotes
|
|
9
|
+
* - Double quotes within fields are escaped by doubling them ("")
|
|
10
|
+
* - All fields are consistently quoted for safety with code content
|
|
11
|
+
*/
|
|
12
|
+
// ============================================================================
|
|
13
|
+
// CSV ESCAPE UTILITIES
|
|
14
|
+
// ============================================================================
|
|
15
|
+
/**
 * Sanitize a string so it is well-formed text and safe CSV content for KuzuDB.
 *
 * Steps, in order:
 *   1. Normalize `\r\n` and lone `\r` to `\n`. KuzuDB's native CSV parser on
 *      Windows can misinterpret `\r\n` inside quoted fields.
 *   2. Drop C0 control characters and DEL, keeping tab and newline.
 *   3. Drop unpaired surrogate code units, which have no UTF-8 encoding.
 *      Well-formed surrogate pairs (emoji and other astral characters) are kept.
 *   4. Drop the noncharacters U+FFFE and U+FFFF.
 *
 * @param {string} str - Raw text.
 * @returns {string} The sanitized text.
 */
const sanitizeUTF8 = (str) => {
    return str
        .replace(/\r\n?/g, '\n') // CRLF and bare CR both become LF
        .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '') // Remove control chars except \t \n
        // With the `u` flag the engine matches by code point, so a valid pair is
        // seen as one astral character and only lone surrogates are removed.
        .replace(/[\uD800-\uDFFF]/gu, '')
        .replace(/[\uFFFE\uFFFF]/g, ''); // Remove noncharacters (not the BOM, which is U+FEFF)
};
|
|
30
|
+
/**
 * Render a value as an RFC 4180 style CSV field that is always wrapped in
 * double quotes, which keeps code content with commas or newlines safe.
 *
 * `null` and `undefined` become an empty quoted field. Any other value is
 * stringified, passed through `sanitizeUTF8`, and has each embedded `"` doubled.
 *
 * @param {*} value - Value to write.
 * @returns {string} The quoted field.
 */
const escapeCSVField = (value) => {
    const isMissing = value === null || value === undefined;
    if (isMissing) {
        return '""';
    }
    const cleaned = sanitizeUTF8(String(value));
    const doubled = cleaned.split('"').join('""');
    return `"${doubled}"`;
};
|
|
42
|
+
/**
 * Render a numeric value as an unquoted CSV field.
 *
 * @param {*} value - Value to write; `null` and `undefined` count as missing.
 * @param {*} [defaultValue=-1] - Written in place of a missing value.
 * @returns {string} `String(value)`, or `String(defaultValue)` when missing.
 */
const escapeCSVNumber = (value, defaultValue = -1) => {
    const chosen = value == null ? defaultValue : value;
    return String(chosen);
};
|
|
51
|
+
// ============================================================================
|
|
52
|
+
// CONTENT EXTRACTION
|
|
53
|
+
// ============================================================================
|
|
54
|
+
/**
 * Heuristically decide whether text looks like binary data.
 *
 * Inspects at most the first 1000 UTF-16 code units and reports `true` when
 * more than 10% of them are control codes: below 9, between 14 and 31, or 127.
 *
 * @param {string} content - Text to inspect; falsy or empty input is not binary.
 * @returns {boolean} `true` when the sample is mostly-control data.
 */
const isBinaryContent = (content) => {
    if (!content || content.length === 0) {
        return false;
    }
    const sample = content.slice(0, 1000);
    const isTextual = (code) => (code >= 9 && code <= 13) || (code >= 32 && code !== 127);
    const codes = Array.from({ length: sample.length }, (_, index) => sample.charCodeAt(index));
    const suspicious = codes.filter((code) => !isTextual(code)).length;
    return suspicious / sample.length > 0.1;
};
|
|
70
|
+
/**
 * Produce the text stored in a node's `content` column.
 *
 * - No file text for the node's `filePath`, or a `Folder` node: empty string.
 * - Text flagged by `isBinaryContent`: a fixed placeholder.
 * - `File` node: the file text, cut to 10000 characters.
 * - Any other node: lines `startLine - 2` through `endLine + 2` (clamped to the
 *   file), cut to 5000 characters; empty when either line is undefined.
 *
 * Cut text gets a trailing "... [truncated]" line.
 *
 * @param {{label: string, properties: object}} node - Graph node to describe.
 * @param {Map<string, string>} fileContents - File text keyed by file path.
 * @returns {string} The content to store.
 */
const extractContent = (node, fileContents) => {
    const { label, properties } = node;
    const source = fileContents.get(properties.filePath);
    if (!source || label === 'Folder') {
        return '';
    }
    if (isBinaryContent(source)) {
        return '[Binary file - content not stored]';
    }
    // Shared truncation rule for whole files and snippets.
    const clip = (text, limit) => (text.length > limit
        ? text.slice(0, limit) + '\n... [truncated]'
        : text);
    if (label === 'File') {
        return clip(source, 10000);
    }
    const { startLine, endLine } = properties;
    if (startLine === undefined || endLine === undefined) {
        return '';
    }
    const padding = 2;
    const allLines = source.split('\n');
    const first = Math.max(0, startLine - padding);
    const last = Math.min(allLines.length - 1, endLine + padding);
    return clip(allLines.slice(first, last + 1).join('\n'), 5000);
};
|
|
106
|
+
// ============================================================================
|
|
107
|
+
// NODE CSV GENERATORS
|
|
108
|
+
// ============================================================================
|
|
109
|
+
/**
 * Build the CSV text for File nodes.
 * Columns: id,name,filePath,content
 *
 * Nodes whose label is not `File` are ignored, and only the first node seen
 * for each id is written.
 *
 * @param {Iterable<object>} nodes - Graph nodes of any label.
 * @param {Map<string, string>} fileContents - Passed through to `extractContent`.
 * @returns {string} Header line plus one line per File node, joined by `\n`.
 */
const generateFileCSV = (nodes, fileContents) => {
    const lines = ['id,name,filePath,content'];
    const emittedIds = new Set();
    for (const node of nodes) {
        if (node.label !== 'File' || emittedIds.has(node.id)) {
            continue;
        }
        emittedIds.add(node.id);
        const { name, filePath } = node.properties;
        const cells = [
            node.id,
            name || '',
            filePath || '',
            extractContent(node, fileContents),
        ];
        lines.push(cells.map((cell) => escapeCSVField(cell)).join(','));
    }
    return lines.join('\n');
};
|
|
134
|
+
/**
 * Build the CSV text for Folder nodes.
 * Columns: id,name,filePath
 *
 * Nodes whose label is not `Folder` are ignored. Unlike the File generator,
 * repeated ids are not filtered here.
 *
 * @param {Iterable<object>} nodes - Graph nodes of any label.
 * @returns {string} Header line plus one line per Folder node, joined by `\n`.
 */
const generateFolderCSV = (nodes) => {
    const lines = ['id,name,filePath'];
    for (const { id, label, properties } of nodes) {
        if (label === 'Folder') {
            const cells = [id, properties.name || '', properties.filePath || ''];
            lines.push(cells.map((cell) => escapeCSVField(cell)).join(','));
        }
    }
    return lines.join('\n');
};
|
|
152
|
+
/**
 * Build the CSV text for one code-element table (Function, Class, Interface,
 * Method, CodeElement).
 * Columns: id,name,filePath,startLine,endLine,isExported,content
 *
 * Line numbers are written unquoted and default to -1 when absent;
 * `isExported` is written as the bare word `true` or `false`.
 *
 * @param {Iterable<object>} nodes - Graph nodes of any label.
 * @param {string} label - Only nodes with exactly this label are written.
 * @param {Map<string, string>} fileContents - Passed through to `extractContent`.
 * @returns {string} Header line plus one line per matching node, joined by `\n`.
 */
const generateCodeElementCSV = (nodes, label, fileContents) => {
    const lines = ['id,name,filePath,startLine,endLine,isExported,content'];
    for (const node of nodes) {
        if (node.label === label) {
            const snippet = extractContent(node, fileContents);
            const { name, filePath, startLine, endLine, isExported } = node.properties;
            const cells = [
                escapeCSVField(node.id),
                escapeCSVField(name || ''),
                escapeCSVField(filePath || ''),
                escapeCSVNumber(startLine, -1),
                escapeCSVNumber(endLine, -1),
                isExported ? 'true' : 'false',
                escapeCSVField(snippet),
            ];
            lines.push(cells.join(','));
        }
    }
    return lines.join('\n');
};
|
|
175
|
+
/**
 * Generate CSV for Community nodes (from Leiden algorithm)
 * Headers: id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount
 *
 * @param {Iterable<object>} nodes - graph nodes exposing `label`, `id` and `properties`;
 *   only those labelled 'Community' produce a row.
 * @returns {string} header line plus one line per Community node, joined with '\n'.
 */
const generateCommunityCSV = (nodes) => {
    const headers = ['id', 'label', 'heuristicLabel', 'keywords', 'description', 'enrichedBy', 'cohesion', 'symbolCount'];
    const rows = [headers.join(',')];
    for (const node of nodes) {
        if (node.label !== 'Community')
            continue;
        // Handle keywords array - convert to KuzuDB array format.
        // Each keyword is coerced to a string so a stray non-string entry cannot throw,
        // and embedded single quotes are doubled.
        const keywords = node.properties.keywords || [];
        const keywordsStr = `[${keywords.map((k) => `'${String(k).replace(/'/g, "''")}'`).join(',')}]`;
        rows.push([
            escapeCSVField(node.id),
            escapeCSVField(node.properties.name || ''), // label is stored in name
            escapeCSVField(node.properties.heuristicLabel || ''),
            // The list literal contains commas as soon as there are two keywords, so it
            // must be CSV-escaped (same treatment as `communities` in generateProcessCSV);
            // otherwise every following column shifts.
            escapeCSVField(keywordsStr),
            escapeCSVField(node.properties.description || ''),
            escapeCSVField(node.properties.enrichedBy || 'heuristic'),
            escapeCSVNumber(node.properties.cohesion, 0),
            escapeCSVNumber(node.properties.symbolCount, 0),
        ].join(','));
    }
    return rows.join('\n');
};
|
|
201
|
+
/**
 * Build the CSV text for Process nodes.
 * Columns: id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId
 *
 * @param {Iterable<object>} nodes - graph nodes exposing `label`, `id` and `properties`;
 *   only those labelled 'Process' produce a row.
 * @returns {string} header line plus one line per Process node, joined with '\n'.
 */
const generateProcessCSV = (nodes) => {
    const columnNames = ['id', 'label', 'heuristicLabel', 'processType', 'stepCount', 'communities', 'entryPointId', 'terminalId'];
    const lines = [columnNames.join(',')];
    // Wraps one community id in single quotes, doubling any embedded single quote.
    const quoteItem = (c) => `'${c.replace(/'/g, "''")}'`;
    for (const proc of nodes) {
        if (proc.label === 'Process') {
            const props = proc.properties;
            // communities (string[]) is rendered as a bracketed list literal.
            const memberIds = props.communities || [];
            const listLiteral = `[${memberIds.map(quoteItem).join(',')}]`;
            const cells = [
                escapeCSVField(proc.id),
                escapeCSVField(props.name || ''), // the "label" column carries the node's name
                escapeCSVField(props.heuristicLabel || ''),
                escapeCSVField(props.processType || ''),
                escapeCSVNumber(props.stepCount, 0),
                escapeCSVField(listLiteral), // escaped because the literal itself contains commas
                escapeCSVField(props.entryPointId || ''),
                escapeCSVField(props.terminalId || ''),
            ];
            lines.push(cells.join(','));
        }
    }
    return lines.join('\n');
};
|
|
227
|
+
/**
 * Build the CSV text for the single CodeRelation table.
 * Columns: from,to,type,confidence,reason,step
 *
 * confidence: 0-1 score for CALLS edges (how sure are we about the target?)
 * reason: 'import-resolved' | 'same-file' | 'fuzzy-global' (or empty for non-CALLS)
 * step: numeric column; 0 is passed to escapeCSVNumber as its second argument
 *       (presumably the value used when `rel.step` is missing - confirm).
 *
 * @param {object} graph - object whose `relationships` is iterable; each entry
 *   exposes `sourceId`, `targetId`, `type`, `confidence`, `reason` and `step`.
 * @returns {string} header line plus one line per relationship, joined with '\n'.
 */
const generateRelationCSV = (graph) => {
    const columnNames = ['from', 'to', 'type', 'confidence', 'reason', 'step'];
    const lines = [columnNames.join(',')];
    for (const edge of graph.relationships) {
        const fromCell = escapeCSVField(edge.sourceId);
        const toCell = escapeCSVField(edge.targetId);
        const typeCell = escapeCSVField(edge.type);
        const confidenceCell = escapeCSVNumber(edge.confidence, 1.0);
        const reasonCell = escapeCSVField(edge.reason);
        const stepCell = escapeCSVNumber(edge.step, 0);
        lines.push([fromCell, toCell, typeCell, confidenceCell, reasonCell, stepCell].join(','));
    }
    return lines.join('\n');
};
|
|
249
|
+
// ============================================================================
|
|
250
|
+
// MAIN CSV GENERATION FUNCTION
|
|
251
|
+
// ============================================================================
|
|
252
|
+
/**
 * Produce every CSV needed for hybrid-schema bulk loading.
 *
 * @param {object} graph - object exposing iterable `nodes` and `relationships`.
 * @param {*} fileContents - forwarded to the File and code-element generators.
 * @returns {{nodes: Map<string, string>, relCSV: string}} `nodes` maps each node
 *   table name to its CSV text; `relCSV` is the single relation CSV.
 */
export const generateAllCSVs = (graph, fileContents) => {
    // Materialise the node collection once; every per-table generator scans it.
    const allNodes = Array.from(graph.nodes);
    const codeElementTables = ['Function', 'Class', 'Interface', 'Method', 'CodeElement'];
    // Node tables, inserted in the order File, Folder, code elements, Community, Process.
    const tables = new Map([
        ['File', generateFileCSV(allNodes, fileContents)],
        ['Folder', generateFolderCSV(allNodes)],
    ]);
    for (const tableName of codeElementTables) {
        tables.set(tableName, generateCodeElementCSV(allNodes, tableName, fileContents));
    }
    tables.set('Community', generateCommunityCSV(allNodes));
    tables.set('Process', generateProcessCSV(allNodes));
    // The single relation CSV is built last, after all node tables.
    return { nodes: tables, relCSV: generateRelationCSV(graph) };
};
|