@mars167/git-ai 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +364 -0
- package/README.zh-CN.md +361 -0
- package/assets/hooks/post-checkout +28 -0
- package/assets/hooks/post-merge +28 -0
- package/assets/hooks/pre-commit +17 -0
- package/assets/hooks/pre-push +29 -0
- package/dist/bin/git-ai.js +62 -0
- package/dist/src/commands/ai.js +30 -0
- package/dist/src/commands/checkIndex.js +19 -0
- package/dist/src/commands/dsr.js +156 -0
- package/dist/src/commands/graph.js +203 -0
- package/dist/src/commands/hooks.js +125 -0
- package/dist/src/commands/index.js +92 -0
- package/dist/src/commands/pack.js +31 -0
- package/dist/src/commands/query.js +139 -0
- package/dist/src/commands/semantic.js +134 -0
- package/dist/src/commands/serve.js +14 -0
- package/dist/src/commands/status.js +78 -0
- package/dist/src/commands/trae.js +75 -0
- package/dist/src/commands/unpack.js +28 -0
- package/dist/src/core/archive.js +91 -0
- package/dist/src/core/astGraph.js +127 -0
- package/dist/src/core/astGraphQuery.js +142 -0
- package/dist/src/core/cozo.js +266 -0
- package/dist/src/core/cpg/astLayer.js +56 -0
- package/dist/src/core/cpg/callGraph.js +483 -0
- package/dist/src/core/cpg/cfgLayer.js +490 -0
- package/dist/src/core/cpg/dfgLayer.js +237 -0
- package/dist/src/core/cpg/index.js +80 -0
- package/dist/src/core/cpg/types.js +108 -0
- package/dist/src/core/crypto.js +10 -0
- package/dist/src/core/dsr/generate.js +308 -0
- package/dist/src/core/dsr/gitContext.js +74 -0
- package/dist/src/core/dsr/indexMaterialize.js +106 -0
- package/dist/src/core/dsr/paths.js +26 -0
- package/dist/src/core/dsr/query.js +73 -0
- package/dist/src/core/dsr/snapshotParser.js +73 -0
- package/dist/src/core/dsr/state.js +27 -0
- package/dist/src/core/dsr/types.js +2 -0
- package/dist/src/core/embedding/fusion.js +52 -0
- package/dist/src/core/embedding/index.js +43 -0
- package/dist/src/core/embedding/parser.js +14 -0
- package/dist/src/core/embedding/semantic.js +254 -0
- package/dist/src/core/embedding/structural.js +97 -0
- package/dist/src/core/embedding/symbolic.js +117 -0
- package/dist/src/core/embedding/tokenizer.js +91 -0
- package/dist/src/core/embedding/types.js +2 -0
- package/dist/src/core/embedding.js +36 -0
- package/dist/src/core/git.js +49 -0
- package/dist/src/core/gitDiff.js +73 -0
- package/dist/src/core/indexCheck.js +131 -0
- package/dist/src/core/indexer.js +185 -0
- package/dist/src/core/indexerIncremental.js +303 -0
- package/dist/src/core/indexing/config.js +51 -0
- package/dist/src/core/indexing/hnsw.js +568 -0
- package/dist/src/core/indexing/index.js +17 -0
- package/dist/src/core/indexing/monitor.js +82 -0
- package/dist/src/core/indexing/parallel.js +252 -0
- package/dist/src/core/lancedb.js +111 -0
- package/dist/src/core/lfs.js +27 -0
- package/dist/src/core/log.js +62 -0
- package/dist/src/core/manifest.js +88 -0
- package/dist/src/core/parser/adapter.js +2 -0
- package/dist/src/core/parser/c.js +93 -0
- package/dist/src/core/parser/chunkRelations.js +178 -0
- package/dist/src/core/parser/chunker.js +274 -0
- package/dist/src/core/parser/go.js +98 -0
- package/dist/src/core/parser/java.js +80 -0
- package/dist/src/core/parser/markdown.js +76 -0
- package/dist/src/core/parser/python.js +81 -0
- package/dist/src/core/parser/rust.js +103 -0
- package/dist/src/core/parser/typescript.js +98 -0
- package/dist/src/core/parser/utils.js +62 -0
- package/dist/src/core/parser/yaml.js +53 -0
- package/dist/src/core/parser.js +75 -0
- package/dist/src/core/paths.js +10 -0
- package/dist/src/core/repoMap.js +164 -0
- package/dist/src/core/retrieval/cache.js +31 -0
- package/dist/src/core/retrieval/classifier.js +74 -0
- package/dist/src/core/retrieval/expander.js +80 -0
- package/dist/src/core/retrieval/fuser.js +40 -0
- package/dist/src/core/retrieval/index.js +32 -0
- package/dist/src/core/retrieval/reranker.js +304 -0
- package/dist/src/core/retrieval/types.js +2 -0
- package/dist/src/core/retrieval/weights.js +42 -0
- package/dist/src/core/search.js +41 -0
- package/dist/src/core/sq8.js +65 -0
- package/dist/src/core/symbolSearch.js +143 -0
- package/dist/src/core/types.js +2 -0
- package/dist/src/core/workspace.js +116 -0
- package/dist/src/mcp/server.js +794 -0
- package/docs/README.md +44 -0
- package/docs/cross-encoder.md +157 -0
- package/docs/embedding.md +158 -0
- package/docs/logo.png +0 -0
- package/docs/windows-setup.md +67 -0
- package/docs/zh-CN/DESIGN.md +102 -0
- package/docs/zh-CN/README.md +46 -0
- package/docs/zh-CN/advanced.md +26 -0
- package/docs/zh-CN/architecture_explained.md +116 -0
- package/docs/zh-CN/cli.md +109 -0
- package/docs/zh-CN/dsr.md +91 -0
- package/docs/zh-CN/graph_scenarios.md +173 -0
- package/docs/zh-CN/hooks.md +14 -0
- package/docs/zh-CN/manifests.md +136 -0
- package/docs/zh-CN/mcp.md +205 -0
- package/docs/zh-CN/quickstart.md +35 -0
- package/docs/zh-CN/rules.md +7 -0
- package/docs/zh-CN/technical-details.md +454 -0
- package/docs/zh-CN/troubleshooting.md +19 -0
- package/docs/zh-CN/windows-setup.md +67 -0
- package/install.sh +183 -0
- package/package.json +97 -0
- package/skills/git-ai-mcp/SKILL.md +86 -0
- package/skills/git-ai-mcp/references/constraints.md +143 -0
- package/skills/git-ai-mcp/references/tools.md +263 -0
- package/templates/agents/common/documents/Fix EISDIR error and enable multi-language indexing.md +14 -0
- package/templates/agents/common/documents/Fix git-ai index error in CodaGraph directory.md +13 -0
- package/templates/agents/common/skills/git-ai-mcp/SKILL.md +86 -0
- package/templates/agents/common/skills/git-ai-mcp/references/constraints.md +143 -0
- package/templates/agents/common/skills/git-ai-mcp/references/tools.md +263 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.inferChunkRelations = inferChunkRelations;
|
|
4
|
+
exports.getRelatedChunks = getRelatedChunks;
|
|
5
|
+
exports.getChunksReferencingSymbol = getChunksReferencingSymbol;
|
|
6
|
+
exports.getChunksDefiningSymbol = getChunksDefiningSymbol;
|
|
7
|
+
/**
 * Build relationship indexes between chunks.
 *
 * The returned object holds six Maps:
 *  - fileMap:   filePath -> ids of the chunks in that file
 *  - typeMap:   nodeType -> ids of the chunks with that node type
 *  - parentMap: chunk id -> id of its parent chunk
 *  - childMap:  chunk id -> ids of its child chunks
 *  - callerMap: chunk id -> ids of the chunks defining a symbol it references
 *               (i.e. the chunks it calls)
 *  - calleeMap: chunk id -> ids of the chunks referencing a symbol it defines
 *               (i.e. the chunks that call it)
 *
 * @param {Array<object>} chunks chunks carrying id, filePath, nodeType,
 *     astPath, content and symbolReferences
 * @returns {object} the relation maps described above
 */
function inferChunkRelations(chunks) {
    const relations = {
        callerMap: new Map(),
        calleeMap: new Map(),
        parentMap: new Map(),
        childMap: new Map(),
        typeMap: new Map(),
        fileMap: new Map(),
    };
    const append = (map, key, value) => {
        const list = map.get(key);
        if (list) {
            list.push(value);
        }
        else {
            map.set(key, [value]);
        }
    };
    // File and type indexes
    for (const chunk of chunks) {
        append(relations.fileMap, chunk.filePath, chunk.id);
        append(relations.typeMap, chunk.nodeType, chunk.id);
    }
    // Parent-child relationships based on AST path nesting. AST paths only
    // contain node types, so two unrelated nodes can share a prefix; when both
    // chunks carry line ranges the candidate must also enclose the chunk.
    // Among the remaining candidates the deepest (nearest) ancestor wins.
    const hasLineRange = (c) => typeof c.startLine === 'number' && typeof c.endLine === 'number';
    for (let i = 0; i < chunks.length; i++) {
        const chunk = chunks[i];
        let parent = null;
        for (let j = 0; j < i; j++) {
            const candidate = chunks[j];
            if (candidate.filePath !== chunk.filePath)
                continue;
            if (!isParentPath(candidate.astPath, chunk.astPath))
                continue;
            if (hasLineRange(candidate) && hasLineRange(chunk)
                && (candidate.startLine > chunk.startLine || candidate.endLine < chunk.endLine)) {
                continue;
            }
            if (parent === null || candidate.astPath.length > parent.astPath.length) {
                parent = candidate;
            }
        }
        if (parent !== null) {
            relations.parentMap.set(chunk.id, parent.id);
            append(relations.childMap, parent.id, chunk.id);
        }
    }
    // Index definitions once (symbol name -> ids of defining chunks, in chunk
    // order) instead of re-scanning every chunk's content for every reference.
    const definers = new Map();
    for (const chunk of chunks) {
        for (const name of extractDefNames(chunk.content)) {
            append(definers, name, chunk.id);
        }
    }
    // Infer call relationships from symbol references
    for (const chunk of chunks) {
        const callees = new Set();
        for (const ref of chunk.symbolReferences || []) {
            for (const definerId of definers.get(ref) || []) {
                if (definerId !== chunk.id) {
                    callees.add(definerId);
                }
            }
        }
        if (callees.size === 0)
            continue;
        relations.callerMap.set(chunk.id, [...callees]);
        // Iterate the de-duplicated set so each caller is recorded once per callee.
        for (const calleeId of callees) {
            append(relations.calleeMap, calleeId, chunk.id);
        }
    }
    return relations;
}
|
|
76
|
+
/**
 * Report whether pathA is a strict leading prefix of pathB: pathA must be
 * shorter than pathB and agree with it segment by segment.
 */
function isParentPath(pathA, pathB) {
    const prefixLength = pathA.length;
    if (prefixLength >= pathB.length) {
        return false;
    }
    let index = 0;
    while (index < prefixLength) {
        if (pathA[index] !== pathB[index]) {
            return false;
        }
        index += 1;
    }
    return true;
}
|
|
88
|
+
// Words that can precede "(...) {" without naming a definition.
const NON_DEFINITION_KEYWORDS = new Set(['if', 'for', 'while', 'switch', 'catch', 'with', 'function']);
/**
 * Extract definition names from chunk content using lightweight regexes.
 *
 * Names are collected in three passes (function declarations, class
 * declarations, then method-like "name(...) {" heads) and returned
 * de-duplicated in first-seen order. This is a heuristic text scan, not a
 * parse: it can still report names that are not real definitions.
 *
 * @param {string} content source text of a chunk
 * @returns {string[]} unique definition names
 */
function extractDefNames(content) {
    const names = new Set();
    const collect = (pattern, excluded) => {
        let match;
        // A fresh /g regex is passed per call, so lastIndex always starts at 0.
        while ((match = pattern.exec(content)) !== null) {
            // Use the capture group rather than stripping a literal
            // "function " / "class " prefix, which broke on tabs, newlines or
            // repeated spaces between the keyword and the name.
            const name = match[1];
            if (!excluded || !excluded.has(name)) {
                names.add(name);
            }
        }
    };
    // Match function declarations
    collect(/\bfunction\s+(\w+)/g);
    // Match class declarations
    collect(/\bclass\s+(\w+)/g);
    // Match method definitions (simplified); control-flow heads such as
    // "if (x) {" have the same shape and are filtered out.
    collect(/(\w+)\s*\([^)]*\)\s*\{/g, NON_DEFINITION_KEYWORDS);
    return [...names];
}
|
|
118
|
+
/**
 * Find the chunks related to a given chunk by a breadth-first walk over the
 * relation maps (parent, children, callers, callees).
 *
 * @param {string} chunkId id of the starting chunk (never part of the result)
 * @param {object} relations maps as produced by inferChunkRelations
 * @param {number} [maxDepth=2] maximum number of relation hops to follow
 * @returns {string[]} ids of related chunks, nearest first, each id once
 */
function getRelatedChunks(chunkId, relations, maxDepth = 2) {
    const visited = new Set([chunkId]);
    const result = [];
    let frontier = [chunkId];
    let depth = 0;
    while (frontier.length > 0 && !(depth >= maxDepth)) {
        const next = [];
        // Mark ids as visited the moment they are discovered, so a neighbour
        // reachable through several relations (e.g. both parent and callee)
        // is reported and expanded only once.
        const visit = (id) => {
            if (visited.has(id))
                return;
            visited.add(id);
            result.push(id);
            next.push(id);
        };
        for (const id of frontier) {
            // Parent
            const parent = relations.parentMap.get(id);
            if (parent !== undefined && parent !== null) {
                visit(parent);
            }
            // Children
            for (const child of relations.childMap.get(id) || []) {
                visit(child);
            }
            // Callers: calleeMap is keyed by the called chunk
            for (const caller of relations.calleeMap.get(id) || []) {
                visit(caller);
            }
            // Callees: callerMap is keyed by the calling chunk
            for (const callee of relations.callerMap.get(id) || []) {
                visit(callee);
            }
        }
        frontier = next;
        depth += 1;
    }
    return result;
}
|
|
164
|
+
/**
 * Collect, in input order, every chunk whose symbolReferences list contains
 * the given symbol name.
 */
function getChunksReferencingSymbol(symbolName, chunks) {
    const referencing = [];
    for (const candidate of chunks) {
        if (candidate.symbolReferences.includes(symbolName)) {
            referencing.push(candidate);
        }
    }
    return referencing;
}
|
|
170
|
+
/**
 * Collect, in input order, every chunk whose content defines the given symbol
 * according to extractDefNames.
 */
function getChunksDefiningSymbol(symbolName, chunks) {
    const definesSymbol = (chunk) => extractDefNames(chunk.content).includes(symbolName);
    return chunks.filter(definesSymbol);
}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.defaultChunkingConfig = void 0;
|
|
4
|
+
exports.countTokens = countTokens;
|
|
5
|
+
exports.getAstPath = getAstPath;
|
|
6
|
+
exports.findTopLevelDefinitions = findTopLevelDefinitions;
|
|
7
|
+
exports.astAwareChunking = astAwareChunking;
|
|
8
|
+
// Default settings for AST-aware chunking. Token counts throughout this file
// are whitespace-separated words as computed by countTokens().
exports.defaultChunkingConfig = {
    // A node with at most this many tokens is emitted as a single chunk
    // (see chunkNode); larger nodes are split.
    maxTokens: 512,
    // Fragments and forced splits with fewer tokens than this are not emitted.
    minTokens: 50,
    // NOTE(review): not read by any function in this file — presumably
    // consumed elsewhere or reserved; confirm before relying on it.
    priorityConstructs: [
        'function_declaration',
        'method_definition',
        'class_declaration',
        'interface_declaration',
        'module',
        'namespace',
        'arrow_function',
    ],
    // NOTE(review): not read by any function in this file — confirm usage.
    preserveContext: true,
    // Overlap budget between consecutive forced splits; createForcedChunks
    // converts it to lines as ceil(overlapTokens / 10).
    overlapTokens: 32,
};
|
|
23
|
+
/**
 * Count the whitespace-delimited words in a piece of text.
 */
function countTokens(text) {
    const words = text.match(/\S+/g);
    return words === null ? 0 : words.length;
}
|
|
26
|
+
/**
 * List the node types from the tree root down to the given node, following
 * each node's `parent` link.
 */
function getAstPath(node) {
    const typesLeafFirst = [];
    for (let cursor = node; cursor; cursor = cursor.parent) {
        typesLeafFirst.push(cursor.type);
    }
    return typesLeafFirst.reverse();
}
|
|
35
|
+
/**
 * Report whether a node's type is one of the definition-like node types this
 * chunker treats as a chunk boundary.
 */
function isDefinitionNode(node) {
    switch (node.type) {
        case 'function_declaration':
        case 'method_definition':
        case 'class_declaration':
        case 'interface_declaration':
        case 'module':
        case 'namespace':
        case 'arrow_function':
        case 'const_declaration':
        case 'let_declaration':
        case 'variable_declaration':
            return true;
        default:
            return false;
    }
}
|
|
50
|
+
/**
 * Return the direct children of `root` that are definition nodes, in child
 * order. Missing (falsy) children are skipped.
 */
function findTopLevelDefinitions(root) {
    const found = [];
    let index = 0;
    while (index < root.childCount) {
        const candidate = root.child(index);
        index += 1;
        if (!candidate) {
            continue;
        }
        if (isDefinitionNode(candidate)) {
            found.push(candidate);
        }
    }
    return found;
}
|
|
60
|
+
/**
 * Extract a node's text together with its 1-based start and end line numbers
 * (tree rows are 0-based). `filePath` is accepted but not used.
 */
function buildChunkContent(node, filePath) {
    const { text, startPosition, endPosition } = node;
    const startLine = startPosition.row + 1;
    const endLine = endPosition.row + 1;
    return { text, startLine, endLine };
}
|
|
67
|
+
/**
 * Compose a chunk id of the form "filePath:nodeType:startLine:hash", where
 * hash is the first eight characters of contentHash.
 */
function generateChunkId(filePath, nodeType, startLine, contentHash) {
    const shortHash = contentHash.slice(0, 8);
    const parts = [filePath, nodeType, startLine, shortHash];
    return parts.map((part) => String(part)).join(':');
}
|
|
70
|
+
/**
 * Compute a 32-bit rolling hash (acc * 31 + char code, wrapped to a signed
 * 32-bit integer after every character) and return its magnitude as a
 * lowercase hexadecimal string.
 */
function hashContent(text) {
    let acc = 0;
    let index = 0;
    while (index < text.length) {
        // Math.imul keeps the multiply in 32-bit space; `| 0` wraps the sum.
        acc = (Math.imul(acc, 31) + text.charCodeAt(index)) | 0;
        index += 1;
    }
    const magnitude = acc < 0 ? -acc : acc;
    return magnitude.toString(16);
}
|
|
79
|
+
/**
 * Collect the text of every `identifier` node in the subtree rooted at `node`.
 *
 * The walk is pre-order, so names appear in the order the original recursive
 * version produced them. It uses an explicit stack so very deep trees cannot
 * overflow the call stack, and it skips missing children, matching the null
 * checks already done by findTopLevelDefinitions and chunkNode.
 *
 * @param {object} node syntax node exposing type, text, childCount and child(i)
 * @returns {string[]} unique identifier texts in first-seen order
 */
function extractSymbolReferences(node) {
    const symbols = new Set();
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (!current)
            continue;
        if (current.type === 'identifier') {
            symbols.add(current.text);
        }
        // Push children right-to-left so the leftmost child is visited first.
        for (let i = current.childCount - 1; i >= 0; i--) {
            pending.push(current.child(i));
        }
    }
    return [...symbols];
}
|
|
92
|
+
/**
 * Chunk a parsed file: chunk every top-level definition, then chunk whatever
 * lines those chunks did not cover, and return all chunks sorted by start
 * line together with the total token and chunk counts.
 */
function astAwareChunking(tree, filePath, config = exports.defaultChunkingConfig) {
    const root = tree.rootNode;
    const chunks = [];
    for (const definition of findTopLevelDefinitions(root)) {
        for (const piece of chunkNode(definition, filePath, config)) {
            chunks.push(piece);
        }
    }
    // Record every line already claimed by a definition chunk.
    const coveredLines = new Set();
    for (const { startLine, endLine } of chunks) {
        for (let line = startLine; line <= endLine; line++) {
            coveredLines.add(line);
        }
    }
    // Handle remaining content
    for (const leftover of chunkRemainingContent(root, filePath, coveredLines, config)) {
        chunks.push(leftover);
    }
    chunks.sort((left, right) => left.startLine - right.startLine);
    let totalTokens = 0;
    for (const chunk of chunks) {
        totalTokens += chunk.tokenCount;
    }
    return { chunks, totalTokens, totalChunks: chunks.length };
}
|
|
116
|
+
/**
 * Turn one syntax node into chunks.
 *
 * A node within config.maxTokens becomes a single chunk. A larger node is
 * split along its direct children that are definition nodes (recursively),
 * with the lines those children leave uncovered chunked as fragments. When
 * no child yields a chunk, the node is split by line count instead.
 */
function chunkNode(node, filePath, config) {
    const { text, startLine, endLine } = buildChunkContent(node, filePath);
    const tokenCount = countTokens(text);
    const astPath = getAstPath(node);
    const contentHash = hashContent(text);
    if (tokenCount <= config.maxTokens) {
        // Fits in one chunk.
        return [{
                id: generateChunkId(filePath, node.type, startLine, contentHash),
                content: text,
                astPath,
                filePath,
                startLine,
                endLine,
                symbolReferences: extractSymbolReferences(node),
                relatedChunkIds: [],
                tokenCount,
                nodeType: node.type,
            }];
    }
    // Try to split by children
    const nested = [];
    for (let index = 0; index < node.childCount; index++) {
        const child = node.child(index);
        if (!child || !isDefinitionNode(child)) {
            continue;
        }
        for (const piece of chunkNode(child, filePath, config)) {
            nested.push(piece);
        }
    }
    if (nested.length === 0) {
        // Nothing to split on: fall back to line-based splitting.
        return [...createForcedChunks(node, filePath, config)];
    }
    const usedLines = new Set();
    for (const piece of nested) {
        for (let line = piece.startLine; line <= piece.endLine; line++) {
            usedLines.add(line);
        }
    }
    const leftovers = chunkRemainingContent(node, filePath, usedLines, config);
    return [...nested, ...leftovers];
}
|
|
166
|
+
/**
 * Chunk the lines of `node` that are not in `coveredLines`.
 *
 * Each maximal run of consecutive uncovered lines becomes one 'fragment'
 * chunk, provided it has at least config.minTokens tokens; shorter runs are
 * discarded. Line numbers are 1-based and counted from `baseLine`, which
 * defaults to the node's own first line.
 */
function chunkRemainingContent(node, filePath, coveredLines, config, baseLine = node.startPosition.row + 1) {
    const fragments = [];
    const sourceLines = node.text.split('\n');
    let pending = [];
    let pendingStart = baseLine;
    // Emit the pending run (if any, and if large enough) ending at `lastLine`.
    const flush = (lastLine) => {
        if (pending.length === 0) {
            return;
        }
        const body = pending.join('\n');
        const tokenCount = countTokens(body);
        if (tokenCount >= config.minTokens) {
            fragments.push({
                id: generateChunkId(filePath, 'fragment', pendingStart, hashContent(body)),
                content: body,
                astPath: [...getAstPath(node), 'fragment'],
                filePath,
                startLine: pendingStart,
                endLine: lastLine,
                symbolReferences: [],
                relatedChunkIds: [],
                tokenCount,
                nodeType: 'fragment',
            });
        }
        pending = [];
    };
    sourceLines.forEach((lineText, offset) => {
        const lineNum = baseLine + offset;
        if (coveredLines.has(lineNum)) {
            // A covered line closes the run that ended on the previous line.
            flush(lineNum - 1);
            return;
        }
        if (pending.length === 0) {
            pendingStart = lineNum;
        }
        pending.push(lineText);
    });
    // Close a run that reaches the node's last line.
    flush(baseLine + sourceLines.length - 1);
    return fragments;
}
|
|
225
|
+
/**
 * Split an oversized node into 'forced_split' chunks by line.
 *
 * Lines are accumulated until adding the next one would exceed
 * config.maxTokens (and the chunk already holds more than config.minTokens);
 * the chunk is then emitted and its last ceil(config.overlapTokens / 10)
 * lines are carried over as the start of the next chunk. A final remainder
 * is emitted only when it has at least config.minTokens tokens.
 *
 * startLine/endLine are 1-based file lines and always describe exactly the
 * lines held in `content`.
 *
 * @param {object} node syntax node exposing text, startPosition and endPosition
 * @param {string} filePath path recorded on each chunk
 * @param {object} config chunking config (maxTokens, minTokens, overlapTokens)
 * @returns {Array<object>} the forced-split chunks in source order
 */
function createForcedChunks(node, filePath, config) {
    const chunks = [];
    const lines = node.text.split('\n');
    const firstLine = node.startPosition.row + 1;
    const overlapLines = Math.ceil(config.overlapTokens / 10);
    const makeChunk = (chunkLines, startLine, endLine, tokenCount) => {
        const chunkText = chunkLines.join('\n');
        return {
            id: generateChunkId(filePath, 'forced_split', startLine, hashContent(chunkText)),
            content: chunkText,
            astPath: [...getAstPath(node), 'forced_split'],
            filePath,
            startLine,
            endLine,
            symbolReferences: [],
            relatedChunkIds: [],
            tokenCount,
            nodeType: 'forced_split',
        };
    };
    let currentChunkLines = [];
    let currentChunkTokens = 0;
    let chunkStartLine = firstLine;
    for (let i = 0; i < lines.length; i++) {
        const lineTokens = countTokens(lines[i]);
        if (currentChunkTokens + lineTokens > config.maxTokens && currentChunkTokens > config.minTokens) {
            // The chunk holds lines up to index i - 1, i.e. file line
            // firstLine + i - 1 (line i itself is not part of it).
            chunks.push(makeChunk(currentChunkLines, chunkStartLine, firstLine + i - 1, currentChunkTokens));
            const overlapStart = Math.max(0, currentChunkLines.length - overlapLines);
            currentChunkLines = currentChunkLines.slice(overlapStart);
            currentChunkTokens = currentChunkLines.reduce((sum, l) => sum + countTokens(l), 0);
            // The next chunk starts at the first retained overlap line, which
            // sits `currentChunkLines.length` lines before line i.
            chunkStartLine = firstLine + i - currentChunkLines.length;
        }
        currentChunkLines.push(lines[i]);
        currentChunkTokens += lineTokens;
    }
    if (currentChunkTokens >= config.minTokens) {
        chunks.push(makeChunk(currentChunkLines, chunkStartLine, node.endPosition.row + 1, currentChunkTokens));
    }
    return chunks;
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.GoAdapter = void 0;
|
|
7
|
+
const tree_sitter_go_1 = __importDefault(require("tree-sitter-go"));
|
|
8
|
+
const utils_1 = require("./utils");
|
|
9
|
+
/**
 * Language adapter for Go: exposes the tree-sitter grammar and extracts
 * symbols (functions, methods, types) and references (calls, type uses)
 * from a parsed syntax tree.
 */
class GoAdapter {
    getLanguageId() {
        return 'go';
    }
    getTreeSitterLanguage() {
        return tree_sitter_go_1.default;
    }
    getSupportedFileExtensions() {
        return ['.go'];
    }
    /**
     * Walk the tree rooted at `node` and collect symbols and references.
     *
     * Each symbol records its name, kind, 1-based line range, signature and
     * the enclosing symbol (`container`, undefined at top level).
     *
     * @param {object} node root syntax node
     * @returns {{symbols: Array<object>, refs: Array<object>}}
     */
    extractSymbolsAndRefs(node) {
        const symbols = [];
        const refs = [];
        // Node type -> symbol kind. The tree-sitter-go grammar names type
        // definitions `type_spec` and aliases `type_alias`; `type_specifier`
        // is kept only so any tree that did match the old check still does.
        const symbolKinds = new Map([
            ['function_declaration', 'function'],
            ['method_declaration', 'method'],
            ['type_spec', 'class'],
            ['type_alias', 'class'],
            ['type_specifier', 'class'],
        ]);
        const traverse = (n, container) => {
            if (n.type === 'call_expression') {
                const fn = n.childForFieldName('function');
                const nameNode = this.getCallNameNode(fn);
                if (nameNode)
                    (0, utils_1.pushRef)(refs, nameNode.text, 'call', nameNode);
            }
            else if (n.type === 'type_identifier') {
                (0, utils_1.pushRef)(refs, n.text, 'type', n);
            }
            let currentContainer = container;
            const kind = symbolKinds.get(n.type);
            const nameNode = kind ? n.childForFieldName('name') : null;
            if (nameNode) {
                const newSymbol = {
                    name: nameNode.text,
                    kind,
                    startLine: n.startPosition.row + 1,
                    endLine: n.endPosition.row + 1,
                    signature: kind === 'class' ? `type ${nameNode.text}` : this.getSignature(n),
                    container: container,
                };
                symbols.push(newSymbol);
                // Everything nested below is contained in this symbol.
                currentContainer = newSymbol;
            }
            for (let i = 0; i < n.childCount; i++) {
                const child = n.child(i);
                if (child)
                    traverse(child, currentContainer);
            }
        };
        traverse(node, undefined);
        return { symbols, refs };
    }
    /**
     * Resolve the node naming the callee of a call: the identifier itself,
     * or the `field` of a selector expression (`pkg.Func`, `recv.Method`).
     * Returns null for any other callee shape.
     */
    getCallNameNode(node) {
        if (!node)
            return null;
        if (node.type === 'identifier')
            return node;
        if (node.type === 'selector_expression') {
            return node.childForFieldName('field');
        }
        return null;
    }
    /**
     * Return the declaration text that precedes the body.
     *
     * When the node exposes a `body` field whose text ends the node's text,
     * the signature is everything before that body, so braces inside the
     * parameter or result types (`interface{}`, `struct{}`) do not cut the
     * signature short. Otherwise it falls back to the text before the first
     * `{`.
     */
    getSignature(node) {
        const text = node.text;
        const body = typeof node.childForFieldName === 'function' ? node.childForFieldName('body') : null;
        if (body && body.text && text.endsWith(body.text)) {
            return text.slice(0, text.length - body.text.length).trim();
        }
        return text.split('{')[0].trim();
    }
}
|
|
98
|
+
exports.GoAdapter = GoAdapter;
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.JavaAdapter = void 0;
|
|
7
|
+
const tree_sitter_java_1 = __importDefault(require("tree-sitter-java"));
|
|
8
|
+
const utils_1 = require("./utils");
|
|
9
|
+
/**
 * Language adapter for Java: exposes the tree-sitter grammar and extracts
 * symbols (methods, constructors, type declarations) and references (method
 * calls, object creation) from a parsed syntax tree.
 */
class JavaAdapter {
    getLanguageId() {
        return 'java';
    }
    getTreeSitterLanguage() {
        return tree_sitter_java_1.default;
    }
    getSupportedFileExtensions() {
        return ['.java'];
    }
    /**
     * Return the declaration text that precedes the body.
     *
     * When the node exposes a `body` field whose text ends the node's text,
     * the head is everything before that body, so braces inside annotation
     * arguments (`@SuppressWarnings({"a", "b"})`) do not cut the head short.
     * Body-less declarations fall back to the text before the first `{` or
     * `;`.
     */
    getDeclarationHead(node) {
        const text = node.text;
        const body = node.childForFieldName('body');
        if (body && body.text && text.endsWith(body.text)) {
            return text.slice(0, text.length - body.text.length).trim();
        }
        return text.split('{')[0].split(';')[0].trim();
    }
    /**
     * Walk the tree rooted at `node` and collect symbols and references.
     *
     * Each symbol records its name, kind, 1-based line range, signature and
     * the enclosing symbol (`container`, undefined at top level). Type
     * declarations additionally carry the `extends` / `implements` lists
     * parsed from their head.
     *
     * @param {object} node root syntax node
     * @returns {{symbols: Array<object>, refs: Array<object>}}
     */
    extractSymbolsAndRefs(node) {
        const symbols = [];
        const refs = [];
        const typeDeclarations = new Set([
            'class_declaration',
            'interface_declaration',
            'enum_declaration',
            'record_declaration',
            'annotation_type_declaration',
        ]);
        const traverse = (n, container) => {
            if (n.type === 'method_invocation') {
                const nameNode = n.childForFieldName('name');
                if (nameNode)
                    (0, utils_1.pushRef)(refs, nameNode.text, 'call', nameNode);
            }
            else if (n.type === 'object_creation_expression') {
                const typeNode = (0, utils_1.findFirstByType)(n, ['type_identifier', 'identifier']);
                if (typeNode)
                    (0, utils_1.pushRef)(refs, typeNode.text, 'new', typeNode);
            }
            let currentContainer = container;
            if (n.type === 'method_declaration' || n.type === 'constructor_declaration') {
                const nameNode = n.childForFieldName('name');
                if (nameNode) {
                    const newSymbol = {
                        name: nameNode.text,
                        kind: 'method',
                        startLine: n.startPosition.row + 1,
                        endLine: n.endPosition.row + 1,
                        signature: this.getDeclarationHead(n),
                        container: container,
                    };
                    symbols.push(newSymbol);
                    currentContainer = newSymbol;
                }
            }
            else if (typeDeclarations.has(n.type)) {
                const nameNode = n.childForFieldName('name');
                if (nameNode) {
                    const heritage = (0, utils_1.parseHeritage)(this.getDeclarationHead(n));
                    const classSym = {
                        name: nameNode.text,
                        kind: 'class',
                        startLine: n.startPosition.row + 1,
                        endLine: n.endPosition.row + 1,
                        // e.g. "class Foo", "interface Bar", "enum Baz"
                        signature: `${n.type.replace(/_declaration$/, '')} ${nameNode.text}`,
                        container,
                        extends: heritage.extends,
                        implements: heritage.implements,
                    };
                    symbols.push(classSym);
                    currentContainer = classSym;
                }
            }
            for (let i = 0; i < n.childCount; i++) {
                const child = n.child(i);
                if (child)
                    traverse(child, currentContainer);
            }
        };
        traverse(node, undefined);
        return { symbols, refs };
    }
}
|
|
80
|
+
exports.JavaAdapter = JavaAdapter;
|