dev-mcp-server 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +68 -0
- package/README.md +333 -0
- package/cli.js +248 -0
- package/package.json +60 -0
- package/src/api/routes/ingest.js +69 -0
- package/src/api/routes/knowledge.js +65 -0
- package/src/api/routes/query.js +105 -0
- package/src/api/server.js +91 -0
- package/src/core/indexer.js +171 -0
- package/src/core/ingester.js +155 -0
- package/src/core/queryEngine.js +236 -0
- package/src/storage/store.js +125 -0
- package/src/utils/fileParser.js +183 -0
- package/src/utils/llmClient.js +206 -0
- package/src/utils/logger.js +28 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
const { glob } = require('glob');
|
|
4
|
+
const { parseFile, shouldSkip, FILE_TYPE_MAP } = require('../utils/fileParser');
|
|
5
|
+
const store = require('../storage/store');
|
|
6
|
+
const indexer = require('./indexer');
|
|
7
|
+
const logger = require('../utils/logger');
|
|
8
|
+
|
|
9
|
+
/**
 * Ingests files, directories and ad-hoc text snippets into the document
 * store and keeps the search index in step with what is stored.
 */
class Ingester {
  /**
   * Parse a single file into chunks and store them, replacing any chunks
   * previously stored for the same absolute path.
   *
   * @param {string} filePath - Path to the file; resolved to an absolute path.
   * @returns {Promise<object>} `{ skipped: true, reason? }` when the file is
   *   ignored, otherwise `{ success: true, file, chunks, ...storeResult }`.
   * @throws {Error} When the file does not exist, or when parsing/storing fails
   *   (the failure is logged and the original error is rethrown).
   */
  async ingestFile(filePath) {
    const absPath = path.resolve(filePath);

    if (!fs.existsSync(absPath)) {
      throw new Error(`File not found: ${absPath}`);
    }

    if (shouldSkip(absPath)) {
      logger.warn(`Skipping: ${absPath}`);
      return { skipped: true };
    }

    const ext = path.extname(absPath).toLowerCase();
    if (!FILE_TYPE_MAP[ext]) {
      logger.warn(`Unsupported file type: ${ext}`);
      return { skipped: true, reason: 'unsupported type' };
    }

    const stat = fs.statSync(absPath);
    if (stat.size > 500 * 1024) { // skip files > 500KB
      logger.warn(`File too large (${(stat.size / 1024).toFixed(0)}KB): ${absPath}`);
      return { skipped: true, reason: 'file too large' };
    }

    try {
      // Parse BEFORE touching the store: if parsing throws, the chunks that
      // were stored for this path earlier must survive.
      const chunks = parseFile(absPath);
      const removed = store.removeByPath(absPath);

      if (chunks.length === 0) {
        // The file is now empty; if old chunks were dropped the index is stale.
        if (removed > 0) {
          indexer.invalidate();
        }
        return { skipped: true, reason: 'empty file' };
      }

      const result = store.upsertDocs(chunks);

      indexer.invalidate();

      logger.info(`Ingested: ${path.basename(absPath)} (${chunks.length} chunk(s))`);
      return { success: true, file: absPath, chunks: chunks.length, ...result };
    } catch (err) {
      logger.error(`Failed to ingest ${absPath}: ${err.message}`);
      throw err;
    }
  }

  /**
   * Ingest every supported file below a directory, one file at a time.
   * A failure on one file is recorded and does not stop the run.
   *
   * @param {string} dirPath - Directory to scan; resolved to an absolute path.
   * @param {object} [options] - Accepted for API compatibility; currently unused.
   * @returns {Promise<object>} Summary `{ total, ingested, skipped, failed,
   *   totalChunks, errors }` where `errors` holds `{ file, error }` entries.
   * @throws {Error} When the directory does not exist.
   */
  async ingestDirectory(dirPath, options = {}) {
    const absDir = path.resolve(dirPath);

    if (!fs.existsSync(absDir)) {
      throw new Error(`Directory not found: ${absDir}`);
    }

    // Build one brace-expansion glob from the supported extensions.
    const extensions = Object.keys(FILE_TYPE_MAP).map((e) => e.slice(1));
    const pattern = `**/*.{${extensions.join(',')}}`;

    logger.info(`Scanning: ${absDir}`);
    const files = await glob(pattern, {
      cwd: absDir,
      absolute: true,
      ignore: [
        '**/node_modules/**',
        '**/.git/**',
        '**/dist/**',
        '**/build/**',
        '**/coverage/**',
        '**/*.min.js',
        '**/package-lock.json',
        '**/yarn.lock',
      ],
    });

    logger.info(`Found ${files.length} files to process`);

    const results = {
      total: files.length,
      ingested: 0,
      skipped: 0,
      failed: 0,
      totalChunks: 0,
      errors: [],
    };

    // Counts every file handled, including failures, so progress is logged
    // exactly once per 50 files and never as "0/N".
    let processed = 0;

    for (const file of files) {
      try {
        const result = await this.ingestFile(file);
        if (result.skipped) {
          results.skipped++;
        } else {
          results.ingested++;
          results.totalChunks += result.chunks || 0;
        }
      } catch (err) {
        results.failed++;
        results.errors.push({ file, error: err.message });
      }

      processed++;
      if (processed % 50 === 0) {
        logger.info(`Progress: ${processed}/${files.length}`);
      }
    }

    const docCount = indexer.build();
    logger.info(`Index rebuilt with ${docCount} documents`);

    return results;
  }

  /**
   * Store a piece of free text as a single-chunk document.
   *
   * @param {string} content - Text to store; surrounding whitespace is trimmed.
   * @param {object} [options]
   * @param {string} [options.kind='documentation'] - Document kind.
   * @param {string} [options.label='manual-entry'] - Label used in the id,
   *   the pseudo file path (`raw://<label>`) and the filename.
   * @param {string[]} [options.tags=[]] - Tags recorded in the metadata.
   * @returns {Promise<object>} `{ success: true, id, kind, label }`.
   * @throws {Error} When `content` is not a string or is blank.
   */
  async ingestRawText(content, options = {}) {
    const {
      kind = 'documentation',
      label = 'manual-entry',
      tags = [],
    } = options;

    if (typeof content !== 'string' || content.trim().length === 0) {
      throw new Error('Content cannot be empty');
    }

    // Everything below describes the trimmed text, which is what gets stored.
    const text = content.trim();

    const id = `raw::${label}::${Date.now()}`;
    const doc = {
      id,
      filePath: `raw://${label}`,
      filename: label,
      ext: '.txt',
      kind,
      chunkIndex: 0,
      totalChunks: 1,
      content: text,
      lines: text.split('\n').length,
      ingestedAt: new Date().toISOString(),
      metadata: {
        isRaw: true,
        tags,
        label,
      },
    };

    store.upsertDocs([doc]);
    indexer.invalidate();

    logger.info(`Ingested raw text: "${label}" (${text.length} chars)`);
    return { success: true, id, kind, label };
  }
}
|
|
154
|
+
|
|
155
|
+
module.exports = new Ingester();
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const llmClient = require('../utils/llmClient');
|
|
4
|
+
const indexer = require('./indexer');
|
|
5
|
+
const logger = require('../utils/logger');
|
|
6
|
+
|
|
7
|
+
// Query modes understood by the query engine. The string values are what
// detectMode() returns and what callers pass as `options.mode`.
const QUERY_MODES = {
  DEBUG: 'debug', // "Why is this failing?"
  USAGE: 'usage', // "Where is this used?"
  IMPACT: 'impact', // "If I change this, what breaks?"
  GENERAL: 'general', // Open-ended question
};
|
|
13
|
+
|
|
14
|
+
/**
 * Classify a free-text question into one of the QUERY_MODES by keyword
 * heuristics. Checks run in a fixed order — debug, then usage, then impact —
 * and the first match wins; anything else is a general question.
 *
 * @param {string} question - The developer's question.
 * @returns {string} One of the QUERY_MODES values.
 */
function detectMode(question) {
  const text = question.toLowerCase();

  const debugPatterns = [
    /why.*(fail|error|crash|break|throw|exception|not work)/i,
    /what.*(error|exception|wrong|cause|happen)/i,
    /debug|trace|stack|exception|ClassCastException|NullPointer|TypeError/i,
  ];
  const looksLikeDebug = debugPatterns.some((pattern) => pattern.test(text));
  if (looksLikeDebug) {
    return QUERY_MODES.DEBUG;
  }

  const usagePattern = /where.*(use|call|reference|import|depend)|who.*(use|call)|find.*(usage|reference)/i;
  if (usagePattern.test(text)) {
    return QUERY_MODES.USAGE;
  }

  const impactPattern = /if.*change|what.*break|impact|affect|depend|side effect|ripple|downstream/i;
  return impactPattern.test(text) ? QUERY_MODES.IMPACT : QUERY_MODES.GENERAL;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Assemble the system prompt for a query: a shared preamble followed by the
 * instructions for the given mode. A mode without its own instructions gets
 * the general-purpose instructions.
 *
 * @param {string} mode - One of the QUERY_MODES values.
 * @returns {string} The complete system prompt.
 */
function buildSystemPrompt(mode) {
  const preamble = `You are an expert developer assistant with deep knowledge of the codebase.
You answer questions ONLY based on the code context provided — never guess or hallucinate.
If the context doesn't contain enough information, say so clearly.
Be concise, direct, and developer-friendly. Use code examples from the context when relevant.`;

  const debugInstructions = `
Your job: DIAGNOSE the root cause of bugs and errors.
- Identify the exact type mismatch, null reference, or logic flaw
- Trace the execution flow that leads to the error
- Point to the specific file, function, and line range where the issue originates
- Provide a concrete fix with code
Format: Root Cause → Affected Flow → Fix`;

  const usageInstructions = `
Your job: FIND all usages of a function, class, module, or variable.
- List every file where it is imported or referenced
- Explain HOW it is used in each context (called directly, passed as callback, extended, etc.)
- Note any patterns or inconsistencies in usage
Format: Summary → File-by-file breakdown`;

  const impactInstructions = `
Your job: ANALYSE what would break or change if the target is modified.
- List all files/modules that directly depend on it
- Identify indirect dependencies (things that use things that use it)
- Flag any risky or tightly-coupled areas
- Suggest safe modification strategies
Format: Direct Impact → Indirect Impact → Risk Level → Safe change strategy`;

  const generalInstructions = `
Your job: Answer the developer's question using the codebase context.
- Be specific and cite the relevant files/functions
- If the answer spans multiple files, connect the dots
- If something is unclear in the code, flag it`;

  const instructionsByMode = {
    [QUERY_MODES.DEBUG]: debugInstructions,
    [QUERY_MODES.USAGE]: usageInstructions,
    [QUERY_MODES.IMPACT]: impactInstructions,
    [QUERY_MODES.GENERAL]: generalInstructions,
  };

  const chosen = instructionsByMode[mode] || instructionsByMode[QUERY_MODES.GENERAL];
  return preamble + chosen;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Render retrieved documents as the text block that is placed in the prompt.
 * Each document becomes a header line, its path, an optional one-line
 * metadata summary and its content inside a fenced block; documents are
 * separated by a blank line.
 *
 * @param {Array<object>} docs - Retrieved documents (may be empty or missing).
 * @returns {string} The formatted context, or a fixed notice when there are
 *   no documents.
 */
function formatContext(docs) {
  if (!docs || docs.length === 0) {
    return 'No relevant context found in the codebase index.';
  }

  const sections = [];

  docs.forEach((doc, position) => {
    const meta = doc.metadata || {};

    // Only non-empty metadata lists contribute to the summary line.
    const parts = [];
    if (meta.functions?.length) {
      parts.push(`Functions: ${meta.functions.slice(0, 5).join(', ')}`);
    }
    if (meta.classes?.length) {
      parts.push(`Classes: ${meta.classes.join(', ')}`);
    }
    if (meta.imports?.length) {
      parts.push(`Imports: ${meta.imports.slice(0, 3).join(', ')}`);
    }
    if (meta.errors?.length) {
      parts.push(`Errors: ${meta.errors.slice(0, 3).join(', ')}`);
    }
    if (meta.patterns?.length) {
      parts.push(`Patterns: ${meta.patterns.join(', ')}`);
    }
    const metaSummary = parts.join(' | ');

    const sectionLines = [
      `--- [${position + 1}] ${doc.filename} (${doc.kind}) | Score: ${doc.relevanceScore} ---`,
      `Path: ${doc.filePath}`,
      metaSummary ? `Meta: ${metaSummary}` : '',
      '```',
      `${doc.content}`,
      '```',
    ];
    sections.push(sectionLines.join('\n'));
  });

  return sections.join('\n\n');
}
|
|
101
|
+
|
|
102
|
+
/**
 * Answers developer questions: picks a query mode, retrieves matching chunks
 * from the indexer, and asks the LLM client to answer from that context.
 */
class QueryEngine {
  /**
   * Answer a question from indexed context.
   *
   * @param {string} question - The developer's question.
   * @param {object} [options]
   * @param {string} [options.mode] - Force a QUERY_MODES value instead of
   *   detecting it from the question.
   * @param {number} [options.topK=8] - Number of chunks to retrieve.
   * @param {boolean} [options.stream=false] - When true, resolve to an async
   *   generator of events instead of a finished answer.
   * @param {object} [options.filter={}] - Passed to `indexer.search`; only
   *   used in the general mode.
   * @param {string} [options.symbol] - Usage mode: the symbol to look up,
   *   bypassing extraction from the question text.
   * @param {string} [options.target] - Impact mode: the target to analyse,
   *   bypassing extraction from the question text.
   * @returns {Promise<object|AsyncGenerator>} `{ answer, mode, provider,
   *   sources, usage }`, or the event generator when streaming.
   * @throws {Error} When the question is not a non-blank string.
   */
  async query(question, options = {}) {
    const {
      mode: forcedMode,
      topK = 8,
      stream = false,
      filter = {},
      symbol: explicitSymbol,
      target: explicitTarget,
    } = options;

    if (typeof question !== 'string' || question.trim().length === 0) {
      throw new Error('Question cannot be empty');
    }

    const mode = forcedMode || detectMode(question);
    // Only mark the preview as truncated when it actually is.
    const preview = question.length > 80 ? `${question.slice(0, 80)}...` : question;
    logger.info(`Query mode: ${mode} | Q: "${preview}"`);

    let docs;
    switch (mode) {
      case QUERY_MODES.DEBUG:
        docs = indexer.searchForErrors(question, topK);
        break;
      case QUERY_MODES.USAGE: {
        const usageMatch = question.match(/(?:where|how|who).*?(?:is|are|does)?\s+[`"']?(\w+)[`"']?\s+(?:used|called|import|reference)/i);
        // An explicitly supplied symbol always wins: the regex cannot
        // recover names containing non-word characters such as dots.
        const symbol = explicitSymbol || usageMatch?.[1] || question;
        docs = indexer.searchForUsages(symbol, topK);
        break;
      }
      case QUERY_MODES.IMPACT: {
        // The optional "or <verb>" group keeps phrasings like
        // "change or remove X" from capturing "or" as the target.
        const impactMatch = question.match(/(?:change|modify|update|refactor)(?:\s+(?:or|and)\s+\w+)?\s+[`"']?(\w+)[`"']?/i);
        const target = explicitTarget || impactMatch?.[1] || question;
        docs = indexer.searchForImpact(target, topK);
        break;
      }
      default:
        docs = indexer.search(question, topK, filter);
    }
    docs = docs || [];

    const contextText = formatContext(docs);
    const systemPrompt = buildSystemPrompt(mode);

    const userMessage = `## Developer Question
${question}

## Codebase Context (retrieved from your system)
${contextText}

## Answer`;

    logger.info(`Sending to ${llmClient.label()}: ${docs.length} context chunks`);

    if (stream) {
      return this._streamQuery(systemPrompt, userMessage, docs, mode);
    }

    const response = await llmClient.createMessage({
      maxTokens: 2000,
      system: systemPrompt,
      messages: [{ role: 'user', content: userMessage }],
    });

    // A response without any content block yields an empty answer rather
    // than a TypeError.
    const answer = response?.content?.[0]?.text ?? '';

    return {
      answer,
      mode,
      provider: llmClient.label(),
      sources: docs.map((d) => ({
        file: d.filename,
        path: d.filePath,
        kind: d.kind,
        relevanceScore: d.relevanceScore,
        functions: d.metadata?.functions?.slice(0, 5) || [],
      })),
      usage: {
        inputTokens: response?.usage?.input_tokens,
        outputTokens: response?.usage?.output_tokens,
      },
    };
  }

  /**
   * Streaming variant used by `query`. Yields one `metadata` event, then a
   * `text` event per text delta, and a `done` event when the client reports
   * `message_stop`.
   *
   * @param {string} systemPrompt - System prompt for the request.
   * @param {string} userMessage - User message for the request.
   * @param {Array<object>} docs - Retrieved documents, reported as sources.
   * @param {string} mode - The query mode in effect.
   * @returns {AsyncGenerator<object>} Stream of event objects.
   */
  async *_streamQuery(systemPrompt, userMessage, docs, mode) {
    const stream = await llmClient.createMessage({
      maxTokens: 2000,
      system: systemPrompt,
      messages: [{ role: 'user', content: userMessage }],
      stream: true,
    });

    yield {
      type: 'metadata',
      mode,
      provider: llmClient.label(),
      sources: docs.map((d) => ({
        file: d.filename,
        path: d.filePath,
        kind: d.kind,
        relevanceScore: d.relevanceScore,
      })),
    };

    for await (const event of stream) {
      if (event.type === 'content_block_delta' && event.delta?.type === 'text_delta') {
        yield { type: 'text', text: event.delta.text };
      }
      if (event.type === 'message_stop') {
        yield { type: 'done' };
      }
    }
  }

  /**
   * Ask why an error occurs, always in debug mode.
   *
   * @param {string} errorMessage - The error message.
   * @param {string} [stackTrace=''] - Optional stack trace appended to the question.
   * @param {object} [options] - Forwarded to `query`.
   * @returns {Promise<object|AsyncGenerator>} Same as `query`.
   */
  async debugError(errorMessage, stackTrace = '', options = {}) {
    const question = `Why is this error happening and how do I fix it?
Error: ${errorMessage}
${stackTrace ? `Stack trace:\n${stackTrace}` : ''}`;

    return this.query(question, { ...options, mode: QUERY_MODES.DEBUG });
  }

  /**
   * Ask where a symbol is used, always in usage mode.
   *
   * @param {string} symbol - The symbol to look up.
   * @param {object} [options] - Forwarded to `query`.
   * @returns {Promise<object|AsyncGenerator>} Same as `query`.
   */
  async findUsages(symbol, options = {}) {
    return this.query(`Where is ${symbol} used across the codebase?`, {
      ...options,
      mode: QUERY_MODES.USAGE,
      // Hand the symbol over directly instead of re-parsing it from the text.
      symbol,
    });
  }

  /**
   * Ask what a change to a target would affect, always in impact mode.
   *
   * @param {string} target - The thing being changed.
   * @param {string} [changeDescription=''] - Optional description of the change.
   * @param {object} [options] - Forwarded to `query`.
   * @returns {Promise<object|AsyncGenerator>} Same as `query`.
   */
  async analyzeImpact(target, changeDescription = '', options = {}) {
    const question = changeDescription
      ? `If I change ${target} to ${changeDescription}, what would break or be affected?`
      : `If I change or remove ${target}, what would break?`;

    // Hand the target over directly instead of re-parsing it from the text.
    return this.query(question, { ...options, mode: QUERY_MODES.IMPACT, target });
  }
}
|
|
235
|
+
|
|
236
|
+
module.exports = { QueryEngine: new QueryEngine(), QUERY_MODES, detectMode };
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const logger = require('../utils/logger');
|
|
4
|
+
|
|
5
|
+
// On-disk locations of the persisted store. They are resolved against the
// directory the process was started from (process.cwd()), so the data
// directory depends on where the program is launched.
const DATA_DIR = path.join(process.cwd(), 'data');
const INDEX_FILE = path.join(DATA_DIR, 'index.json');
const META_FILE = path.join(DATA_DIR, 'meta.json');
|
|
8
|
+
|
|
9
|
+
/**
 * JSON-file backed document store. The full list of document chunks is held
 * in memory (`this.index`) together with summary statistics (`this.meta`);
 * both are written to disk after every change.
 */
class Store {
  constructor() {
    this._ensureDataDir();
    this.index = this._load(INDEX_FILE, []);
    this.meta = this._load(META_FILE, this._emptyMeta());
  }

  /** @returns {object} Statistics object describing an empty store. */
  _emptyMeta() {
    return {
      totalDocs: 0,
      totalFiles: 0,
      lastIngested: null,
      fileTypes: {},
      tags: [],
    };
  }

  /** Create the data directory and the `logs` directory if they are missing. */
  _ensureDataDir() {
    if (!fs.existsSync(DATA_DIR)) {
      fs.mkdirSync(DATA_DIR, { recursive: true });
    }
    const logsDir = path.join(process.cwd(), 'logs');
    if (!fs.existsSync(logsDir)) {
      fs.mkdirSync(logsDir, { recursive: true });
    }
  }

  /**
   * Read and parse a JSON file.
   *
   * @param {string} file - File to read.
   * @param {Array|object} defaultVal - Returned when the file is missing,
   *   unreadable, not valid JSON, or holds a value of a different shape
   *   (array vs. plain object) than this default.
   * @returns {Array|object} The parsed value or `defaultVal`.
   */
  _load(file, defaultVal) {
    try {
      if (fs.existsSync(file)) {
        const parsed = JSON.parse(fs.readFileSync(file, 'utf-8'));
        // A file holding e.g. `{}` where a list is expected would break every
        // later array operation, so it is treated like a corrupt file.
        const sameShape = Array.isArray(defaultVal)
          ? Array.isArray(parsed)
          : parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        if (sameShape) {
          return parsed;
        }
        logger.warn(`Could not load ${file}: unexpected JSON shape`);
      }
    } catch (e) {
      logger.warn(`Could not load ${file}: ${e.message}`);
    }
    return defaultVal;
  }

  /** Write the index and the statistics to their JSON files. */
  _save() {
    fs.writeFileSync(INDEX_FILE, JSON.stringify(this.index, null, 2));
    fs.writeFileSync(META_FILE, JSON.stringify(this.meta, null, 2));
  }

  /**
   * Insert documents, replacing any stored document with the same `id`.
   *
   * @param {Array<object>} docs - Documents to store.
   * @returns {{added: number, updated: number}} How many were new vs. replaced.
   */
  upsertDocs(docs) {
    let added = 0;
    let updated = 0;

    // id -> position of the first document with that id, built once so the
    // whole batch costs one pass instead of one scan per document.
    const positions = new Map();
    this.index.forEach((existing, i) => {
      if (!positions.has(existing.id)) {
        positions.set(existing.id, i);
      }
    });

    for (const doc of docs) {
      const at = positions.get(doc.id);
      if (at !== undefined) {
        this.index[at] = doc;
        updated++;
      } else {
        positions.set(doc.id, this.index.length);
        this.index.push(doc);
        added++;
      }
    }

    this._rebuildMeta();
    this._save();
    return { added, updated };
  }

  /**
   * Remove every document stored for a file path.
   *
   * @param {string} filePath - Value to match against each document's `filePath`.
   * @returns {number} Number of documents removed.
   */
  removeByPath(filePath) {
    const kept = this.index.filter((d) => d.filePath !== filePath);
    const removed = this.index.length - kept.length;

    // Nothing matched: skip the statistics rebuild and both file rewrites.
    if (removed > 0) {
      this.index = kept;
      // A removal is not an ingestion, so `lastIngested` is left alone.
      this._rebuildMeta({ touch: false });
      this._save();
    }
    return removed;
  }

  /** @returns {Array<object>} The live in-memory list of all documents. */
  getAll() {
    return this.index;
  }

  /**
   * @param {string} kind - Document kind to select.
   * @returns {Array<object>} Documents whose `kind` equals the argument.
   */
  getByKind(kind) {
    return this.index.filter((d) => d.kind === kind);
  }

  /** @returns {string[]} Distinct `filePath` values across all documents. */
  getIngestedFiles() {
    return [...new Set(this.index.map((d) => d.filePath))];
  }

  /** Remove all documents, reset the statistics and persist the empty state. */
  clear() {
    this.index = [];
    this.meta = this._emptyMeta();
    this._save();
  }

  /** @returns {object} The statistics plus `indexSize` (number of documents). */
  getStats() {
    return {
      ...this.meta,
      indexSize: this.index.length,
    };
  }

  /**
   * Recompute `this.meta` from the current documents.
   *
   * @param {object} [opts]
   * @param {boolean} [opts.touch=true] - When true, stamp `lastIngested` with
   *   the current time; when false, keep the previous value.
   */
  _rebuildMeta({ touch = true } = {}) {
    const files = new Set();
    const tags = new Set();
    const fileTypes = {};

    for (const doc of this.index) {
      files.add(doc.filePath);
      fileTypes[doc.kind] = (fileTypes[doc.kind] || 0) + 1;
      // Tags live in each document's metadata; collect the distinct ones so
      // `meta.tags` keeps existing after a rebuild.
      const docTags = doc.metadata?.tags;
      if (Array.isArray(docTags)) {
        for (const tag of docTags) {
          tags.add(tag);
        }
      }
    }

    this.meta = {
      totalDocs: this.index.length,
      totalFiles: files.size,
      lastIngested: touch ? new Date().toISOString() : this.meta?.lastIngested ?? null,
      fileTypes,
      tags: [...tags],
    };
  }
}
|
|
123
|
+
|
|
124
|
+
const store = new Store();
|
|
125
|
+
module.exports = store;
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
|
|
4
|
+
// Maps a lower-case file extension (including the leading dot) to the
// document kind assigned to files of that type. parseFile() falls back to
// 'unknown' for extensions that are not listed here.
const FILE_TYPE_MAP = {
  '.js': 'code',
  '.ts': 'code',
  '.jsx': 'code',
  '.tsx': 'code',
  '.mjs': 'code',
  '.cjs': 'code',
  '.py': 'code',
  '.java': 'code',
  '.go': 'code',
  '.rb': 'code',
  '.php': 'code',
  '.cs': 'code',
  '.cpp': 'code',
  '.c': 'code',
  '.rs': 'code',
  '.json': 'config',
  '.yaml': 'config',
  '.yml': 'config',
  '.env': 'config',
  '.toml': 'config',
  '.xml': 'config',
  '.md': 'documentation',
  '.txt': 'documentation',
  '.log': 'log',
  '.sql': 'schema',
  '.graphql': 'schema',
  '.gql': 'schema',
  '.sh': 'script',
  '.bash': 'script',
};

// Chunking thresholds used by chunkContent(); both are measured in characters.
const CHUNK_SIZE = 1500; // characters per chunk
const CHUNK_OVERLAP = 200; // overlap between chunks
|
|
38
|
+
|
|
39
|
+
/**
 * Read a file as UTF-8 and turn it into chunk documents ready for storage.
 *
 * @param {string} filePath - File to read; also used verbatim in each chunk id.
 * @returns {Array<object>} One document per chunk, or an empty array when the
 *   file contains only whitespace.
 */
function parseFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  const kind = FILE_TYPE_MAP[ext] || 'unknown';
  const filename = path.basename(filePath);

  const text = fs.readFileSync(filePath, 'utf-8');
  if (!text.trim()) {
    return [];
  }

  const pieces = chunkContent(text, filePath);
  const totalChunks = pieces.length;

  const docs = [];
  pieces.forEach((piece, chunkIndex) => {
    docs.push({
      id: `${filePath}::chunk${chunkIndex}`,
      filePath,
      filename,
      ext,
      kind,
      chunkIndex,
      totalChunks,
      content: piece,
      lines: countLines(piece),
      ingestedAt: new Date().toISOString(),
      metadata: extractMetadata(piece, kind, filename),
    });
  });
  return docs;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Split text into line-aligned chunks of roughly CHUNK_SIZE characters.
 * Each chunk after the first starts with the last lines of the previous one
 * (about CHUNK_OVERLAP characters) so context is not lost at the boundary.
 *
 * @param {string} content - Text to split.
 * @param {string} [filePath] - Unused; kept so existing callers keep working.
 * @returns {string[]} The chunks; a single-element array when the text fits
 *   in one chunk.
 */
function chunkContent(content, filePath) {
  if (content.length <= CHUNK_SIZE) return [content];

  const chunks = [];
  let current = [];
  let currentLen = 0;
  // True once `current` holds a non-blank line that no emitted chunk contains.
  let hasFreshContent = false;

  for (const line of content.split('\n')) {
    current.push(line);
    currentLen += line.length + 1;
    if (line.trim().length > 0) {
      hasFreshContent = true;
    }

    if (currentLen >= CHUNK_SIZE) {
      chunks.push(current.join('\n'));

      // Seed the next chunk with trailing lines of this one.
      const overlapLines = [];
      let overlapLen = 0;
      for (let i = current.length - 1; i >= 0 && overlapLen < CHUNK_OVERLAP; i--) {
        const cost = current[i].length + 1;
        // A line this long would fill the next chunk on its own and then be
        // repeated in every following chunk; stop carrying lines over here.
        if (overlapLen + cost >= CHUNK_SIZE) break;
        overlapLines.unshift(current[i]);
        overlapLen += cost;
      }
      current = overlapLines;
      currentLen = overlapLen;
      hasFreshContent = false;
    }
  }

  // Emit the remainder only when it adds something: a tail made solely of
  // overlap (and blank lines) would just duplicate the previous chunk.
  if (hasFreshContent) {
    chunks.push(current.join('\n'));
  }

  return chunks;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Derive lightweight, regex-based metadata from one chunk of text.
 *
 * - `code`: function, class, import, export and error-type names plus a
 *   list of detected technology patterns.
 * - `log`: up to 10 lines mentioning errors/warnings, and whether any date
 *   is present.
 * - `schema`: table / model / type names.
 * Every kind additionally gets `isBugFix`.
 *
 * @param {string} content - Chunk text.
 * @param {string} kind - Document kind ('code', 'log', 'schema', ...).
 * @param {string} filename - Base name of the source file.
 * @returns {object} The metadata object.
 */
function extractMetadata(content, kind, filename) {
  const meta = {};

  if (kind === 'code') {
    const functions = [];
    const classes = [];
    const imports = [];
    const exports = [];

    // Keywords that the "name(...) {" alternative below would otherwise
    // report as function names; `function` covers anonymous functions.
    const notFunctionNames = ['if', 'for', 'while', 'switch', 'catch', 'function'];

    const fnMatches = content.matchAll(/(?:function\s+(\w+)|const\s+(\w+)\s*=\s*(?:async\s*)?\(|(\w+)\s*\([^)]*\)\s*{)/g);
    for (const m of fnMatches) {
      const name = m[1] || m[2] || m[3];
      if (name && !notFunctionNames.includes(name)) {
        functions.push(name);
      }
    }

    const classMatches = content.matchAll(/class\s+(\w+)/g);
    for (const m of classMatches) classes.push(m[1]);

    // `\(?` lets the call forms require('x') and import('x') match as well
    // as the `import ... from 'x'` statement form.
    const importMatches = content.matchAll(/(?:import|require)\s*\(?\s*(?:\{[^}]+\}|[\w*]+)?\s*(?:from)?\s*['"]([^'"]+)['"]/g);
    for (const m of importMatches) imports.push(m[1]);

    const exportMatches = content.matchAll(/export\s+(?:default\s+)?(?:function|class|const|let|var)?\s*(\w+)/g);
    for (const m of exportMatches) exports.push(m[1]);

    const errors = [];
    // `(?:new\s+)?` makes the common `throw new SomeError(...)` form match.
    const errorMatches = content.matchAll(/(?:catch|throw|Error|Exception)\s*[(\s]*(?:new\s+)?([A-Z]\w+(?:Error|Exception))/g);
    for (const m of errorMatches) errors.push(m[1]);

    meta.functions = [...new Set(functions)].slice(0, 20);
    meta.classes = [...new Set(classes)].slice(0, 10);
    meta.imports = [...new Set(imports)].slice(0, 20);
    meta.exports = [...new Set(exports)].slice(0, 10);
    meta.errors = [...new Set(errors)].slice(0, 10);

    const apiPatterns = [];
    if (/express|router\.(get|post|put|delete|patch)/i.test(content)) apiPatterns.push('REST API');
    if (/graphql|resolver|schema/i.test(content)) apiPatterns.push('GraphQL');
    if (/mongoose|sequelize|prisma|typeorm/i.test(content)) apiPatterns.push('ORM');
    if (/redis|ioredis|bull/i.test(content)) apiPatterns.push('Cache/Queue');
    if (/jwt|passport|bcrypt/i.test(content)) apiPatterns.push('Auth');
    meta.patterns = apiPatterns;

  } else if (kind === 'log') {
    const errorLines = content
      .split('\n')
      .filter((l) => /error|exception|warn|fail|crash/i.test(l))
      .slice(0, 10);
    meta.errors = errorLines;

    const hasTimestamps = /\d{4}-\d{2}-\d{2}|\d{2}\/\d{2}\/\d{4}/.test(content);
    meta.hasTimestamps = hasTimestamps;

  } else if (kind === 'schema') {
    const tables = [];
    // The whitespace after TABLE (and an optional IF NOT EXISTS) must be
    // consumed here, otherwise `CREATE TABLE users` can never match.
    const tableMatches = content.matchAll(/(?:CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?|model\s+|type\s+)["'`]?(\w+)["'`]?/gi);
    for (const m of tableMatches) tables.push(m[1]);
    meta.tables = [...new Set(tables)].slice(0, 20);
  }

  meta.isBugFix = /fix|bug|patch|resolve|hotfix|issue/i.test(filename) ||
    /TODO|FIXME|HACK|BUG|XXX/.test(content);

  return meta;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Count the lines in a piece of text, i.e. the number of segments separated
 * by '\n'. An empty string counts as one line.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Number of lines.
 */
function countLines(text) {
  const { length: lineCount } = text.split('\n');
  return lineCount;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Decide whether a file should be left out of ingestion: dependency, VCS,
 * build-output and coverage directories, minified assets, lock files and
 * source maps.
 *
 * @param {string} filePath - Path to test; `/` and `\` separators are both
 *   understood.
 * @returns {boolean} True when the file should be skipped.
 */
function shouldSkip(filePath) {
  // Compare on forward slashes so the directory rules also apply to
  // Windows-style paths.
  const normalized = String(filePath).replace(/\\/g, '/');

  // Directory rules are anchored to a whole path segment, so e.g. `redist/`
  // or `rebuild/` are not mistaken for `dist/` or `build/`.
  const skipPatterns = [
    /(^|\/)node_modules(\/|$)/,
    /(^|\/)\.git\//,
    /(^|\/)dist\//,
    /(^|\/)build\//,
    /(^|\/)coverage\//,
    /\.min\.(js|css)$/,
    /package-lock\.json$/,
    /yarn\.lock$/,
    /\.map$/,
  ];
  return skipPatterns.some((p) => p.test(normalized));
}
|
|
182
|
+
|
|
183
|
+
module.exports = { parseFile, shouldSkip, FILE_TYPE_MAP };
|