@ace-sdk/cli 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/dist/cli.d.ts +14 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +427 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/bootstrap.d.ts +19 -0
- package/dist/commands/bootstrap.d.ts.map +1 -0
- package/dist/commands/bootstrap.js +157 -0
- package/dist/commands/bootstrap.js.map +1 -0
- package/dist/commands/cache.d.ts +19 -0
- package/dist/commands/cache.d.ts.map +1 -0
- package/dist/commands/cache.js +101 -0
- package/dist/commands/cache.js.map +1 -0
- package/dist/commands/clear.d.ts +12 -0
- package/dist/commands/clear.d.ts.map +1 -0
- package/dist/commands/clear.js +50 -0
- package/dist/commands/clear.js.map +1 -0
- package/dist/commands/config.d.ts +34 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +423 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/delta.d.ts +14 -0
- package/dist/commands/delta.d.ts.map +1 -0
- package/dist/commands/delta.js +140 -0
- package/dist/commands/delta.js.map +1 -0
- package/dist/commands/doctor.d.ts +8 -0
- package/dist/commands/doctor.d.ts.map +1 -0
- package/dist/commands/doctor.js +187 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/export.d.ts +12 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +45 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/import.d.ts +13 -0
- package/dist/commands/import.d.ts.map +1 -0
- package/dist/commands/import.js +87 -0
- package/dist/commands/import.js.map +1 -0
- package/dist/commands/learn.d.ts +17 -0
- package/dist/commands/learn.d.ts.map +1 -0
- package/dist/commands/learn.js +193 -0
- package/dist/commands/learn.js.map +1 -0
- package/dist/commands/patterns.d.ts +13 -0
- package/dist/commands/patterns.d.ts.map +1 -0
- package/dist/commands/patterns.js +76 -0
- package/dist/commands/patterns.js.map +1 -0
- package/dist/commands/plugin.d.ts +26 -0
- package/dist/commands/plugin.d.ts.map +1 -0
- package/dist/commands/plugin.js +267 -0
- package/dist/commands/plugin.js.map +1 -0
- package/dist/commands/projects.d.ts +15 -0
- package/dist/commands/projects.d.ts.map +1 -0
- package/dist/commands/projects.js +122 -0
- package/dist/commands/projects.js.map +1 -0
- package/dist/commands/record.d.ts +32 -0
- package/dist/commands/record.d.ts.map +1 -0
- package/dist/commands/record.js +307 -0
- package/dist/commands/record.js.map +1 -0
- package/dist/commands/search.d.ts +16 -0
- package/dist/commands/search.d.ts.map +1 -0
- package/dist/commands/search.js +125 -0
- package/dist/commands/search.js.map +1 -0
- package/dist/commands/status.d.ts +8 -0
- package/dist/commands/status.d.ts.map +1 -0
- package/dist/commands/status.js +63 -0
- package/dist/commands/status.js.map +1 -0
- package/dist/commands/summarize.d.ts +17 -0
- package/dist/commands/summarize.d.ts.map +1 -0
- package/dist/commands/summarize.js +167 -0
- package/dist/commands/summarize.js.map +1 -0
- package/dist/commands/top.d.ts +14 -0
- package/dist/commands/top.d.ts.map +1 -0
- package/dist/commands/top.js +58 -0
- package/dist/commands/top.js.map +1 -0
- package/dist/commands/tune.d.ts +36 -0
- package/dist/commands/tune.d.ts.map +1 -0
- package/dist/commands/tune.js +354 -0
- package/dist/commands/tune.js.map +1 -0
- package/dist/formatters/playbook-formatter.d.ts +19 -0
- package/dist/formatters/playbook-formatter.d.ts.map +1 -0
- package/dist/formatters/playbook-formatter.js +56 -0
- package/dist/formatters/playbook-formatter.js.map +1 -0
- package/dist/formatters/search-formatter.d.ts +28 -0
- package/dist/formatters/search-formatter.d.ts.map +1 -0
- package/dist/formatters/search-formatter.js +48 -0
- package/dist/formatters/search-formatter.js.map +1 -0
- package/dist/formatters/status-formatter.d.ts +25 -0
- package/dist/formatters/status-formatter.d.ts.map +1 -0
- package/dist/formatters/status-formatter.js +46 -0
- package/dist/formatters/status-formatter.js.map +1 -0
- package/dist/services/config-loader.d.ts +8 -0
- package/dist/services/config-loader.d.ts.map +1 -0
- package/dist/services/config-loader.js +7 -0
- package/dist/services/config-loader.js.map +1 -0
- package/dist/services/initialization.d.ts +128 -0
- package/dist/services/initialization.d.ts.map +1 -0
- package/dist/services/initialization.js +874 -0
- package/dist/services/initialization.js.map +1 -0
- package/dist/services/language-detector.d.ts +63 -0
- package/dist/services/language-detector.d.ts.map +1 -0
- package/dist/services/language-detector.js +123 -0
- package/dist/services/language-detector.js.map +1 -0
- package/dist/services/logger.d.ts +97 -0
- package/dist/services/logger.d.ts.map +1 -0
- package/dist/services/logger.js +229 -0
- package/dist/services/logger.js.map +1 -0
- package/dist/services/plugin-loader.d.ts +84 -0
- package/dist/services/plugin-loader.d.ts.map +1 -0
- package/dist/services/plugin-loader.js +282 -0
- package/dist/services/plugin-loader.js.map +1 -0
- package/dist/services/recorder.d.ts +80 -0
- package/dist/services/recorder.d.ts.map +1 -0
- package/dist/services/recorder.js +267 -0
- package/dist/services/recorder.js.map +1 -0
- package/dist/services/server-client.d.ts +32 -0
- package/dist/services/server-client.d.ts.map +1 -0
- package/dist/services/server-client.js +68 -0
- package/dist/services/server-client.js.map +1 -0
- package/dist/services/session-storage.d.ts +20 -0
- package/dist/services/session-storage.d.ts.map +1 -0
- package/dist/services/session-storage.js +29 -0
- package/dist/services/session-storage.js.map +1 -0
- package/dist/types/config.d.ts +33 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +63 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/pattern.d.ts +8 -0
- package/dist/types/pattern.d.ts.map +1 -0
- package/dist/types/pattern.js +7 -0
- package/dist/types/pattern.js.map +1 -0
- package/dist/types/plugin.d.ts +87 -0
- package/dist/types/plugin.d.ts.map +1 -0
- package/dist/types/plugin.js +5 -0
- package/dist/types/plugin.js.map +1 -0
- package/dist/types/recorder.d.ts +44 -0
- package/dist/types/recorder.d.ts.map +1 -0
- package/dist/types/recorder.js +5 -0
- package/dist/types/recorder.js.map +1 -0
- package/dist/utils/code-extractor.d.ts +8 -0
- package/dist/utils/code-extractor.d.ts.map +1 -0
- package/dist/utils/code-extractor.js +7 -0
- package/dist/utils/code-extractor.js.map +1 -0
- package/dist/utils/semver.d.ts +13 -0
- package/dist/utils/semver.d.ts.map +1 -0
- package/dist/utils/semver.js +14 -0
- package/dist/utils/semver.js.map +1 -0
- package/dist/utils/version-checker.d.ts +27 -0
- package/dist/utils/version-checker.d.ts.map +1 -0
- package/dist/utils/version-checker.js +141 -0
- package/dist/utils/version-checker.js.map +1 -0
- package/package.json +64 -0
|
@@ -0,0 +1,874 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* InitializationService - Offline learning from existing codebase
|
|
3
|
+
*
|
|
4
|
+
* Implements ACE Paper Section 4.1: Offline Adaptation
|
|
5
|
+
* Analyzes git history and existing code to build initial playbook
|
|
6
|
+
*/
|
|
7
|
+
import { exec } from 'child_process';
|
|
8
|
+
import { promisify } from 'util';
|
|
9
|
+
import { LanguageDetector } from './language-detector.js';
|
|
10
|
+
const execAsync = promisify(exec);
|
|
11
|
+
export class InitializationService {
|
|
12
|
+
// Language detector (GitHub Linguist wrapper) used by the
// Linguist-based source-file analysis below.
languageDetector;
/**
 * Creates the service with a fresh LanguageDetector instance.
 * No I/O happens here; detection runs lazily per analysis call.
 */
constructor() {
this.languageDetector = new LanguageDetector();
}
|
|
16
|
+
/**
|
|
17
|
+
* Initialize playbook from existing codebase
|
|
18
|
+
*
|
|
19
|
+
* SUPPORTS MULTIPLE MODES:
|
|
20
|
+
* - hybrid: Docs + Git + Local files (NEW DEFAULT - intelligent fallback)
|
|
21
|
+
* - both: Git + Local files (legacy behavior)
|
|
22
|
+
* - docs-only: Only documentation files
|
|
23
|
+
* - git-history: Only git commits
|
|
24
|
+
* - local-files: Only source files
|
|
25
|
+
*
|
|
26
|
+
* USES LINGUIST: Automatically detects ALL programming languages (no hardcoded extensions!)
|
|
27
|
+
*/
|
|
28
|
+
async initializeFromCodebase(repoPath, options = {}) {
|
|
29
|
+
const { mode = 'hybrid', commitLimit = 500, // NEW DEFAULT: 500 (was 100)
|
|
30
|
+
daysBack = 90, // NEW DEFAULT: 90 (was 30)
|
|
31
|
+
maxFiles = 5000, // NEW DEFAULT: 5000 (was implicit 1000)
|
|
32
|
+
// filePatterns used by legacy analyzeSourceFiles fallback (hardcoded in that method)
|
|
33
|
+
filePatterns: _filePatterns = ['*.ts', '*.js', '*.py', '*.java', '*.go', '*.tsx', '*.jsx'], // eslint-disable-line @typescript-eslint/no-unused-vars
|
|
34
|
+
skipPatterns = ['merge', 'wip', 'temp', 'revert'] } = options;
|
|
35
|
+
console.error('📚 Analyzing codebase for offline initialization...');
|
|
36
|
+
console.error(` Repo: ${repoPath}`);
|
|
37
|
+
console.error(` Mode: ${mode}`);
|
|
38
|
+
const allPatterns = [];
|
|
39
|
+
// Determine what to analyze based on mode
|
|
40
|
+
const shouldAnalyzeDocs = mode === 'hybrid' || mode === 'docs-only';
|
|
41
|
+
const shouldAnalyzeGit = mode === 'hybrid' || mode === 'both' || mode === 'git-history';
|
|
42
|
+
const shouldAnalyzeFiles = mode === 'hybrid' || mode === 'both' || mode === 'local-files';
|
|
43
|
+
// 1. Analyze documentation files (NEW - hybrid and docs-only modes)
|
|
44
|
+
if (shouldAnalyzeDocs) {
|
|
45
|
+
console.error(' Scanning documentation files...');
|
|
46
|
+
const docsPatterns = await this.analyzeDocumentation(repoPath);
|
|
47
|
+
console.error(` Extracted ${docsPatterns.length} patterns from documentation`);
|
|
48
|
+
allPatterns.push(...docsPatterns);
|
|
49
|
+
}
|
|
50
|
+
// 2. Analyze git history (if available and requested)
|
|
51
|
+
if (shouldAnalyzeGit) {
|
|
52
|
+
const hasGit = await this.hasGitRepo(repoPath);
|
|
53
|
+
if (hasGit) {
|
|
54
|
+
console.error(` Git repo detected - analyzing commits (${commitLimit} max, ${daysBack} days)`);
|
|
55
|
+
const commits = await this.analyzeGitHistory(repoPath, commitLimit, daysBack, skipPatterns);
|
|
56
|
+
console.error(` Found ${commits.length} relevant commits`);
|
|
57
|
+
const gitPatterns = await this.extractPatternsFromCommits(commits, repoPath);
|
|
58
|
+
console.error(` Extracted ${gitPatterns.length} patterns from git history`);
|
|
59
|
+
allPatterns.push(...gitPatterns);
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
console.error(' No git repo - skipping commit analysis');
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
// 3. Analyze local source files (if requested)
|
|
66
|
+
if (shouldAnalyzeFiles) {
|
|
67
|
+
console.error(` Analyzing local source files (max: ${maxFiles === -1 ? 'unlimited' : maxFiles})...`);
|
|
68
|
+
console.error(` Using GitHub Linguist for automatic language detection...`);
|
|
69
|
+
const sourcePatterns = await this.analyzeSourceFilesWithLinguist(repoPath, maxFiles);
|
|
70
|
+
console.error(` Extracted ${sourcePatterns.length} patterns from source files`);
|
|
71
|
+
allPatterns.push(...sourcePatterns);
|
|
72
|
+
}
|
|
73
|
+
// 4. Build structured playbook from combined sources
|
|
74
|
+
console.error(` Total patterns discovered: ${allPatterns.length}`);
|
|
75
|
+
const playbook = this.buildInitialPlaybook(allPatterns);
|
|
76
|
+
console.error('✅ Offline initialization complete');
|
|
77
|
+
return playbook;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Check if directory has git repository
|
|
81
|
+
*/
|
|
82
|
+
async hasGitRepo(repoPath) {
|
|
83
|
+
try {
|
|
84
|
+
await execAsync(`git -C "${repoPath}" rev-parse --git-dir`, {
|
|
85
|
+
timeout: 5000
|
|
86
|
+
});
|
|
87
|
+
return true;
|
|
88
|
+
}
|
|
89
|
+
catch {
|
|
90
|
+
return false;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Analyze git history to extract meaningful commits
|
|
95
|
+
*/
|
|
96
|
+
async analyzeGitHistory(repoPath, limit, daysBack, skipPatterns) {
|
|
97
|
+
const sinceDate = new Date();
|
|
98
|
+
sinceDate.setDate(sinceDate.getDate() - daysBack);
|
|
99
|
+
const since = sinceDate.toISOString().split('T')[0];
|
|
100
|
+
try {
|
|
101
|
+
// Get commit log with stats
|
|
102
|
+
const { stdout } = await execAsync(`git -C "${repoPath}" log --since="${since}" --pretty=format:"%H|%s|%an|%ai" --numstat -n ${limit}`, { maxBuffer: 10 * 1024 * 1024 } // 10MB buffer
|
|
103
|
+
);
|
|
104
|
+
const lines = stdout.split('\n');
|
|
105
|
+
const commits = [];
|
|
106
|
+
let currentCommit = null;
|
|
107
|
+
for (const line of lines) {
|
|
108
|
+
if (!line.trim()) {
|
|
109
|
+
if (currentCommit && currentCommit.hash) {
|
|
110
|
+
commits.push(currentCommit);
|
|
111
|
+
}
|
|
112
|
+
currentCommit = null;
|
|
113
|
+
continue;
|
|
114
|
+
}
|
|
115
|
+
// Commit header: hash|message|author|date
|
|
116
|
+
if (line.includes('|')) {
|
|
117
|
+
const [hash, message, author, date] = line.split('|');
|
|
118
|
+
// Skip commits with skip patterns
|
|
119
|
+
if (skipPatterns.some(p => message.toLowerCase().includes(p))) {
|
|
120
|
+
currentCommit = null;
|
|
121
|
+
continue;
|
|
122
|
+
}
|
|
123
|
+
currentCommit = {
|
|
124
|
+
hash,
|
|
125
|
+
message,
|
|
126
|
+
author,
|
|
127
|
+
date,
|
|
128
|
+
files: [],
|
|
129
|
+
additions: 0,
|
|
130
|
+
deletions: 0
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
// File stat: additions deletions filename
|
|
134
|
+
else if (currentCommit) {
|
|
135
|
+
const [additions, deletions, filename] = line.split('\t');
|
|
136
|
+
if (filename) {
|
|
137
|
+
currentCommit.files.push(filename);
|
|
138
|
+
currentCommit.additions += parseInt(additions) || 0;
|
|
139
|
+
currentCommit.deletions += parseInt(deletions) || 0;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
return commits;
|
|
144
|
+
}
|
|
145
|
+
catch (error) {
|
|
146
|
+
console.error('⚠️ Git analysis failed (not a git repo?), using empty history');
|
|
147
|
+
return [];
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
/**
|
|
151
|
+
* Analyze documentation files for best practices, troubleshooting, and architectural guidance
|
|
152
|
+
*
|
|
153
|
+
* Scans: CLAUDE.md, README.md, ARCHITECTURE.md, docs/ directory, and root markdown files
|
|
154
|
+
*/
|
|
155
|
+
async analyzeDocumentation(repoPath) {
|
|
156
|
+
const fs = await import('fs/promises');
|
|
157
|
+
const path = await import('path');
|
|
158
|
+
const patterns = [];
|
|
159
|
+
const docFiles = [
|
|
160
|
+
'CLAUDE.md',
|
|
161
|
+
'README.md',
|
|
162
|
+
'ARCHITECTURE.md',
|
|
163
|
+
'CONTRIBUTING.md',
|
|
164
|
+
'DEVELOPMENT.md',
|
|
165
|
+
'TROUBLESHOOTING.md'
|
|
166
|
+
];
|
|
167
|
+
// Scan root-level docs
|
|
168
|
+
for (const docFile of docFiles) {
|
|
169
|
+
const docPath = path.join(repoPath, docFile);
|
|
170
|
+
try {
|
|
171
|
+
const content = await fs.readFile(docPath, 'utf-8');
|
|
172
|
+
const extracted = await this.extractPatternsFromMarkdown(content, docFile);
|
|
173
|
+
patterns.push(...extracted);
|
|
174
|
+
}
|
|
175
|
+
catch {
|
|
176
|
+
// File doesn't exist, skip
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
// Scan docs/ directory
|
|
180
|
+
const docsDir = path.join(repoPath, 'docs');
|
|
181
|
+
try {
|
|
182
|
+
const docsDirFiles = await this.findMarkdownFiles(docsDir);
|
|
183
|
+
for (const file of docsDirFiles.slice(0, 20)) { // Max 20 docs files
|
|
184
|
+
try {
|
|
185
|
+
const content = await fs.readFile(file, 'utf-8');
|
|
186
|
+
const relativePath = path.relative(repoPath, file);
|
|
187
|
+
const extracted = await this.extractPatternsFromMarkdown(content, relativePath);
|
|
188
|
+
patterns.push(...extracted);
|
|
189
|
+
}
|
|
190
|
+
catch {
|
|
191
|
+
// Skip files that can't be read
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
catch {
|
|
196
|
+
// docs/ directory doesn't exist
|
|
197
|
+
}
|
|
198
|
+
return patterns;
|
|
199
|
+
}
|
|
200
|
+
/**
|
|
201
|
+
* Find markdown files in a directory
|
|
202
|
+
*/
|
|
203
|
+
async findMarkdownFiles(dir) {
|
|
204
|
+
const fs = await import('fs/promises');
|
|
205
|
+
const path = await import('path');
|
|
206
|
+
const files = [];
|
|
207
|
+
async function scan(currentDir, depth = 0) {
|
|
208
|
+
if (depth > 3)
|
|
209
|
+
return; // Max depth 3
|
|
210
|
+
try {
|
|
211
|
+
const entries = await fs.readdir(currentDir, { withFileTypes: true });
|
|
212
|
+
for (const entry of entries) {
|
|
213
|
+
if (entry.name.startsWith('.'))
|
|
214
|
+
continue;
|
|
215
|
+
const fullPath = path.join(currentDir, entry.name);
|
|
216
|
+
if (entry.isDirectory()) {
|
|
217
|
+
await scan(fullPath, depth + 1);
|
|
218
|
+
}
|
|
219
|
+
else if (entry.isFile() && entry.name.endsWith('.md')) {
|
|
220
|
+
files.push(fullPath);
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
catch {
|
|
225
|
+
// Skip directories we can't read
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
await scan(dir);
|
|
229
|
+
return files;
|
|
230
|
+
}
|
|
231
|
+
/**
|
|
232
|
+
* Extract patterns from markdown documentation
|
|
233
|
+
*/
|
|
234
|
+
async extractPatternsFromMarkdown(content, filename) {
|
|
235
|
+
const patterns = [];
|
|
236
|
+
// First, extract complete code blocks from markdown
|
|
237
|
+
const { extractCodeBlocksFromMarkdown } = await import('../utils/code-extractor.js');
|
|
238
|
+
const codeBlocks = extractCodeBlocksFromMarkdown(content);
|
|
239
|
+
for (const codeBlock of codeBlocks) {
|
|
240
|
+
patterns.push({
|
|
241
|
+
section: 'useful_code_snippets',
|
|
242
|
+
content: codeBlock, // Complete code block, not snippet
|
|
243
|
+
confidence: 0.9,
|
|
244
|
+
evidence: [filename]
|
|
245
|
+
});
|
|
246
|
+
}
|
|
247
|
+
// Then extract headers and their content for other patterns
|
|
248
|
+
const lines = content.split('\n');
|
|
249
|
+
let currentSection = '';
|
|
250
|
+
let currentContent = [];
|
|
251
|
+
for (let i = 0; i < lines.length; i++) {
|
|
252
|
+
const line = lines[i];
|
|
253
|
+
// Detect headers
|
|
254
|
+
if (line.startsWith('#')) {
|
|
255
|
+
// Process previous section
|
|
256
|
+
if (currentSection && currentContent.length > 0) {
|
|
257
|
+
const sectionText = currentContent.join('\n').trim(); // Join with newlines, not spaces
|
|
258
|
+
const pattern = this.classifyDocSection(currentSection, sectionText, filename);
|
|
259
|
+
if (pattern)
|
|
260
|
+
patterns.push(pattern);
|
|
261
|
+
}
|
|
262
|
+
currentSection = line.replace(/^#+\s*/, '').trim();
|
|
263
|
+
currentContent = [];
|
|
264
|
+
}
|
|
265
|
+
else if (line.trim()) {
|
|
266
|
+
currentContent.push(line); // Keep original line, not trimmed
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
// Process last section
|
|
270
|
+
if (currentSection && currentContent.length > 0) {
|
|
271
|
+
const sectionText = currentContent.join('\n').trim();
|
|
272
|
+
const pattern = this.classifyDocSection(currentSection, sectionText, filename);
|
|
273
|
+
if (pattern)
|
|
274
|
+
patterns.push(pattern);
|
|
275
|
+
}
|
|
276
|
+
return patterns;
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Classify documentation section into playbook category
|
|
280
|
+
*/
|
|
281
|
+
classifyDocSection(header, content, filename) {
|
|
282
|
+
const headerLower = header.toLowerCase();
|
|
283
|
+
// Increase from 200 to 2000 chars to capture more context
|
|
284
|
+
const contentSnippet = content.substring(0, 2000);
|
|
285
|
+
// Skip very short sections
|
|
286
|
+
if (content.length < 50)
|
|
287
|
+
return null;
|
|
288
|
+
// STRATEGIES: Best practices, coding standards, architecture
|
|
289
|
+
if (/best practice|coding standard|architecture|pattern|principle|rule|guideline/i.test(headerLower)) {
|
|
290
|
+
return {
|
|
291
|
+
section: 'strategies_and_hard_rules',
|
|
292
|
+
content: `${header}: ${contentSnippet}`,
|
|
293
|
+
confidence: 0.85,
|
|
294
|
+
evidence: [filename]
|
|
295
|
+
};
|
|
296
|
+
}
|
|
297
|
+
// TROUBLESHOOTING: Known issues, gotchas, common problems
|
|
298
|
+
if (/troubleshoot|known issue|common problem|gotcha|pitfall|warning|caveat/i.test(headerLower)) {
|
|
299
|
+
return {
|
|
300
|
+
section: 'troubleshooting_and_pitfalls',
|
|
301
|
+
content: `${header}: ${contentSnippet}`,
|
|
302
|
+
confidence: 0.9,
|
|
303
|
+
evidence: [filename]
|
|
304
|
+
};
|
|
305
|
+
}
|
|
306
|
+
// APIS: Integration guides, library usage, API documentation
|
|
307
|
+
if (/api|integration|library|framework|dependency|tool|service/i.test(headerLower)) {
|
|
308
|
+
return {
|
|
309
|
+
section: 'apis_to_use',
|
|
310
|
+
content: `${header}: ${contentSnippet}`,
|
|
311
|
+
confidence: 0.8,
|
|
312
|
+
evidence: [filename]
|
|
313
|
+
};
|
|
314
|
+
}
|
|
315
|
+
// CODE SNIPPETS: Example code, how-to sections
|
|
316
|
+
if (/example|snippet|code|usage|how to|quick start/i.test(headerLower) &&
|
|
317
|
+
content.includes('```')) {
|
|
318
|
+
return {
|
|
319
|
+
section: 'useful_code_snippets',
|
|
320
|
+
content: `${header}: ${contentSnippet}`,
|
|
321
|
+
confidence: 0.75,
|
|
322
|
+
evidence: [filename]
|
|
323
|
+
};
|
|
324
|
+
}
|
|
325
|
+
return null;
|
|
326
|
+
}
|
|
327
|
+
/**
|
|
328
|
+
* Analyze local source files using GitHub Linguist (NEW - replaces hardcoded extensions!)
|
|
329
|
+
*
|
|
330
|
+
* Automatically detects ALL programming languages using Linguist
|
|
331
|
+
* Supports: TypeScript, JavaScript, Python, Java, Go, Rust, Ruby, PHP, C#, Kotlin, Swift, and 100+ more!
|
|
332
|
+
*/
|
|
333
|
+
async analyzeSourceFilesWithLinguist(repoPath, maxFiles = 5000) {
|
|
334
|
+
const fs = await import('fs/promises');
|
|
335
|
+
const path = await import('path');
|
|
336
|
+
const discoveredPatterns = [];
|
|
337
|
+
try {
|
|
338
|
+
// Use Linguist to detect all programming files
|
|
339
|
+
console.error(' 🔍 Detecting languages with GitHub Linguist...');
|
|
340
|
+
const programmingFiles = await this.languageDetector.getProgrammingFiles(repoPath, maxFiles);
|
|
341
|
+
const languageBreakdown = await this.languageDetector.getLanguageBreakdown(repoPath);
|
|
342
|
+
console.error(` 📊 Detected ${Object.keys(languageBreakdown).length} languages: ${Object.keys(languageBreakdown).join(', ')}`);
|
|
343
|
+
console.error(` 📁 Found ${programmingFiles.length} source files`);
|
|
344
|
+
// Add language breakdown as a pattern
|
|
345
|
+
for (const [lang, percentage] of Object.entries(languageBreakdown)) {
|
|
346
|
+
if (percentage > 5) { // Only include if >5% of codebase
|
|
347
|
+
discoveredPatterns.push({
|
|
348
|
+
section: 'strategies_and_hard_rules',
|
|
349
|
+
content: `Primary language: ${lang} (${percentage.toFixed(1)}% of codebase)`,
|
|
350
|
+
confidence: 0.95,
|
|
351
|
+
evidence: ['linguist-analysis']
|
|
352
|
+
});
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
// Analyze dependency files (package.json, requirements.txt, etc.)
|
|
356
|
+
const depPatterns = await this.analyzeDependencyFiles(repoPath);
|
|
357
|
+
discoveredPatterns.push(...depPatterns);
|
|
358
|
+
// Sample source files for deeper analysis
|
|
359
|
+
const sampleFiles = programmingFiles.slice(0, Math.min(50, programmingFiles.length));
|
|
360
|
+
for (const filePath of sampleFiles) {
|
|
361
|
+
try {
|
|
362
|
+
const content = await fs.readFile(filePath, 'utf-8');
|
|
363
|
+
const relativePath = path.relative(repoPath, filePath);
|
|
364
|
+
// Extract complete function bodies instead of summaries
|
|
365
|
+
const { extractFunctionBodies, isInteresting } = await import('../utils/code-extractor.js');
|
|
366
|
+
const functions = extractFunctionBodies(content, relativePath);
|
|
367
|
+
for (const func of functions) {
|
|
368
|
+
// Only include interesting code (10+ lines with async/error handling/API calls)
|
|
369
|
+
if (func.lines.length >= 10 && isInteresting(func.code)) {
|
|
370
|
+
discoveredPatterns.push({
|
|
371
|
+
section: 'useful_code_snippets',
|
|
372
|
+
content: func.code, // Complete code with imports
|
|
373
|
+
confidence: 0.9,
|
|
374
|
+
evidence: [relativePath]
|
|
375
|
+
});
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
// Also extract smaller interesting blocks (5-9 lines) for quick patterns
|
|
379
|
+
for (const func of functions) {
|
|
380
|
+
if (func.lines.length >= 5 && func.lines.length < 10 && isInteresting(func.code)) {
|
|
381
|
+
discoveredPatterns.push({
|
|
382
|
+
section: 'useful_code_snippets',
|
|
383
|
+
content: func.code,
|
|
384
|
+
confidence: 0.75,
|
|
385
|
+
evidence: [relativePath]
|
|
386
|
+
});
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
catch {
|
|
391
|
+
// Skip files that can't be read
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
catch (error) {
|
|
396
|
+
console.error('⚠️ Linguist analysis failed, falling back to basic detection:', error);
|
|
397
|
+
// Fallback to old method if Linguist fails
|
|
398
|
+
return this.analyzeSourceFiles(repoPath, ['*.ts', '*.js', '*.py'], maxFiles);
|
|
399
|
+
}
|
|
400
|
+
return discoveredPatterns;
|
|
401
|
+
}
|
|
402
|
+
/**
|
|
403
|
+
* Analyze dependency files (package.json, requirements.txt, Cargo.toml, etc.)
|
|
404
|
+
*/
|
|
405
|
+
async analyzeDependencyFiles(repoPath) {
|
|
406
|
+
const fs = await import('fs/promises');
|
|
407
|
+
const path = await import('path');
|
|
408
|
+
const patterns = [];
|
|
409
|
+
// TypeScript/JavaScript: package.json
|
|
410
|
+
try {
|
|
411
|
+
const packageJsonPath = path.join(repoPath, 'package.json');
|
|
412
|
+
const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
|
|
413
|
+
const allDeps = {
|
|
414
|
+
...packageJson.dependencies,
|
|
415
|
+
...packageJson.devDependencies
|
|
416
|
+
};
|
|
417
|
+
const topDeps = Object.keys(allDeps).slice(0, 10);
|
|
418
|
+
for (const dep of topDeps) {
|
|
419
|
+
patterns.push({
|
|
420
|
+
section: 'apis_to_use',
|
|
421
|
+
content: `Project uses ${dep} (${allDeps[dep]})`,
|
|
422
|
+
confidence: 0.9,
|
|
423
|
+
evidence: ['package.json']
|
|
424
|
+
});
|
|
425
|
+
}
|
|
426
|
+
// Framework detection
|
|
427
|
+
if (allDeps['react']) {
|
|
428
|
+
patterns.push({
|
|
429
|
+
section: 'strategies_and_hard_rules',
|
|
430
|
+
content: 'React framework - use functional components with hooks',
|
|
431
|
+
confidence: 0.85,
|
|
432
|
+
evidence: ['package.json']
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
if (allDeps['express'] || allDeps['fastify']) {
|
|
436
|
+
patterns.push({
|
|
437
|
+
section: 'strategies_and_hard_rules',
|
|
438
|
+
content: 'Node.js backend - use async/await for all routes',
|
|
439
|
+
confidence: 0.85,
|
|
440
|
+
evidence: ['package.json']
|
|
441
|
+
});
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
catch {
|
|
445
|
+
// No package.json
|
|
446
|
+
}
|
|
447
|
+
// Python: requirements.txt
|
|
448
|
+
try {
|
|
449
|
+
const requirementsPath = path.join(repoPath, 'requirements.txt');
|
|
450
|
+
const requirements = await fs.readFile(requirementsPath, 'utf-8');
|
|
451
|
+
const deps = requirements.split('\n').filter(line => line.trim() && !line.startsWith('#'));
|
|
452
|
+
for (const dep of deps.slice(0, 10)) {
|
|
453
|
+
const pkgName = dep.split('==')[0].split('>=')[0].trim();
|
|
454
|
+
patterns.push({
|
|
455
|
+
section: 'apis_to_use',
|
|
456
|
+
content: `Python project uses ${pkgName}`,
|
|
457
|
+
confidence: 0.9,
|
|
458
|
+
evidence: ['requirements.txt']
|
|
459
|
+
});
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
catch {
|
|
463
|
+
// No requirements.txt
|
|
464
|
+
}
|
|
465
|
+
// Rust: Cargo.toml
|
|
466
|
+
try {
|
|
467
|
+
const cargoPath = path.join(repoPath, 'Cargo.toml');
|
|
468
|
+
const cargo = await fs.readFile(cargoPath, 'utf-8');
|
|
469
|
+
if (cargo) {
|
|
470
|
+
patterns.push({
|
|
471
|
+
section: 'strategies_and_hard_rules',
|
|
472
|
+
content: 'Rust project - follows ownership and borrowing principles',
|
|
473
|
+
confidence: 0.9,
|
|
474
|
+
evidence: ['Cargo.toml']
|
|
475
|
+
});
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
catch {
|
|
479
|
+
// No Cargo.toml
|
|
480
|
+
}
|
|
481
|
+
// Go: go.mod
|
|
482
|
+
try {
|
|
483
|
+
const goModPath = path.join(repoPath, 'go.mod');
|
|
484
|
+
const goMod = await fs.readFile(goModPath, 'utf-8');
|
|
485
|
+
if (goMod) {
|
|
486
|
+
patterns.push({
|
|
487
|
+
section: 'strategies_and_hard_rules',
|
|
488
|
+
content: 'Go project - use goroutines for concurrency, defer for cleanup',
|
|
489
|
+
confidence: 0.9,
|
|
490
|
+
evidence: ['go.mod']
|
|
491
|
+
});
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
catch {
|
|
495
|
+
// No go.mod
|
|
496
|
+
}
|
|
497
|
+
return patterns;
|
|
498
|
+
}
|
|
499
|
+
/**
|
|
500
|
+
* OLD METHOD: Analyze local source files for patterns (FALLBACK ONLY)
|
|
501
|
+
*
|
|
502
|
+
* @deprecated Use analyzeSourceFilesWithLinguist instead
|
|
503
|
+
*/
|
|
504
|
+
async analyzeSourceFiles(repoPath, patterns, maxFiles = 5000) {
|
|
505
|
+
const fs = await import('fs/promises');
|
|
506
|
+
const path = await import('path');
|
|
507
|
+
const discoveredPatterns = [];
|
|
508
|
+
try {
|
|
509
|
+
// Read package.json for dependencies (TypeScript/JavaScript)
|
|
510
|
+
const packageJsonPath = path.join(repoPath, 'package.json');
|
|
511
|
+
try {
|
|
512
|
+
const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
|
|
513
|
+
const allDeps = {
|
|
514
|
+
...packageJson.dependencies,
|
|
515
|
+
...packageJson.devDependencies
|
|
516
|
+
};
|
|
517
|
+
// Top dependencies used in project
|
|
518
|
+
const topDeps = Object.keys(allDeps).slice(0, 10);
|
|
519
|
+
for (const dep of topDeps) {
|
|
520
|
+
discoveredPatterns.push({
|
|
521
|
+
section: 'apis_to_use',
|
|
522
|
+
content: `Project uses ${dep} (${allDeps[dep]})`,
|
|
523
|
+
confidence: 0.9,
|
|
524
|
+
evidence: ['package.json']
|
|
525
|
+
});
|
|
526
|
+
}
|
|
527
|
+
// Framework detection
|
|
528
|
+
if (allDeps['react']) {
|
|
529
|
+
discoveredPatterns.push({
|
|
530
|
+
section: 'strategies_and_hard_rules',
|
|
531
|
+
content: 'React framework - use functional components with hooks',
|
|
532
|
+
confidence: 0.85,
|
|
533
|
+
evidence: ['package.json']
|
|
534
|
+
});
|
|
535
|
+
}
|
|
536
|
+
if (allDeps['express'] || allDeps['fastify']) {
|
|
537
|
+
discoveredPatterns.push({
|
|
538
|
+
section: 'strategies_and_hard_rules',
|
|
539
|
+
content: 'Node.js backend - use async/await for all routes',
|
|
540
|
+
confidence: 0.85,
|
|
541
|
+
evidence: ['package.json']
|
|
542
|
+
});
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
catch {
|
|
546
|
+
// No package.json or parse error
|
|
547
|
+
}
|
|
548
|
+
// Read requirements.txt for dependencies (Python)
|
|
549
|
+
const requirementsPath = path.join(repoPath, 'requirements.txt');
|
|
550
|
+
try {
|
|
551
|
+
const requirements = await fs.readFile(requirementsPath, 'utf-8');
|
|
552
|
+
const deps = requirements.split('\n').filter(line => line.trim() && !line.startsWith('#'));
|
|
553
|
+
for (const dep of deps.slice(0, 10)) {
|
|
554
|
+
const pkgName = dep.split('==')[0].split('>=')[0].trim();
|
|
555
|
+
discoveredPatterns.push({
|
|
556
|
+
section: 'apis_to_use',
|
|
557
|
+
content: `Python project uses ${pkgName}`,
|
|
558
|
+
confidence: 0.9,
|
|
559
|
+
evidence: ['requirements.txt']
|
|
560
|
+
});
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
catch {
|
|
564
|
+
// No requirements.txt
|
|
565
|
+
}
|
|
566
|
+
// Scan for common patterns in source files
|
|
567
|
+
const sourceFiles = await this.findSourceFiles(repoPath, patterns, maxFiles);
|
|
568
|
+
const sampleFiles = sourceFiles.slice(0, Math.min(50, sourceFiles.length)); // Sample up to 50 files
|
|
569
|
+
for (const filePath of sampleFiles) {
|
|
570
|
+
try {
|
|
571
|
+
const content = await fs.readFile(filePath, 'utf-8');
|
|
572
|
+
const relativePath = path.relative(repoPath, filePath);
|
|
573
|
+
// Detect import patterns
|
|
574
|
+
const imports = this.extractImports(content, filePath);
|
|
575
|
+
for (const imp of imports.slice(0, 5)) {
|
|
576
|
+
discoveredPatterns.push({
|
|
577
|
+
section: 'useful_code_snippets',
|
|
578
|
+
content: `Common import: ${imp}`,
|
|
579
|
+
confidence: 0.7,
|
|
580
|
+
evidence: [relativePath]
|
|
581
|
+
});
|
|
582
|
+
}
|
|
583
|
+
// Detect API/endpoint patterns
|
|
584
|
+
if (content.includes('app.get(') || content.includes('app.post(')) {
|
|
585
|
+
discoveredPatterns.push({
|
|
586
|
+
section: 'apis_to_use',
|
|
587
|
+
content: `REST API endpoints defined in ${relativePath}`,
|
|
588
|
+
confidence: 0.8,
|
|
589
|
+
evidence: [relativePath]
|
|
590
|
+
});
|
|
591
|
+
}
|
|
592
|
+
// Detect database patterns
|
|
593
|
+
if (content.includes('prisma') || content.includes('mongoose') || content.includes('typeorm')) {
|
|
594
|
+
discoveredPatterns.push({
|
|
595
|
+
section: 'strategies_and_hard_rules',
|
|
596
|
+
content: 'Uses ORM for database access - define models before queries',
|
|
597
|
+
confidence: 0.75,
|
|
598
|
+
evidence: [relativePath]
|
|
599
|
+
});
|
|
600
|
+
}
|
|
601
|
+
// Detect async patterns
|
|
602
|
+
if (content.includes('async ') && content.includes('await ')) {
|
|
603
|
+
discoveredPatterns.push({
|
|
604
|
+
section: 'strategies_and_hard_rules',
|
|
605
|
+
content: 'Codebase uses async/await - ensure all async functions are awaited',
|
|
606
|
+
confidence: 0.8,
|
|
607
|
+
evidence: [relativePath]
|
|
608
|
+
});
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
catch {
|
|
612
|
+
// Skip files that can't be read
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
catch (error) {
|
|
617
|
+
console.error('⚠️ Source file analysis failed:', error);
|
|
618
|
+
}
|
|
619
|
+
return discoveredPatterns;
|
|
620
|
+
}
|
|
621
|
+
/**
|
|
622
|
+
* Find source files matching patterns
|
|
623
|
+
*/
|
|
624
|
+
async findSourceFiles(repoPath, _patterns, // Used for filter criteria, currently hardcoded in scanDir
|
|
625
|
+
maxFiles = 5000) {
|
|
626
|
+
const fs = await import('fs/promises');
|
|
627
|
+
const path = await import('path');
|
|
628
|
+
const files = [];
|
|
629
|
+
async function scanDir(dir, depth = 0) {
|
|
630
|
+
// Stop if we've hit the limit (unless unlimited)
|
|
631
|
+
if (maxFiles !== -1 && files.length >= maxFiles)
|
|
632
|
+
return true;
|
|
633
|
+
if (depth > 5)
|
|
634
|
+
return false; // Max depth
|
|
635
|
+
try {
|
|
636
|
+
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
637
|
+
for (const entry of entries) {
|
|
638
|
+
// Stop if we've hit the limit
|
|
639
|
+
if (maxFiles !== -1 && files.length >= maxFiles)
|
|
640
|
+
return true;
|
|
641
|
+
// Skip node_modules, .git, dist, build
|
|
642
|
+
if (['node_modules', '.git', 'dist', 'build', '.next', 'target', '__pycache__', 'venv'].includes(entry.name)) {
|
|
643
|
+
continue;
|
|
644
|
+
}
|
|
645
|
+
const fullPath = path.join(dir, entry.name);
|
|
646
|
+
if (entry.isDirectory()) {
|
|
647
|
+
const shouldStop = await scanDir(fullPath, depth + 1);
|
|
648
|
+
if (shouldStop)
|
|
649
|
+
return true;
|
|
650
|
+
}
|
|
651
|
+
else if (entry.isFile()) {
|
|
652
|
+
// Check if matches patterns
|
|
653
|
+
const ext = path.extname(entry.name);
|
|
654
|
+
if (['.ts', '.js', '.tsx', '.jsx', '.py', '.java', '.go', '.rs', '.rb', '.php'].includes(ext)) {
|
|
655
|
+
files.push(fullPath);
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
catch {
|
|
661
|
+
// Skip directories we can't read
|
|
662
|
+
}
|
|
663
|
+
return false;
|
|
664
|
+
}
|
|
665
|
+
await scanDir(repoPath);
|
|
666
|
+
return files;
|
|
667
|
+
}
|
|
668
|
+
/**
|
|
669
|
+
* Extract import statements from source code
|
|
670
|
+
*/
|
|
671
|
+
extractImports(content, _filePath) {
|
|
672
|
+
const imports = [];
|
|
673
|
+
// TypeScript/JavaScript imports
|
|
674
|
+
const jsImportRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
675
|
+
let match;
|
|
676
|
+
while ((match = jsImportRegex.exec(content)) !== null) {
|
|
677
|
+
imports.push(match[1]);
|
|
678
|
+
}
|
|
679
|
+
// Python imports
|
|
680
|
+
const pyImportRegex = /^(?:from\s+(\S+)\s+)?import\s+(.+)$/gm;
|
|
681
|
+
while ((match = pyImportRegex.exec(content)) !== null) {
|
|
682
|
+
imports.push(match[1] || match[2].split(',')[0].trim());
|
|
683
|
+
}
|
|
684
|
+
return imports;
|
|
685
|
+
}
|
|
686
|
+
/**
|
|
687
|
+
* Extract patterns from commit analysis
|
|
688
|
+
*/
|
|
689
|
+
async extractPatternsFromCommits(commits, repoPath) {
|
|
690
|
+
const patterns = [];
|
|
691
|
+
const { extractAddedLinesFromDiff, isInteresting } = await import('../utils/code-extractor.js');
|
|
692
|
+
// 1. STRATEGIES from successful refactorings
|
|
693
|
+
const refactoringCommits = commits.filter(c => /refactor|improve|optimize|clean/i.test(c.message));
|
|
694
|
+
for (const commit of refactoringCommits.slice(0, 10)) {
|
|
695
|
+
try {
|
|
696
|
+
// Get actual diff for this commit
|
|
697
|
+
const { stdout: diff } = await execAsync(`cd "${repoPath}" && git show ${commit.hash}`, { maxBuffer: 1024 * 1024 * 10 });
|
|
698
|
+
const addedCode = extractAddedLinesFromDiff(diff);
|
|
699
|
+
// Only include if substantial code was added (10+ lines) and it's interesting
|
|
700
|
+
if (addedCode.split('\n').length >= 10 && isInteresting(addedCode)) {
|
|
701
|
+
patterns.push({
|
|
702
|
+
section: 'strategies_and_hard_rules',
|
|
703
|
+
content: `Refactoring pattern (${commit.message}):\n\n${addedCode.substring(0, 1500)}`,
|
|
704
|
+
confidence: 0.8,
|
|
705
|
+
evidence: [commit.hash, ...commit.files.slice(0, 3)]
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
catch {
|
|
710
|
+
// Fallback to commit message if git show fails
|
|
711
|
+
patterns.push({
|
|
712
|
+
section: 'strategies_and_hard_rules',
|
|
713
|
+
content: `Pattern from refactoring: ${commit.message}`,
|
|
714
|
+
confidence: 0.6,
|
|
715
|
+
evidence: [commit.hash]
|
|
716
|
+
});
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
// 2. TROUBLESHOOTING from bug fixes
|
|
720
|
+
const bugFixCommits = commits.filter(c => /fix|bug|error|crash|issue/i.test(c.message));
|
|
721
|
+
for (const commit of bugFixCommits.slice(0, 15)) {
|
|
722
|
+
try {
|
|
723
|
+
// Get actual fix code
|
|
724
|
+
const { stdout: diff } = await execAsync(`cd "${repoPath}" && git show ${commit.hash}`, { maxBuffer: 1024 * 1024 * 10 });
|
|
725
|
+
const fixedCode = extractAddedLinesFromDiff(diff);
|
|
726
|
+
if (fixedCode.split('\n').length >= 5 && isInteresting(fixedCode)) {
|
|
727
|
+
patterns.push({
|
|
728
|
+
section: 'troubleshooting_and_pitfalls',
|
|
729
|
+
content: `Bug fix (${commit.message}):\n\n${fixedCode.substring(0, 1500)}`,
|
|
730
|
+
confidence: 0.85,
|
|
731
|
+
evidence: [commit.hash, ...commit.files.slice(0, 3)]
|
|
732
|
+
});
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
catch {
|
|
736
|
+
// Fallback to commit message
|
|
737
|
+
patterns.push({
|
|
738
|
+
section: 'troubleshooting_and_pitfalls',
|
|
739
|
+
content: `Common issue: ${commit.message}`,
|
|
740
|
+
confidence: 0.7,
|
|
741
|
+
evidence: [commit.hash]
|
|
742
|
+
});
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
// 3. APIS from feature additions
|
|
746
|
+
const featureCommits = commits.filter(c => /add|implement|create|new/i.test(c.message) &&
|
|
747
|
+
!/(test|doc|comment)/i.test(c.message));
|
|
748
|
+
for (const commit of featureCommits.slice(0, 10)) {
|
|
749
|
+
if (commit.files.some(f => /api|service|client|interface/i.test(f))) {
|
|
750
|
+
try {
|
|
751
|
+
const { stdout: diff } = await execAsync(`cd "${repoPath}" && git show ${commit.hash}`, { maxBuffer: 1024 * 1024 * 10 });
|
|
752
|
+
const newCode = extractAddedLinesFromDiff(diff);
|
|
753
|
+
if (newCode.split('\n').length >= 10 && isInteresting(newCode)) {
|
|
754
|
+
patterns.push({
|
|
755
|
+
section: 'apis_to_use',
|
|
756
|
+
content: `API implementation (${commit.message}):\n\n${newCode.substring(0, 1500)}`,
|
|
757
|
+
confidence: 0.75,
|
|
758
|
+
evidence: [commit.hash, ...commit.files.slice(0, 3)]
|
|
759
|
+
});
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
catch {
|
|
763
|
+
// Skip if git show fails
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
}
|
|
767
|
+
// 4. FILE CHANGE PATTERNS - Files that change together
|
|
768
|
+
const fileCoOccurrence = this.analyzeFileCoOccurrence(commits);
|
|
769
|
+
for (const [fileSet, count] of fileCoOccurrence.slice(0, 5)) {
|
|
770
|
+
if (count >= 3) {
|
|
771
|
+
patterns.push({
|
|
772
|
+
section: 'strategies_and_hard_rules',
|
|
773
|
+
content: `Files that often change together: ${fileSet}`,
|
|
774
|
+
confidence: Math.min(0.9, count / 10),
|
|
775
|
+
evidence: [`Co-occurred ${count} times`]
|
|
776
|
+
});
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
// 5. COMMON ERROR PATTERNS
|
|
780
|
+
const errorPatterns = this.extractErrorPatterns(commits);
|
|
781
|
+
patterns.push(...errorPatterns);
|
|
782
|
+
return patterns;
|
|
783
|
+
}
|
|
784
|
+
/**
|
|
785
|
+
* Find files that frequently change together
|
|
786
|
+
*/
|
|
787
|
+
analyzeFileCoOccurrence(commits) {
|
|
788
|
+
const coOccurrence = new Map();
|
|
789
|
+
for (const commit of commits) {
|
|
790
|
+
if (commit.files.length >= 2 && commit.files.length <= 5) {
|
|
791
|
+
// Sort files to create consistent key
|
|
792
|
+
const fileSet = commit.files.sort().join(' + ');
|
|
793
|
+
coOccurrence.set(fileSet, (coOccurrence.get(fileSet) || 0) + 1);
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
return Array.from(coOccurrence.entries())
|
|
797
|
+
.sort((a, b) => b[1] - a[1]);
|
|
798
|
+
}
|
|
799
|
+
/**
|
|
800
|
+
* Extract error patterns from commit messages
|
|
801
|
+
*/
|
|
802
|
+
extractErrorPatterns(commits) {
|
|
803
|
+
const patterns = [];
|
|
804
|
+
const errorKeywords = [
|
|
805
|
+
'null pointer',
|
|
806
|
+
'undefined',
|
|
807
|
+
'not found',
|
|
808
|
+
'timeout',
|
|
809
|
+
'permission denied',
|
|
810
|
+
'connection refused',
|
|
811
|
+
'out of memory',
|
|
812
|
+
'race condition',
|
|
813
|
+
'deadlock'
|
|
814
|
+
];
|
|
815
|
+
for (const commit of commits) {
|
|
816
|
+
const messageLower = commit.message.toLowerCase();
|
|
817
|
+
for (const keyword of errorKeywords) {
|
|
818
|
+
if (messageLower.includes(keyword)) {
|
|
819
|
+
patterns.push({
|
|
820
|
+
section: 'troubleshooting_and_pitfalls',
|
|
821
|
+
content: `Watch out for ${keyword} errors: ${commit.message}`,
|
|
822
|
+
confidence: 0.75,
|
|
823
|
+
evidence: [commit.hash, ...commit.files.slice(0, 2)]
|
|
824
|
+
});
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
return patterns;
|
|
829
|
+
}
|
|
830
|
+
/**
|
|
831
|
+
* Build initial playbook from extracted patterns
|
|
832
|
+
*/
|
|
833
|
+
buildInitialPlaybook(patterns) {
|
|
834
|
+
const playbook = {
|
|
835
|
+
strategies_and_hard_rules: [],
|
|
836
|
+
useful_code_snippets: [],
|
|
837
|
+
troubleshooting_and_pitfalls: [],
|
|
838
|
+
apis_to_use: []
|
|
839
|
+
};
|
|
840
|
+
// Group patterns by section and deduplicate
|
|
841
|
+
const seenContent = new Set();
|
|
842
|
+
for (const pattern of patterns) {
|
|
843
|
+
// Skip duplicates
|
|
844
|
+
const contentKey = pattern.content.toLowerCase().substring(0, 50);
|
|
845
|
+
if (seenContent.has(contentKey))
|
|
846
|
+
continue;
|
|
847
|
+
seenContent.add(contentKey);
|
|
848
|
+
// Create bullet
|
|
849
|
+
const bullet = {
|
|
850
|
+
id: this.generateBulletId(),
|
|
851
|
+
section: pattern.section,
|
|
852
|
+
content: pattern.content,
|
|
853
|
+
helpful: 0, // Will be updated during online learning
|
|
854
|
+
harmful: 0,
|
|
855
|
+
confidence: pattern.confidence,
|
|
856
|
+
evidence: pattern.evidence,
|
|
857
|
+
observations: 0,
|
|
858
|
+
created_at: new Date().toISOString(),
|
|
859
|
+
last_used: new Date().toISOString()
|
|
860
|
+
};
|
|
861
|
+
playbook[pattern.section].push(bullet);
|
|
862
|
+
}
|
|
863
|
+
return playbook;
|
|
864
|
+
}
|
|
865
|
+
/**
|
|
866
|
+
* Generate bullet ID: ctx-{timestamp}-{random}
|
|
867
|
+
*/
|
|
868
|
+
generateBulletId() {
|
|
869
|
+
const timestamp = Math.floor(Date.now() / 1000);
|
|
870
|
+
const random = Math.random().toString(36).substring(2, 7);
|
|
871
|
+
return `ctx-${timestamp}-${random}`;
|
|
872
|
+
}
|
|
873
|
+
}
|
|
874
|
+
//# sourceMappingURL=initialization.js.map
|