@timmeck/brain 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BRAIN_PLAN.md +3324 -3324
- package/LICENSE +21 -21
- package/README.md +194 -188
- package/dist/brain.js +2 -0
- package/dist/brain.js.map +1 -1
- package/dist/cli/colors.d.ts +50 -0
- package/dist/cli/colors.js +106 -0
- package/dist/cli/colors.js.map +1 -0
- package/dist/cli/commands/config.d.ts +2 -0
- package/dist/cli/commands/config.js +165 -0
- package/dist/cli/commands/config.js.map +1 -0
- package/dist/cli/commands/dashboard.js +222 -8
- package/dist/cli/commands/dashboard.js.map +1 -1
- package/dist/cli/commands/export.js +3 -0
- package/dist/cli/commands/export.js.map +1 -1
- package/dist/cli/commands/import.js +24 -15
- package/dist/cli/commands/import.js.map +1 -1
- package/dist/cli/commands/insights.js +33 -6
- package/dist/cli/commands/insights.js.map +1 -1
- package/dist/cli/commands/learn.d.ts +2 -0
- package/dist/cli/commands/learn.js +22 -0
- package/dist/cli/commands/learn.js.map +1 -0
- package/dist/cli/commands/modules.js +25 -6
- package/dist/cli/commands/modules.js.map +1 -1
- package/dist/cli/commands/network.js +15 -9
- package/dist/cli/commands/network.js.map +1 -1
- package/dist/cli/commands/query.js +92 -25
- package/dist/cli/commands/query.js.map +1 -1
- package/dist/cli/commands/start.js +8 -5
- package/dist/cli/commands/start.js.map +1 -1
- package/dist/cli/commands/status.js +21 -16
- package/dist/cli/commands/status.js.map +1 -1
- package/dist/cli/commands/stop.js +5 -4
- package/dist/cli/commands/stop.js.map +1 -1
- package/dist/cli/ipc-helper.js +4 -3
- package/dist/cli/ipc-helper.js.map +1 -1
- package/dist/cli/update-check.d.ts +2 -0
- package/dist/cli/update-check.js +58 -0
- package/dist/cli/update-check.js.map +1 -0
- package/dist/db/migrations/001_core_schema.js +115 -115
- package/dist/db/migrations/002_learning_schema.js +33 -33
- package/dist/db/migrations/003_code_schema.js +48 -48
- package/dist/db/migrations/004_synapses_schema.js +52 -52
- package/dist/db/migrations/005_fts_indexes.js +73 -73
- package/dist/db/migrations/index.js +6 -6
- package/dist/db/repositories/antipattern.repository.js +3 -3
- package/dist/db/repositories/code-module.repository.d.ts +1 -0
- package/dist/db/repositories/code-module.repository.js +8 -0
- package/dist/db/repositories/code-module.repository.js.map +1 -1
- package/dist/db/repositories/error.repository.js +46 -46
- package/dist/db/repositories/insight.repository.js +3 -3
- package/dist/db/repositories/notification.repository.js +3 -3
- package/dist/db/repositories/project.repository.js +21 -21
- package/dist/db/repositories/rule.repository.js +24 -24
- package/dist/db/repositories/solution.repository.js +50 -50
- package/dist/db/repositories/synapse.repository.js +18 -18
- package/dist/db/repositories/terminal.repository.js +24 -24
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/ipc/router.d.ts +2 -0
- package/dist/ipc/router.js +7 -1
- package/dist/ipc/router.js.map +1 -1
- package/dist/services/code.service.d.ts +1 -1
- package/dist/services/code.service.js +5 -2
- package/dist/services/code.service.js.map +1 -1
- package/package.json +5 -4
- package/src/brain.ts +3 -0
- package/src/cli/colors.ts +116 -0
- package/src/cli/commands/config.ts +169 -0
- package/src/cli/commands/dashboard.ts +231 -8
- package/src/cli/commands/export.ts +4 -0
- package/src/cli/commands/import.ts +24 -15
- package/src/cli/commands/insights.ts +37 -5
- package/src/cli/commands/learn.ts +24 -0
- package/src/cli/commands/modules.ts +28 -5
- package/src/cli/commands/network.ts +15 -9
- package/src/cli/commands/query.ts +103 -26
- package/src/cli/commands/start.ts +8 -5
- package/src/cli/commands/status.ts +22 -16
- package/src/cli/commands/stop.ts +5 -4
- package/src/cli/ipc-helper.ts +4 -3
- package/src/cli/update-check.ts +63 -0
- package/src/code/analyzer.ts +77 -77
- package/src/code/fingerprint.ts +87 -87
- package/src/code/matcher.ts +64 -64
- package/src/code/parsers/generic.ts +29 -29
- package/src/code/parsers/python.ts +54 -54
- package/src/code/parsers/typescript.ts +65 -65
- package/src/code/registry.ts +60 -60
- package/src/code/scorer.ts +108 -108
- package/src/config.ts +111 -111
- package/src/db/connection.ts +22 -22
- package/src/db/migrations/001_core_schema.ts +120 -120
- package/src/db/migrations/002_learning_schema.ts +38 -38
- package/src/db/migrations/003_code_schema.ts +53 -53
- package/src/db/migrations/004_synapses_schema.ts +57 -57
- package/src/db/migrations/005_fts_indexes.ts +78 -78
- package/src/db/migrations/006_synapses_phase3.ts +17 -17
- package/src/db/migrations/index.ts +64 -64
- package/src/db/repositories/antipattern.repository.ts +66 -66
- package/src/db/repositories/code-module.repository.ts +9 -0
- package/src/db/repositories/error.repository.ts +149 -149
- package/src/db/repositories/insight.repository.ts +78 -78
- package/src/db/repositories/notification.repository.ts +66 -66
- package/src/db/repositories/project.repository.ts +93 -93
- package/src/db/repositories/rule.repository.ts +108 -108
- package/src/db/repositories/solution.repository.ts +154 -154
- package/src/db/repositories/synapse.repository.ts +153 -153
- package/src/db/repositories/terminal.repository.ts +101 -101
- package/src/hooks/post-tool-use.ts +90 -90
- package/src/hooks/post-write.ts +117 -117
- package/src/index.ts +4 -0
- package/src/ipc/client.ts +118 -118
- package/src/ipc/protocol.ts +35 -35
- package/src/ipc/router.ts +9 -1
- package/src/ipc/server.ts +110 -110
- package/src/learning/confidence-scorer.ts +47 -47
- package/src/learning/decay.ts +46 -46
- package/src/learning/learning-engine.ts +162 -162
- package/src/learning/pattern-extractor.ts +90 -90
- package/src/learning/rule-generator.ts +74 -74
- package/src/matching/error-matcher.ts +115 -115
- package/src/matching/fingerprint.ts +29 -29
- package/src/matching/similarity.ts +61 -61
- package/src/matching/tfidf.ts +74 -74
- package/src/matching/tokenizer.ts +41 -41
- package/src/mcp/auto-detect.ts +93 -93
- package/src/mcp/server.ts +73 -73
- package/src/mcp/tools.ts +290 -290
- package/src/parsing/error-parser.ts +28 -28
- package/src/parsing/parsers/compiler.ts +93 -93
- package/src/parsing/parsers/generic.ts +28 -28
- package/src/parsing/parsers/go.ts +97 -97
- package/src/parsing/parsers/node.ts +69 -69
- package/src/parsing/parsers/python.ts +62 -62
- package/src/parsing/parsers/rust.ts +50 -50
- package/src/parsing/parsers/shell.ts +42 -42
- package/src/parsing/types.ts +47 -47
- package/src/research/gap-analyzer.ts +135 -135
- package/src/research/insight-generator.ts +123 -123
- package/src/research/research-engine.ts +116 -116
- package/src/research/synergy-detector.ts +126 -126
- package/src/research/template-extractor.ts +130 -130
- package/src/research/trend-analyzer.ts +127 -127
- package/src/services/analytics.service.ts +87 -87
- package/src/services/code.service.ts +5 -2
- package/src/services/error.service.ts +164 -164
- package/src/services/notification.service.ts +41 -41
- package/src/services/prevention.service.ts +119 -119
- package/src/services/research.service.ts +93 -93
- package/src/services/solution.service.ts +116 -116
- package/src/services/synapse.service.ts +59 -59
- package/src/services/terminal.service.ts +81 -81
- package/src/synapses/activation.ts +80 -80
- package/src/synapses/decay.ts +38 -38
- package/src/synapses/hebbian.ts +69 -69
- package/src/synapses/pathfinder.ts +81 -81
- package/src/synapses/synapse-manager.ts +109 -109
- package/src/types/code.types.ts +52 -52
- package/src/types/config.types.ts +79 -79
- package/src/types/error.types.ts +67 -67
- package/src/types/ipc.types.ts +8 -8
- package/src/types/mcp.types.ts +53 -53
- package/src/types/research.types.ts +28 -28
- package/src/types/solution.types.ts +30 -30
- package/src/types/synapse.types.ts +49 -49
- package/src/utils/events.ts +45 -45
- package/src/utils/hash.ts +5 -5
- package/src/utils/logger.ts +48 -48
- package/src/utils/paths.ts +19 -19
- package/tests/fixtures/code-modules/modules.ts +83 -83
- package/tests/fixtures/errors/go.ts +9 -9
- package/tests/fixtures/errors/node.ts +24 -24
- package/tests/fixtures/errors/python.ts +21 -21
- package/tests/fixtures/errors/rust.ts +25 -25
- package/tests/fixtures/errors/shell.ts +15 -15
- package/tests/fixtures/solutions/solutions.ts +27 -27
- package/tests/helpers/setup-db.ts +52 -52
- package/tests/integration/code-flow.test.ts +86 -86
- package/tests/integration/error-flow.test.ts +83 -83
- package/tests/integration/ipc-flow.test.ts +166 -166
- package/tests/integration/learning-cycle.test.ts +82 -82
- package/tests/integration/synapse-flow.test.ts +117 -117
- package/tests/unit/code/analyzer.test.ts +58 -58
- package/tests/unit/code/fingerprint.test.ts +51 -51
- package/tests/unit/code/scorer.test.ts +55 -55
- package/tests/unit/learning/confidence-scorer.test.ts +60 -60
- package/tests/unit/learning/decay.test.ts +45 -45
- package/tests/unit/learning/pattern-extractor.test.ts +50 -50
- package/tests/unit/matching/error-matcher.test.ts +69 -69
- package/tests/unit/matching/fingerprint.test.ts +47 -47
- package/tests/unit/matching/similarity.test.ts +65 -65
- package/tests/unit/matching/tfidf.test.ts +71 -71
- package/tests/unit/matching/tokenizer.test.ts +83 -83
- package/tests/unit/parsing/parsers.test.ts +113 -113
- package/tests/unit/research/gap-analyzer.test.ts +45 -45
- package/tests/unit/research/trend-analyzer.test.ts +45 -45
- package/tests/unit/synapses/activation.test.ts +80 -80
- package/tests/unit/synapses/decay.test.ts +27 -27
- package/tests/unit/synapses/hebbian.test.ts +96 -96
- package/tests/unit/synapses/pathfinder.test.ts +72 -72
- package/tsconfig.json +18 -18
package/src/code/analyzer.ts
CHANGED
|
@@ -1,77 +1,77 @@
|
|
|
1
|
-
import type { ExportInfo } from '../types/code.types.js';
|
|
2
|
-
import * as tsParser from './parsers/typescript.js';
|
|
3
|
-
import * as pyParser from './parsers/python.js';
|
|
4
|
-
import * as genericParser from './parsers/generic.js';
|
|
5
|
-
|
|
6
|
-
export interface AnalysisResult {
|
|
7
|
-
exports: ExportInfo[];
|
|
8
|
-
externalDeps: string[];
|
|
9
|
-
internalDeps: string[];
|
|
10
|
-
isPure: boolean;
|
|
11
|
-
hasTypeAnnotations: boolean;
|
|
12
|
-
linesOfCode: number;
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
const SIDE_EFFECT_PATTERNS = [
|
|
16
|
-
'fs.', 'process.exit', 'process.env', 'console.', 'fetch(',
|
|
17
|
-
'XMLHttpRequest', 'document.', 'window.',
|
|
18
|
-
'global.', 'require(',
|
|
19
|
-
];
|
|
20
|
-
|
|
21
|
-
function getParser(language: string) {
|
|
22
|
-
switch (language) {
|
|
23
|
-
case 'typescript':
|
|
24
|
-
case 'javascript':
|
|
25
|
-
return tsParser;
|
|
26
|
-
case 'python':
|
|
27
|
-
return pyParser;
|
|
28
|
-
default:
|
|
29
|
-
return genericParser;
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
export function analyzeCode(source: string, language: string): AnalysisResult {
|
|
34
|
-
const parser = getParser(language);
|
|
35
|
-
const exports = parser.extractExports(source);
|
|
36
|
-
const { external, internal } = parser.extractImports(source);
|
|
37
|
-
const isPure = checkPurity(source);
|
|
38
|
-
const typed = parser.hasTypeAnnotations(source);
|
|
39
|
-
const linesOfCode = source.split('\n').filter(l => l.trim().length > 0).length;
|
|
40
|
-
|
|
41
|
-
return {
|
|
42
|
-
exports,
|
|
43
|
-
externalDeps: external,
|
|
44
|
-
internalDeps: internal,
|
|
45
|
-
isPure,
|
|
46
|
-
hasTypeAnnotations: typed,
|
|
47
|
-
linesOfCode,
|
|
48
|
-
};
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
export function checkPurity(source: string): boolean {
|
|
52
|
-
return !SIDE_EFFECT_PATTERNS.some(p => source.includes(p));
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
export function measureCohesion(exports: ExportInfo[]): number {
|
|
56
|
-
if (exports.length <= 1) return 1.0;
|
|
57
|
-
|
|
58
|
-
const names = exports.map(e =>
|
|
59
|
-
e.name
|
|
60
|
-
.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
|
|
61
|
-
.replace(/([a-z\d])([A-Z])/g, '$1 $2')
|
|
62
|
-
.toLowerCase()
|
|
63
|
-
.split(/\s+/)
|
|
64
|
-
);
|
|
65
|
-
|
|
66
|
-
const vocab = new Set<string>();
|
|
67
|
-
names.forEach(tokens => tokens.forEach(t => vocab.add(t)));
|
|
68
|
-
|
|
69
|
-
let sharedTokens = 0;
|
|
70
|
-
for (const token of vocab) {
|
|
71
|
-
const count = names.filter(n => n.includes(token)).length;
|
|
72
|
-
if (count > 1) sharedTokens += count;
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
const maxPossible = names.length * vocab.size;
|
|
76
|
-
return maxPossible === 0 ? 0 : sharedTokens / maxPossible;
|
|
77
|
-
}
|
|
1
|
+
import type { ExportInfo } from '../types/code.types.js';
|
|
2
|
+
import * as tsParser from './parsers/typescript.js';
|
|
3
|
+
import * as pyParser from './parsers/python.js';
|
|
4
|
+
import * as genericParser from './parsers/generic.js';
|
|
5
|
+
|
|
6
|
+
export interface AnalysisResult {
|
|
7
|
+
exports: ExportInfo[];
|
|
8
|
+
externalDeps: string[];
|
|
9
|
+
internalDeps: string[];
|
|
10
|
+
isPure: boolean;
|
|
11
|
+
hasTypeAnnotations: boolean;
|
|
12
|
+
linesOfCode: number;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
const SIDE_EFFECT_PATTERNS = [
|
|
16
|
+
'fs.', 'process.exit', 'process.env', 'console.', 'fetch(',
|
|
17
|
+
'XMLHttpRequest', 'document.', 'window.',
|
|
18
|
+
'global.', 'require(',
|
|
19
|
+
];
|
|
20
|
+
|
|
21
|
+
function getParser(language: string) {
|
|
22
|
+
switch (language) {
|
|
23
|
+
case 'typescript':
|
|
24
|
+
case 'javascript':
|
|
25
|
+
return tsParser;
|
|
26
|
+
case 'python':
|
|
27
|
+
return pyParser;
|
|
28
|
+
default:
|
|
29
|
+
return genericParser;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export function analyzeCode(source: string, language: string): AnalysisResult {
|
|
34
|
+
const parser = getParser(language);
|
|
35
|
+
const exports = parser.extractExports(source);
|
|
36
|
+
const { external, internal } = parser.extractImports(source);
|
|
37
|
+
const isPure = checkPurity(source);
|
|
38
|
+
const typed = parser.hasTypeAnnotations(source);
|
|
39
|
+
const linesOfCode = source.split('\n').filter(l => l.trim().length > 0).length;
|
|
40
|
+
|
|
41
|
+
return {
|
|
42
|
+
exports,
|
|
43
|
+
externalDeps: external,
|
|
44
|
+
internalDeps: internal,
|
|
45
|
+
isPure,
|
|
46
|
+
hasTypeAnnotations: typed,
|
|
47
|
+
linesOfCode,
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export function checkPurity(source: string): boolean {
|
|
52
|
+
return !SIDE_EFFECT_PATTERNS.some(p => source.includes(p));
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
export function measureCohesion(exports: ExportInfo[]): number {
|
|
56
|
+
if (exports.length <= 1) return 1.0;
|
|
57
|
+
|
|
58
|
+
const names = exports.map(e =>
|
|
59
|
+
e.name
|
|
60
|
+
.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
|
|
61
|
+
.replace(/([a-z\d])([A-Z])/g, '$1 $2')
|
|
62
|
+
.toLowerCase()
|
|
63
|
+
.split(/\s+/)
|
|
64
|
+
);
|
|
65
|
+
|
|
66
|
+
const vocab = new Set<string>();
|
|
67
|
+
names.forEach(tokens => tokens.forEach(t => vocab.add(t)));
|
|
68
|
+
|
|
69
|
+
let sharedTokens = 0;
|
|
70
|
+
for (const token of vocab) {
|
|
71
|
+
const count = names.filter(n => n.includes(token)).length;
|
|
72
|
+
if (count > 1) sharedTokens += count;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
const maxPossible = names.length * vocab.size;
|
|
76
|
+
return maxPossible === 0 ? 0 : sharedTokens / maxPossible;
|
|
77
|
+
}
|
package/src/code/fingerprint.ts
CHANGED
|
@@ -1,87 +1,87 @@
|
|
|
1
|
-
import { sha256 } from '../utils/hash.js';
|
|
2
|
-
|
|
3
|
-
export function fingerprintCode(source: string, language: string): string {
|
|
4
|
-
let normalized = stripComments(source, language);
|
|
5
|
-
normalized = normalized.replace(/\s+/g, ' ').trim();
|
|
6
|
-
normalized = normalizeIdentifiers(normalized, language);
|
|
7
|
-
normalized = normalized.replace(/'[^']*'/g, "'<STR>'");
|
|
8
|
-
normalized = normalized.replace(/"[^"]*"/g, '"<STR>"');
|
|
9
|
-
normalized = normalized.replace(/`[^`]*`/g, '`<STR>`');
|
|
10
|
-
normalized = normalized.replace(/\b\d+\b/g, '<NUM>');
|
|
11
|
-
return sha256(normalized);
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export function stripComments(source: string, language: string): string {
|
|
15
|
-
switch (language) {
|
|
16
|
-
case 'typescript':
|
|
17
|
-
case 'javascript':
|
|
18
|
-
case 'java':
|
|
19
|
-
case 'go':
|
|
20
|
-
case 'rust':
|
|
21
|
-
case 'c':
|
|
22
|
-
case 'cpp':
|
|
23
|
-
return source
|
|
24
|
-
.replace(/\/\/.*$/gm, '')
|
|
25
|
-
.replace(/\/\*[\s\S]*?\*\//g, '');
|
|
26
|
-
case 'python':
|
|
27
|
-
return source
|
|
28
|
-
.replace(/#.*$/gm, '')
|
|
29
|
-
.replace(/"""[\s\S]*?"""/g, '')
|
|
30
|
-
.replace(/'''[\s\S]*?'''/g, '');
|
|
31
|
-
default:
|
|
32
|
-
return source
|
|
33
|
-
.replace(/\/\/.*$/gm, '')
|
|
34
|
-
.replace(/#.*$/gm, '');
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
function normalizeIdentifiers(source: string, language: string): string {
|
|
39
|
-
const importNames = extractImportNames(source, language);
|
|
40
|
-
const keywords = getLanguageKeywords(language);
|
|
41
|
-
const preserve = new Set([...importNames, ...keywords]);
|
|
42
|
-
|
|
43
|
-
return source.replace(/\b[a-zA-Z_]\w*\b/g, (match) => {
|
|
44
|
-
if (preserve.has(match)) return match;
|
|
45
|
-
if (match[0] === match[0]!.toUpperCase() && match[0] !== match[0]!.toLowerCase()) return '<CLASS>';
|
|
46
|
-
return '<VAR>';
|
|
47
|
-
});
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
function extractImportNames(source: string, language: string): string[] {
|
|
51
|
-
const names: string[] = [];
|
|
52
|
-
|
|
53
|
-
if (language === 'typescript' || language === 'javascript') {
|
|
54
|
-
const re = /import\s+(?:\{([^}]+)\}|\*\s+as\s+(\w+)|(\w+))/g;
|
|
55
|
-
let m: RegExpExecArray | null;
|
|
56
|
-
while ((m = re.exec(source)) !== null) {
|
|
57
|
-
if (m[1]) names.push(...m[1].split(',').map(s => s.trim().split(/\s+as\s+/).pop()!));
|
|
58
|
-
if (m[2]) names.push(m[2]);
|
|
59
|
-
if (m[3]) names.push(m[3]);
|
|
60
|
-
}
|
|
61
|
-
} else if (language === 'python') {
|
|
62
|
-
const re = /(?:from\s+\S+\s+)?import\s+(.+)/g;
|
|
63
|
-
let m: RegExpExecArray | null;
|
|
64
|
-
while ((m = re.exec(source)) !== null) {
|
|
65
|
-
names.push(...m[1]!.split(',').map(s => {
|
|
66
|
-
const parts = s.trim().split(/\s+as\s+/);
|
|
67
|
-
return parts[parts.length - 1]!;
|
|
68
|
-
}));
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
return names.filter(Boolean);
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
function getLanguageKeywords(language: string): string[] {
|
|
76
|
-
const common = ['if', 'else', 'for', 'while', 'return', 'break', 'continue', 'switch', 'case', 'default', 'try', 'catch', 'throw', 'new', 'delete', 'true', 'false', 'null', 'undefined', 'void'];
|
|
77
|
-
|
|
78
|
-
const langKeywords: Record<string, string[]> = {
|
|
79
|
-
typescript: [...common, 'const', 'let', 'var', 'function', 'class', 'interface', 'type', 'enum', 'import', 'export', 'from', 'async', 'await', 'extends', 'implements', 'readonly', 'private', 'public', 'protected', 'static', 'abstract', 'as', 'is', 'in', 'of', 'typeof', 'keyof', 'infer', 'never', 'unknown', 'any', 'string', 'number', 'boolean', 'symbol', 'object'],
|
|
80
|
-
javascript: [...common, 'const', 'let', 'var', 'function', 'class', 'import', 'export', 'from', 'async', 'await', 'extends', 'typeof', 'instanceof', 'in', 'of', 'this', 'super', 'yield'],
|
|
81
|
-
python: ['def', 'class', 'import', 'from', 'if', 'elif', 'else', 'for', 'while', 'return', 'yield', 'break', 'continue', 'try', 'except', 'finally', 'raise', 'with', 'as', 'pass', 'lambda', 'True', 'False', 'None', 'and', 'or', 'not', 'in', 'is', 'global', 'nonlocal', 'assert', 'async', 'await', 'self'],
|
|
82
|
-
rust: [...common, 'fn', 'let', 'mut', 'pub', 'struct', 'enum', 'impl', 'trait', 'use', 'mod', 'crate', 'self', 'super', 'match', 'loop', 'move', 'ref', 'where', 'async', 'await', 'dyn', 'Box', 'Vec', 'String', 'Option', 'Result', 'Some', 'None', 'Ok', 'Err'],
|
|
83
|
-
go: [...common, 'func', 'package', 'import', 'type', 'struct', 'interface', 'map', 'chan', 'go', 'defer', 'select', 'range', 'var', 'const', 'nil', 'make', 'len', 'append', 'cap', 'copy', 'close'],
|
|
84
|
-
};
|
|
85
|
-
|
|
86
|
-
return langKeywords[language] ?? common;
|
|
87
|
-
}
|
|
1
|
+
import { sha256 } from '../utils/hash.js';
|
|
2
|
+
|
|
3
|
+
export function fingerprintCode(source: string, language: string): string {
|
|
4
|
+
let normalized = stripComments(source, language);
|
|
5
|
+
normalized = normalized.replace(/\s+/g, ' ').trim();
|
|
6
|
+
normalized = normalizeIdentifiers(normalized, language);
|
|
7
|
+
normalized = normalized.replace(/'[^']*'/g, "'<STR>'");
|
|
8
|
+
normalized = normalized.replace(/"[^"]*"/g, '"<STR>"');
|
|
9
|
+
normalized = normalized.replace(/`[^`]*`/g, '`<STR>`');
|
|
10
|
+
normalized = normalized.replace(/\b\d+\b/g, '<NUM>');
|
|
11
|
+
return sha256(normalized);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export function stripComments(source: string, language: string): string {
|
|
15
|
+
switch (language) {
|
|
16
|
+
case 'typescript':
|
|
17
|
+
case 'javascript':
|
|
18
|
+
case 'java':
|
|
19
|
+
case 'go':
|
|
20
|
+
case 'rust':
|
|
21
|
+
case 'c':
|
|
22
|
+
case 'cpp':
|
|
23
|
+
return source
|
|
24
|
+
.replace(/\/\/.*$/gm, '')
|
|
25
|
+
.replace(/\/\*[\s\S]*?\*\//g, '');
|
|
26
|
+
case 'python':
|
|
27
|
+
return source
|
|
28
|
+
.replace(/#.*$/gm, '')
|
|
29
|
+
.replace(/"""[\s\S]*?"""/g, '')
|
|
30
|
+
.replace(/'''[\s\S]*?'''/g, '');
|
|
31
|
+
default:
|
|
32
|
+
return source
|
|
33
|
+
.replace(/\/\/.*$/gm, '')
|
|
34
|
+
.replace(/#.*$/gm, '');
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function normalizeIdentifiers(source: string, language: string): string {
|
|
39
|
+
const importNames = extractImportNames(source, language);
|
|
40
|
+
const keywords = getLanguageKeywords(language);
|
|
41
|
+
const preserve = new Set([...importNames, ...keywords]);
|
|
42
|
+
|
|
43
|
+
return source.replace(/\b[a-zA-Z_]\w*\b/g, (match) => {
|
|
44
|
+
if (preserve.has(match)) return match;
|
|
45
|
+
if (match[0] === match[0]!.toUpperCase() && match[0] !== match[0]!.toLowerCase()) return '<CLASS>';
|
|
46
|
+
return '<VAR>';
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
function extractImportNames(source: string, language: string): string[] {
|
|
51
|
+
const names: string[] = [];
|
|
52
|
+
|
|
53
|
+
if (language === 'typescript' || language === 'javascript') {
|
|
54
|
+
const re = /import\s+(?:\{([^}]+)\}|\*\s+as\s+(\w+)|(\w+))/g;
|
|
55
|
+
let m: RegExpExecArray | null;
|
|
56
|
+
while ((m = re.exec(source)) !== null) {
|
|
57
|
+
if (m[1]) names.push(...m[1].split(',').map(s => s.trim().split(/\s+as\s+/).pop()!));
|
|
58
|
+
if (m[2]) names.push(m[2]);
|
|
59
|
+
if (m[3]) names.push(m[3]);
|
|
60
|
+
}
|
|
61
|
+
} else if (language === 'python') {
|
|
62
|
+
const re = /(?:from\s+\S+\s+)?import\s+(.+)/g;
|
|
63
|
+
let m: RegExpExecArray | null;
|
|
64
|
+
while ((m = re.exec(source)) !== null) {
|
|
65
|
+
names.push(...m[1]!.split(',').map(s => {
|
|
66
|
+
const parts = s.trim().split(/\s+as\s+/);
|
|
67
|
+
return parts[parts.length - 1]!;
|
|
68
|
+
}));
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
return names.filter(Boolean);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
function getLanguageKeywords(language: string): string[] {
|
|
76
|
+
const common = ['if', 'else', 'for', 'while', 'return', 'break', 'continue', 'switch', 'case', 'default', 'try', 'catch', 'throw', 'new', 'delete', 'true', 'false', 'null', 'undefined', 'void'];
|
|
77
|
+
|
|
78
|
+
const langKeywords: Record<string, string[]> = {
|
|
79
|
+
typescript: [...common, 'const', 'let', 'var', 'function', 'class', 'interface', 'type', 'enum', 'import', 'export', 'from', 'async', 'await', 'extends', 'implements', 'readonly', 'private', 'public', 'protected', 'static', 'abstract', 'as', 'is', 'in', 'of', 'typeof', 'keyof', 'infer', 'never', 'unknown', 'any', 'string', 'number', 'boolean', 'symbol', 'object'],
|
|
80
|
+
javascript: [...common, 'const', 'let', 'var', 'function', 'class', 'import', 'export', 'from', 'async', 'await', 'extends', 'typeof', 'instanceof', 'in', 'of', 'this', 'super', 'yield'],
|
|
81
|
+
python: ['def', 'class', 'import', 'from', 'if', 'elif', 'else', 'for', 'while', 'return', 'yield', 'break', 'continue', 'try', 'except', 'finally', 'raise', 'with', 'as', 'pass', 'lambda', 'True', 'False', 'None', 'and', 'or', 'not', 'in', 'is', 'global', 'nonlocal', 'assert', 'async', 'await', 'self'],
|
|
82
|
+
rust: [...common, 'fn', 'let', 'mut', 'pub', 'struct', 'enum', 'impl', 'trait', 'use', 'mod', 'crate', 'self', 'super', 'match', 'loop', 'move', 'ref', 'where', 'async', 'await', 'dyn', 'Box', 'Vec', 'String', 'Option', 'Result', 'Some', 'None', 'Ok', 'Err'],
|
|
83
|
+
go: [...common, 'func', 'package', 'import', 'type', 'struct', 'interface', 'map', 'chan', 'go', 'defer', 'select', 'range', 'var', 'const', 'nil', 'make', 'len', 'append', 'cap', 'copy', 'close'],
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
return langKeywords[language] ?? common;
|
|
87
|
+
}
|
package/src/code/matcher.ts
CHANGED
|
@@ -1,64 +1,64 @@
|
|
|
1
|
-
import type { CodeModuleRecord } from '../types/code.types.js';
|
|
2
|
-
import { fingerprintCode } from './fingerprint.js';
|
|
3
|
-
import { tokenize } from '../matching/tokenizer.js';
|
|
4
|
-
import { cosineSimilarity, jaccardSimilarity } from '../matching/similarity.js';
|
|
5
|
-
|
|
6
|
-
export interface CodeMatchResult {
|
|
7
|
-
moduleId: number;
|
|
8
|
-
score: number;
|
|
9
|
-
matchType: 'exact' | 'structural' | 'semantic';
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
export function findExactMatches(
|
|
13
|
-
fingerprint: string,
|
|
14
|
-
candidates: CodeModuleRecord[],
|
|
15
|
-
): CodeMatchResult[] {
|
|
16
|
-
return candidates
|
|
17
|
-
.filter(c => c.fingerprint === fingerprint)
|
|
18
|
-
.map(c => ({ moduleId: c.id, score: 1.0, matchType: 'exact' as const }));
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
export function findStructuralMatches(
|
|
22
|
-
source: string,
|
|
23
|
-
language: string,
|
|
24
|
-
candidates: CodeModuleRecord[],
|
|
25
|
-
threshold: number = 0.75,
|
|
26
|
-
): CodeMatchResult[] {
|
|
27
|
-
const fp = fingerprintCode(source, language);
|
|
28
|
-
const results: CodeMatchResult[] = [];
|
|
29
|
-
|
|
30
|
-
for (const candidate of candidates) {
|
|
31
|
-
if (candidate.fingerprint === fp) {
|
|
32
|
-
results.push({ moduleId: candidate.id, score: 1.0, matchType: 'structural' });
|
|
33
|
-
continue;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
const tokensA = tokenize(source);
|
|
37
|
-
const tokensB = tokenize(candidate.name + ' ' + (candidate.description ?? ''));
|
|
38
|
-
const sim = cosineSimilarity(tokensA, tokensB);
|
|
39
|
-
if (sim >= threshold) {
|
|
40
|
-
results.push({ moduleId: candidate.id, score: sim, matchType: 'structural' });
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
return results.sort((a, b) => b.score - a.score);
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
export function findSemanticMatches(
|
|
48
|
-
description: string,
|
|
49
|
-
candidates: CodeModuleRecord[],
|
|
50
|
-
threshold: number = 0.5,
|
|
51
|
-
): CodeMatchResult[] {
|
|
52
|
-
const queryTokens = tokenize(description);
|
|
53
|
-
|
|
54
|
-
return candidates
|
|
55
|
-
.map(c => {
|
|
56
|
-
const candidateTokens = tokenize(
|
|
57
|
-
[c.name, c.description ?? '', c.file_path].join(' ')
|
|
58
|
-
);
|
|
59
|
-
const score = cosineSimilarity(queryTokens, candidateTokens);
|
|
60
|
-
return { moduleId: c.id, score, matchType: 'semantic' as const };
|
|
61
|
-
})
|
|
62
|
-
.filter(r => r.score >= threshold)
|
|
63
|
-
.sort((a, b) => b.score - a.score);
|
|
64
|
-
}
|
|
1
|
+
import type { CodeModuleRecord } from '../types/code.types.js';
|
|
2
|
+
import { fingerprintCode } from './fingerprint.js';
|
|
3
|
+
import { tokenize } from '../matching/tokenizer.js';
|
|
4
|
+
import { cosineSimilarity, jaccardSimilarity } from '../matching/similarity.js';
|
|
5
|
+
|
|
6
|
+
export interface CodeMatchResult {
|
|
7
|
+
moduleId: number;
|
|
8
|
+
score: number;
|
|
9
|
+
matchType: 'exact' | 'structural' | 'semantic';
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export function findExactMatches(
|
|
13
|
+
fingerprint: string,
|
|
14
|
+
candidates: CodeModuleRecord[],
|
|
15
|
+
): CodeMatchResult[] {
|
|
16
|
+
return candidates
|
|
17
|
+
.filter(c => c.fingerprint === fingerprint)
|
|
18
|
+
.map(c => ({ moduleId: c.id, score: 1.0, matchType: 'exact' as const }));
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export function findStructuralMatches(
|
|
22
|
+
source: string,
|
|
23
|
+
language: string,
|
|
24
|
+
candidates: CodeModuleRecord[],
|
|
25
|
+
threshold: number = 0.75,
|
|
26
|
+
): CodeMatchResult[] {
|
|
27
|
+
const fp = fingerprintCode(source, language);
|
|
28
|
+
const results: CodeMatchResult[] = [];
|
|
29
|
+
|
|
30
|
+
for (const candidate of candidates) {
|
|
31
|
+
if (candidate.fingerprint === fp) {
|
|
32
|
+
results.push({ moduleId: candidate.id, score: 1.0, matchType: 'structural' });
|
|
33
|
+
continue;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
const tokensA = tokenize(source);
|
|
37
|
+
const tokensB = tokenize(candidate.name + ' ' + (candidate.description ?? ''));
|
|
38
|
+
const sim = cosineSimilarity(tokensA, tokensB);
|
|
39
|
+
if (sim >= threshold) {
|
|
40
|
+
results.push({ moduleId: candidate.id, score: sim, matchType: 'structural' });
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return results.sort((a, b) => b.score - a.score);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export function findSemanticMatches(
|
|
48
|
+
description: string,
|
|
49
|
+
candidates: CodeModuleRecord[],
|
|
50
|
+
threshold: number = 0.5,
|
|
51
|
+
): CodeMatchResult[] {
|
|
52
|
+
const queryTokens = tokenize(description);
|
|
53
|
+
|
|
54
|
+
return candidates
|
|
55
|
+
.map(c => {
|
|
56
|
+
const candidateTokens = tokenize(
|
|
57
|
+
[c.name, c.description ?? '', c.file_path].join(' ')
|
|
58
|
+
);
|
|
59
|
+
const score = cosineSimilarity(queryTokens, candidateTokens);
|
|
60
|
+
return { moduleId: c.id, score, matchType: 'semantic' as const };
|
|
61
|
+
})
|
|
62
|
+
.filter(r => r.score >= threshold)
|
|
63
|
+
.sort((a, b) => b.score - a.score);
|
|
64
|
+
}
|
|
@@ -1,29 +1,29 @@
|
|
|
1
|
-
import type { ExportInfo } from '../../types/code.types.js';
|
|
2
|
-
|
|
3
|
-
const FUNC_RE = /(?:function|func|fn|def|sub)\s+(\w+)/g;
|
|
4
|
-
const CLASS_RE = /(?:class|struct|type)\s+(\w+)/g;
|
|
5
|
-
|
|
6
|
-
export function extractExports(source: string): ExportInfo[] {
|
|
7
|
-
const exports: ExportInfo[] = [];
|
|
8
|
-
let match: RegExpExecArray | null;
|
|
9
|
-
|
|
10
|
-
const funcRe = new RegExp(FUNC_RE.source, 'g');
|
|
11
|
-
while ((match = funcRe.exec(source)) !== null) {
|
|
12
|
-
exports.push({ name: match[1]!, type: 'function' });
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
const classRe = new RegExp(CLASS_RE.source, 'g');
|
|
16
|
-
while ((match = classRe.exec(source)) !== null) {
|
|
17
|
-
exports.push({ name: match[1]!, type: 'class' });
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
return exports;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
export function extractImports(_source: string): { external: string[]; internal: string[] } {
|
|
24
|
-
return { external: [], internal: [] };
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
export function hasTypeAnnotations(_source: string): boolean {
|
|
28
|
-
return false;
|
|
29
|
-
}
|
|
1
|
+
import type { ExportInfo } from '../../types/code.types.js';
|
|
2
|
+
|
|
3
|
+
const FUNC_RE = /(?:function|func|fn|def|sub)\s+(\w+)/g;
|
|
4
|
+
const CLASS_RE = /(?:class|struct|type)\s+(\w+)/g;
|
|
5
|
+
|
|
6
|
+
export function extractExports(source: string): ExportInfo[] {
|
|
7
|
+
const exports: ExportInfo[] = [];
|
|
8
|
+
let match: RegExpExecArray | null;
|
|
9
|
+
|
|
10
|
+
const funcRe = new RegExp(FUNC_RE.source, 'g');
|
|
11
|
+
while ((match = funcRe.exec(source)) !== null) {
|
|
12
|
+
exports.push({ name: match[1]!, type: 'function' });
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
const classRe = new RegExp(CLASS_RE.source, 'g');
|
|
16
|
+
while ((match = classRe.exec(source)) !== null) {
|
|
17
|
+
exports.push({ name: match[1]!, type: 'class' });
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
return exports;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export function extractImports(_source: string): { external: string[]; internal: string[] } {
|
|
24
|
+
return { external: [], internal: [] };
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export function hasTypeAnnotations(_source: string): boolean {
|
|
28
|
+
return false;
|
|
29
|
+
}
|
|
@@ -1,54 +1,54 @@
|
|
|
1
|
-
import type { ExportInfo } from '../../types/code.types.js';
|
|
2
|
-
|
|
3
|
-
const FUNC_DEF_RE = /^def\s+(\w+)\s*\(/gm;
|
|
4
|
-
const CLASS_DEF_RE = /^class\s+(\w+)/gm;
|
|
5
|
-
const IMPORT_RE = /^(?:from\s+(\S+)\s+)?import\s+(.+)/gm;
|
|
6
|
-
const TOP_LEVEL_ASSIGN_RE = /^([A-Z_][A-Z_\d]*)\s*=/gm;
|
|
7
|
-
|
|
8
|
-
export function extractExports(source: string): ExportInfo[] {
|
|
9
|
-
const exports: ExportInfo[] = [];
|
|
10
|
-
let match: RegExpExecArray | null;
|
|
11
|
-
|
|
12
|
-
const funcRe = new RegExp(FUNC_DEF_RE.source, 'gm');
|
|
13
|
-
while ((match = funcRe.exec(source)) !== null) {
|
|
14
|
-
if (!match[1]!.startsWith('_')) {
|
|
15
|
-
exports.push({ name: match[1]!, type: 'function' });
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
const classRe = new RegExp(CLASS_DEF_RE.source, 'gm');
|
|
20
|
-
while ((match = classRe.exec(source)) !== null) {
|
|
21
|
-
if (!match[1]!.startsWith('_')) {
|
|
22
|
-
exports.push({ name: match[1]!, type: 'class' });
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
const constRe = new RegExp(TOP_LEVEL_ASSIGN_RE.source, 'gm');
|
|
27
|
-
while ((match = constRe.exec(source)) !== null) {
|
|
28
|
-
exports.push({ name: match[1]!, type: 'constant' });
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
return exports;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
export function extractImports(source: string): { external: string[]; internal: string[] } {
|
|
35
|
-
const external: string[] = [];
|
|
36
|
-
const internal: string[] = [];
|
|
37
|
-
let match: RegExpExecArray | null;
|
|
38
|
-
|
|
39
|
-
const importRe = new RegExp(IMPORT_RE.source, 'gm');
|
|
40
|
-
while ((match = importRe.exec(source)) !== null) {
|
|
41
|
-
const module = match[1] ?? match[2]!.trim().split(/\s*,\s*/)[0]!;
|
|
42
|
-
if (module.startsWith('.')) {
|
|
43
|
-
internal.push(module);
|
|
44
|
-
} else {
|
|
45
|
-
external.push(module);
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
return { external, internal };
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export function hasTypeAnnotations(source: string): boolean {
|
|
53
|
-
return /:\s*\w+/.test(source) && /->/.test(source);
|
|
54
|
-
}
|
|
1
|
+
import type { ExportInfo } from '../../types/code.types.js';
|
|
2
|
+
|
|
3
|
+
const FUNC_DEF_RE = /^def\s+(\w+)\s*\(/gm;
|
|
4
|
+
const CLASS_DEF_RE = /^class\s+(\w+)/gm;
|
|
5
|
+
const IMPORT_RE = /^(?:from\s+(\S+)\s+)?import\s+(.+)/gm;
|
|
6
|
+
const TOP_LEVEL_ASSIGN_RE = /^([A-Z_][A-Z_\d]*)\s*=/gm;
|
|
7
|
+
|
|
8
|
+
export function extractExports(source: string): ExportInfo[] {
|
|
9
|
+
const exports: ExportInfo[] = [];
|
|
10
|
+
let match: RegExpExecArray | null;
|
|
11
|
+
|
|
12
|
+
const funcRe = new RegExp(FUNC_DEF_RE.source, 'gm');
|
|
13
|
+
while ((match = funcRe.exec(source)) !== null) {
|
|
14
|
+
if (!match[1]!.startsWith('_')) {
|
|
15
|
+
exports.push({ name: match[1]!, type: 'function' });
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const classRe = new RegExp(CLASS_DEF_RE.source, 'gm');
|
|
20
|
+
while ((match = classRe.exec(source)) !== null) {
|
|
21
|
+
if (!match[1]!.startsWith('_')) {
|
|
22
|
+
exports.push({ name: match[1]!, type: 'class' });
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const constRe = new RegExp(TOP_LEVEL_ASSIGN_RE.source, 'gm');
|
|
27
|
+
while ((match = constRe.exec(source)) !== null) {
|
|
28
|
+
exports.push({ name: match[1]!, type: 'constant' });
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
return exports;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export function extractImports(source: string): { external: string[]; internal: string[] } {
|
|
35
|
+
const external: string[] = [];
|
|
36
|
+
const internal: string[] = [];
|
|
37
|
+
let match: RegExpExecArray | null;
|
|
38
|
+
|
|
39
|
+
const importRe = new RegExp(IMPORT_RE.source, 'gm');
|
|
40
|
+
while ((match = importRe.exec(source)) !== null) {
|
|
41
|
+
const module = match[1] ?? match[2]!.trim().split(/\s*,\s*/)[0]!;
|
|
42
|
+
if (module.startsWith('.')) {
|
|
43
|
+
internal.push(module);
|
|
44
|
+
} else {
|
|
45
|
+
external.push(module);
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
return { external, internal };
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export function hasTypeAnnotations(source: string): boolean {
|
|
53
|
+
return /:\s*\w+/.test(source) && /->/.test(source);
|
|
54
|
+
}
|