@timmeck/brain 1.8.0 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BRAIN_PLAN.md +3324 -3324
- package/LICENSE +21 -21
- package/dist/api/server.d.ts +4 -0
- package/dist/api/server.js +73 -0
- package/dist/api/server.js.map +1 -1
- package/dist/brain.js +2 -1
- package/dist/brain.js.map +1 -1
- package/dist/cli/commands/dashboard.js +606 -572
- package/dist/cli/commands/dashboard.js.map +1 -1
- package/dist/dashboard/server.js +25 -25
- package/dist/db/migrations/001_core_schema.js +115 -115
- package/dist/db/migrations/002_learning_schema.js +33 -33
- package/dist/db/migrations/003_code_schema.js +48 -48
- package/dist/db/migrations/004_synapses_schema.js +52 -52
- package/dist/db/migrations/005_fts_indexes.js +73 -73
- package/dist/db/migrations/007_feedback.js +8 -8
- package/dist/db/migrations/008_git_integration.js +33 -33
- package/dist/db/migrations/009_embeddings.js +3 -3
- package/dist/db/repositories/antipattern.repository.js +3 -3
- package/dist/db/repositories/code-module.repository.js +32 -32
- package/dist/db/repositories/notification.repository.js +3 -3
- package/dist/db/repositories/project.repository.js +21 -21
- package/dist/db/repositories/rule.repository.js +24 -24
- package/dist/db/repositories/solution.repository.js +50 -50
- package/dist/db/repositories/synapse.repository.js +18 -18
- package/dist/db/repositories/terminal.repository.js +24 -24
- package/dist/embeddings/engine.d.ts +2 -2
- package/dist/embeddings/engine.js +17 -4
- package/dist/embeddings/engine.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/ipc/server.d.ts +8 -0
- package/dist/ipc/server.js +67 -1
- package/dist/ipc/server.js.map +1 -1
- package/dist/matching/error-matcher.js +5 -5
- package/dist/matching/fingerprint.js +6 -1
- package/dist/matching/fingerprint.js.map +1 -1
- package/dist/mcp/http-server.js +8 -2
- package/dist/mcp/http-server.js.map +1 -1
- package/dist/services/code.service.d.ts +3 -0
- package/dist/services/code.service.js +33 -4
- package/dist/services/code.service.js.map +1 -1
- package/dist/services/error.service.js +4 -3
- package/dist/services/error.service.js.map +1 -1
- package/dist/services/git.service.js +14 -14
- package/package.json +49 -49
- package/src/api/server.ts +395 -321
- package/src/brain.ts +266 -265
- package/src/cli/colors.ts +116 -116
- package/src/cli/commands/config.ts +169 -169
- package/src/cli/commands/dashboard.ts +755 -720
- package/src/cli/commands/doctor.ts +118 -118
- package/src/cli/commands/explain.ts +83 -83
- package/src/cli/commands/export.ts +31 -31
- package/src/cli/commands/import.ts +199 -199
- package/src/cli/commands/insights.ts +65 -65
- package/src/cli/commands/learn.ts +24 -24
- package/src/cli/commands/modules.ts +53 -53
- package/src/cli/commands/network.ts +67 -67
- package/src/cli/commands/projects.ts +42 -42
- package/src/cli/commands/query.ts +120 -120
- package/src/cli/commands/start.ts +62 -62
- package/src/cli/commands/status.ts +75 -75
- package/src/cli/commands/stop.ts +34 -34
- package/src/cli/ipc-helper.ts +22 -22
- package/src/cli/update-check.ts +63 -63
- package/src/code/fingerprint.ts +87 -87
- package/src/code/parsers/generic.ts +29 -29
- package/src/code/parsers/python.ts +54 -54
- package/src/code/parsers/typescript.ts +65 -65
- package/src/code/registry.ts +60 -60
- package/src/dashboard/server.ts +142 -142
- package/src/db/connection.ts +22 -22
- package/src/db/migrations/001_core_schema.ts +120 -120
- package/src/db/migrations/002_learning_schema.ts +38 -38
- package/src/db/migrations/003_code_schema.ts +53 -53
- package/src/db/migrations/004_synapses_schema.ts +57 -57
- package/src/db/migrations/005_fts_indexes.ts +78 -78
- package/src/db/migrations/006_synapses_phase3.ts +17 -17
- package/src/db/migrations/007_feedback.ts +13 -13
- package/src/db/migrations/008_git_integration.ts +38 -38
- package/src/db/migrations/009_embeddings.ts +8 -8
- package/src/db/repositories/antipattern.repository.ts +66 -66
- package/src/db/repositories/code-module.repository.ts +142 -142
- package/src/db/repositories/notification.repository.ts +66 -66
- package/src/db/repositories/project.repository.ts +93 -93
- package/src/db/repositories/rule.repository.ts +108 -108
- package/src/db/repositories/solution.repository.ts +154 -154
- package/src/db/repositories/synapse.repository.ts +153 -153
- package/src/db/repositories/terminal.repository.ts +101 -101
- package/src/embeddings/engine.ts +238 -217
- package/src/index.ts +63 -63
- package/src/ipc/client.ts +118 -118
- package/src/ipc/protocol.ts +35 -35
- package/src/ipc/router.ts +133 -133
- package/src/ipc/server.ts +176 -110
- package/src/learning/decay.ts +46 -46
- package/src/learning/pattern-extractor.ts +90 -90
- package/src/learning/rule-generator.ts +74 -74
- package/src/matching/error-matcher.ts +5 -5
- package/src/matching/fingerprint.ts +34 -29
- package/src/matching/similarity.ts +61 -61
- package/src/matching/tfidf.ts +74 -74
- package/src/matching/tokenizer.ts +41 -41
- package/src/mcp/auto-detect.ts +93 -93
- package/src/mcp/http-server.ts +140 -137
- package/src/mcp/server.ts +73 -73
- package/src/parsing/error-parser.ts +28 -28
- package/src/parsing/parsers/compiler.ts +93 -93
- package/src/parsing/parsers/generic.ts +28 -28
- package/src/parsing/parsers/go.ts +97 -97
- package/src/parsing/parsers/node.ts +69 -69
- package/src/parsing/parsers/python.ts +62 -62
- package/src/parsing/parsers/rust.ts +50 -50
- package/src/parsing/parsers/shell.ts +42 -42
- package/src/parsing/types.ts +47 -47
- package/src/research/gap-analyzer.ts +135 -135
- package/src/research/insight-generator.ts +123 -123
- package/src/research/research-engine.ts +116 -116
- package/src/research/synergy-detector.ts +126 -126
- package/src/research/template-extractor.ts +130 -130
- package/src/research/trend-analyzer.ts +127 -127
- package/src/services/code.service.ts +271 -238
- package/src/services/error.service.ts +4 -3
- package/src/services/git.service.ts +132 -132
- package/src/services/notification.service.ts +41 -41
- package/src/services/synapse.service.ts +59 -59
- package/src/services/terminal.service.ts +81 -81
- package/src/synapses/activation.ts +80 -80
- package/src/synapses/decay.ts +38 -38
- package/src/synapses/hebbian.ts +69 -69
- package/src/synapses/pathfinder.ts +81 -81
- package/src/synapses/synapse-manager.ts +109 -109
- package/src/types/code.types.ts +52 -52
- package/src/types/error.types.ts +67 -67
- package/src/types/ipc.types.ts +8 -8
- package/src/types/mcp.types.ts +53 -53
- package/src/types/research.types.ts +28 -28
- package/src/types/solution.types.ts +30 -30
- package/src/utils/events.ts +45 -45
- package/src/utils/hash.ts +5 -5
- package/src/utils/logger.ts +48 -48
- package/src/utils/paths.ts +19 -19
- package/tests/e2e/test_code_intelligence.py +1015 -0
- package/tests/e2e/test_error_memory.py +451 -0
- package/tests/e2e/test_full_integration.py +534 -0
- package/tests/fixtures/code-modules/modules.ts +83 -83
- package/tests/fixtures/errors/go.ts +9 -9
- package/tests/fixtures/errors/node.ts +24 -24
- package/tests/fixtures/errors/python.ts +21 -21
- package/tests/fixtures/errors/rust.ts +25 -25
- package/tests/fixtures/errors/shell.ts +15 -15
- package/tests/fixtures/solutions/solutions.ts +27 -27
- package/tests/helpers/setup-db.ts +52 -52
- package/tests/integration/code-flow.test.ts +86 -86
- package/tests/integration/error-flow.test.ts +83 -83
- package/tests/integration/ipc-flow.test.ts +166 -166
- package/tests/integration/learning-cycle.test.ts +82 -82
- package/tests/integration/synapse-flow.test.ts +117 -117
- package/tests/unit/code/analyzer.test.ts +58 -58
- package/tests/unit/code/fingerprint.test.ts +51 -51
- package/tests/unit/code/scorer.test.ts +55 -55
- package/tests/unit/learning/confidence-scorer.test.ts +60 -60
- package/tests/unit/learning/decay.test.ts +45 -45
- package/tests/unit/learning/pattern-extractor.test.ts +50 -50
- package/tests/unit/matching/error-matcher.test.ts +69 -69
- package/tests/unit/matching/fingerprint.test.ts +47 -47
- package/tests/unit/matching/similarity.test.ts +65 -65
- package/tests/unit/matching/tfidf.test.ts +71 -71
- package/tests/unit/matching/tokenizer.test.ts +83 -83
- package/tests/unit/parsing/parsers.test.ts +113 -113
- package/tests/unit/research/gap-analyzer.test.ts +45 -45
- package/tests/unit/research/trend-analyzer.test.ts +45 -45
- package/tests/unit/synapses/activation.test.ts +80 -80
- package/tests/unit/synapses/decay.test.ts +27 -27
- package/tests/unit/synapses/hebbian.test.ts +96 -96
- package/tests/unit/synapses/pathfinder.test.ts +72 -72
- package/tsconfig.json +18 -18
|
@@ -1,65 +1,65 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
2
|
-
import { levenshteinDistance, cosineSimilarity, jaccardSimilarity } from '../../../src/matching/similarity.js';
|
|
3
|
-
|
|
4
|
-
describe('levenshteinDistance (normalized similarity)', () => {
|
|
5
|
-
it('returns 1 for identical strings', () => {
|
|
6
|
-
expect(levenshteinDistance('hello', 'hello')).toBeCloseTo(1.0);
|
|
7
|
-
});
|
|
8
|
-
|
|
9
|
-
it('returns 0 for completely different strings', () => {
|
|
10
|
-
expect(levenshteinDistance('', 'abc')).toBeCloseTo(0.0);
|
|
11
|
-
expect(levenshteinDistance('abc', '')).toBeCloseTo(0.0);
|
|
12
|
-
});
|
|
13
|
-
|
|
14
|
-
it('returns high similarity for single edit', () => {
|
|
15
|
-
// cat vs bat: 1 edit / 3 length = 0.33 distance → 0.67 similarity
|
|
16
|
-
expect(levenshteinDistance('cat', 'bat')).toBeCloseTo(1 - 1 / 3, 1);
|
|
17
|
-
});
|
|
18
|
-
|
|
19
|
-
it('returns value between 0 and 1', () => {
|
|
20
|
-
const sim = levenshteinDistance('kitten', 'sitting');
|
|
21
|
-
expect(sim).toBeGreaterThanOrEqual(0);
|
|
22
|
-
expect(sim).toBeLessThanOrEqual(1);
|
|
23
|
-
});
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
describe('cosineSimilarity', () => {
|
|
27
|
-
it('returns 1 for identical token arrays', () => {
|
|
28
|
-
expect(cosineSimilarity(['a', 'b', 'c'], ['a', 'b', 'c'])).toBeCloseTo(1.0);
|
|
29
|
-
});
|
|
30
|
-
|
|
31
|
-
it('returns 0 for disjoint token arrays', () => {
|
|
32
|
-
expect(cosineSimilarity(['a', 'b'], ['c', 'd'])).toBeCloseTo(0.0);
|
|
33
|
-
});
|
|
34
|
-
|
|
35
|
-
it('returns value between 0 and 1 for partial overlap', () => {
|
|
36
|
-
const sim = cosineSimilarity(['a', 'b', 'c'], ['b', 'c', 'd']);
|
|
37
|
-
expect(sim).toBeGreaterThan(0);
|
|
38
|
-
expect(sim).toBeLessThan(1);
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
it('handles empty arrays', () => {
|
|
42
|
-
expect(cosineSimilarity([], [])).toBe(0);
|
|
43
|
-
expect(cosineSimilarity(['a'], [])).toBe(0);
|
|
44
|
-
});
|
|
45
|
-
});
|
|
46
|
-
|
|
47
|
-
describe('jaccardSimilarity', () => {
|
|
48
|
-
it('returns 1 for identical sets', () => {
|
|
49
|
-
expect(jaccardSimilarity(['a', 'b'], ['a', 'b'])).toBeCloseTo(1.0);
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
it('returns 0 for disjoint sets', () => {
|
|
53
|
-
expect(jaccardSimilarity(['a', 'b'], ['c', 'd'])).toBeCloseTo(0.0);
|
|
54
|
-
});
|
|
55
|
-
|
|
56
|
-
it('computes correct ratio for partial overlap', () => {
|
|
57
|
-
// intersection {b} = 1, union {a,b,c} = 3
|
|
58
|
-
const sim = jaccardSimilarity(['a', 'b'], ['b', 'c']);
|
|
59
|
-
expect(sim).toBeCloseTo(1 / 3);
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
it('handles empty arrays', () => {
|
|
63
|
-
expect(jaccardSimilarity([], [])).toBe(0);
|
|
64
|
-
});
|
|
65
|
-
});
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { levenshteinDistance, cosineSimilarity, jaccardSimilarity } from '../../../src/matching/similarity.js';
|
|
3
|
+
|
|
4
|
+
describe('levenshteinDistance (normalized similarity)', () => {
|
|
5
|
+
it('returns 1 for identical strings', () => {
|
|
6
|
+
expect(levenshteinDistance('hello', 'hello')).toBeCloseTo(1.0);
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
it('returns 0 for completely different strings', () => {
|
|
10
|
+
expect(levenshteinDistance('', 'abc')).toBeCloseTo(0.0);
|
|
11
|
+
expect(levenshteinDistance('abc', '')).toBeCloseTo(0.0);
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
it('returns high similarity for single edit', () => {
|
|
15
|
+
// cat vs bat: 1 edit / 3 length = 0.33 distance → 0.67 similarity
|
|
16
|
+
expect(levenshteinDistance('cat', 'bat')).toBeCloseTo(1 - 1 / 3, 1);
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
it('returns value between 0 and 1', () => {
|
|
20
|
+
const sim = levenshteinDistance('kitten', 'sitting');
|
|
21
|
+
expect(sim).toBeGreaterThanOrEqual(0);
|
|
22
|
+
expect(sim).toBeLessThanOrEqual(1);
|
|
23
|
+
});
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
describe('cosineSimilarity', () => {
|
|
27
|
+
it('returns 1 for identical token arrays', () => {
|
|
28
|
+
expect(cosineSimilarity(['a', 'b', 'c'], ['a', 'b', 'c'])).toBeCloseTo(1.0);
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
it('returns 0 for disjoint token arrays', () => {
|
|
32
|
+
expect(cosineSimilarity(['a', 'b'], ['c', 'd'])).toBeCloseTo(0.0);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
it('returns value between 0 and 1 for partial overlap', () => {
|
|
36
|
+
const sim = cosineSimilarity(['a', 'b', 'c'], ['b', 'c', 'd']);
|
|
37
|
+
expect(sim).toBeGreaterThan(0);
|
|
38
|
+
expect(sim).toBeLessThan(1);
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it('handles empty arrays', () => {
|
|
42
|
+
expect(cosineSimilarity([], [])).toBe(0);
|
|
43
|
+
expect(cosineSimilarity(['a'], [])).toBe(0);
|
|
44
|
+
});
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
describe('jaccardSimilarity', () => {
|
|
48
|
+
it('returns 1 for identical sets', () => {
|
|
49
|
+
expect(jaccardSimilarity(['a', 'b'], ['a', 'b'])).toBeCloseTo(1.0);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it('returns 0 for disjoint sets', () => {
|
|
53
|
+
expect(jaccardSimilarity(['a', 'b'], ['c', 'd'])).toBeCloseTo(0.0);
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
it('computes correct ratio for partial overlap', () => {
|
|
57
|
+
// intersection {b} = 1, union {a,b,c} = 3
|
|
58
|
+
const sim = jaccardSimilarity(['a', 'b'], ['b', 'c']);
|
|
59
|
+
expect(sim).toBeCloseTo(1 / 3);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it('handles empty arrays', () => {
|
|
63
|
+
expect(jaccardSimilarity([], [])).toBe(0);
|
|
64
|
+
});
|
|
65
|
+
});
|
|
@@ -1,71 +1,71 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
2
|
-
import { TfIdfIndex } from '../../../src/matching/tfidf.js';
|
|
3
|
-
|
|
4
|
-
describe('TfIdfIndex', () => {
|
|
5
|
-
it('indexes documents and returns matches', () => {
|
|
6
|
-
const index = new TfIdfIndex();
|
|
7
|
-
// Add docs in order where query terms get correct IDF
|
|
8
|
-
// IDF is only recomputed for terms in the newly added doc,
|
|
9
|
-
// so add the doc with query terms last to ensure fresh IDF
|
|
10
|
-
index.addDocument(3, ['rust', 'borrow', 'lifetime']);
|
|
11
|
-
index.addDocument(2, ['python', 'import', 'module']);
|
|
12
|
-
index.addDocument(1, ['typescript', 'error', 'module']);
|
|
13
|
-
|
|
14
|
-
const results = index.query(['typescript']);
|
|
15
|
-
expect(results.length).toBeGreaterThan(0);
|
|
16
|
-
// Doc 1 has 'typescript'
|
|
17
|
-
expect(results[0].id).toBe(1);
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
it('tracks document count', () => {
|
|
21
|
-
const index = new TfIdfIndex();
|
|
22
|
-
expect(index.getDocumentCount()).toBe(0);
|
|
23
|
-
index.addDocument(1, ['a', 'b']);
|
|
24
|
-
expect(index.getDocumentCount()).toBe(1);
|
|
25
|
-
index.addDocument(2, ['c', 'd']);
|
|
26
|
-
expect(index.getDocumentCount()).toBe(2);
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
it('removes documents', () => {
|
|
30
|
-
const index = new TfIdfIndex();
|
|
31
|
-
index.addDocument(1, ['error', 'type']);
|
|
32
|
-
index.addDocument(2, ['error', 'syntax']);
|
|
33
|
-
index.removeDocument(1);
|
|
34
|
-
expect(index.getDocumentCount()).toBe(1);
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
it('returns empty for no match', () => {
|
|
38
|
-
const index = new TfIdfIndex();
|
|
39
|
-
index.addDocument(1, ['foo', 'bar']);
|
|
40
|
-
const results = index.query(['completely', 'different']);
|
|
41
|
-
expect(results).toEqual([]);
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
it('respects topK parameter', () => {
|
|
45
|
-
const index = new TfIdfIndex();
|
|
46
|
-
for (let i = 0; i < 20; i++) {
|
|
47
|
-
index.addDocument(i, ['shared', `unique${i}`]);
|
|
48
|
-
}
|
|
49
|
-
// 'unique0' only in doc 0, so has high IDF — query with it plus 'shared'
|
|
50
|
-
const results = index.query(['unique0', 'shared'], 5);
|
|
51
|
-
expect(results.length).toBeLessThanOrEqual(5);
|
|
52
|
-
});
|
|
53
|
-
|
|
54
|
-
it('getIdf returns a map', () => {
|
|
55
|
-
const index = new TfIdfIndex();
|
|
56
|
-
// IDF = log(N/df), only recomputed for terms in the just-added doc.
|
|
57
|
-
// Add 'frequent' in all 3 docs, 'rare' in last doc only.
|
|
58
|
-
// When doc3 is added (N=3), 'rare' df=1 → IDF=log(3)=1.099
|
|
59
|
-
// 'frequent' df=3 → IDF=log(3/3)=0
|
|
60
|
-
index.addDocument(1, ['frequent', 'alpha']);
|
|
61
|
-
index.addDocument(2, ['frequent', 'beta']);
|
|
62
|
-
index.addDocument(3, ['frequent', 'rare']);
|
|
63
|
-
|
|
64
|
-
const idf = index.getIdf();
|
|
65
|
-
expect(idf).toBeInstanceOf(Map);
|
|
66
|
-
expect(idf.has('rare')).toBe(true);
|
|
67
|
-
expect(idf.has('frequent')).toBe(true);
|
|
68
|
-
// rare appears in 1/3 docs, frequent in 3/3
|
|
69
|
-
expect(idf.get('rare')!).toBeGreaterThan(idf.get('frequent')!);
|
|
70
|
-
});
|
|
71
|
-
});
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { TfIdfIndex } from '../../../src/matching/tfidf.js';
|
|
3
|
+
|
|
4
|
+
describe('TfIdfIndex', () => {
|
|
5
|
+
it('indexes documents and returns matches', () => {
|
|
6
|
+
const index = new TfIdfIndex();
|
|
7
|
+
// Add docs in order where query terms get correct IDF
|
|
8
|
+
// IDF is only recomputed for terms in the newly added doc,
|
|
9
|
+
// so add the doc with query terms last to ensure fresh IDF
|
|
10
|
+
index.addDocument(3, ['rust', 'borrow', 'lifetime']);
|
|
11
|
+
index.addDocument(2, ['python', 'import', 'module']);
|
|
12
|
+
index.addDocument(1, ['typescript', 'error', 'module']);
|
|
13
|
+
|
|
14
|
+
const results = index.query(['typescript']);
|
|
15
|
+
expect(results.length).toBeGreaterThan(0);
|
|
16
|
+
// Doc 1 has 'typescript'
|
|
17
|
+
expect(results[0].id).toBe(1);
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it('tracks document count', () => {
|
|
21
|
+
const index = new TfIdfIndex();
|
|
22
|
+
expect(index.getDocumentCount()).toBe(0);
|
|
23
|
+
index.addDocument(1, ['a', 'b']);
|
|
24
|
+
expect(index.getDocumentCount()).toBe(1);
|
|
25
|
+
index.addDocument(2, ['c', 'd']);
|
|
26
|
+
expect(index.getDocumentCount()).toBe(2);
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it('removes documents', () => {
|
|
30
|
+
const index = new TfIdfIndex();
|
|
31
|
+
index.addDocument(1, ['error', 'type']);
|
|
32
|
+
index.addDocument(2, ['error', 'syntax']);
|
|
33
|
+
index.removeDocument(1);
|
|
34
|
+
expect(index.getDocumentCount()).toBe(1);
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it('returns empty for no match', () => {
|
|
38
|
+
const index = new TfIdfIndex();
|
|
39
|
+
index.addDocument(1, ['foo', 'bar']);
|
|
40
|
+
const results = index.query(['completely', 'different']);
|
|
41
|
+
expect(results).toEqual([]);
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
it('respects topK parameter', () => {
|
|
45
|
+
const index = new TfIdfIndex();
|
|
46
|
+
for (let i = 0; i < 20; i++) {
|
|
47
|
+
index.addDocument(i, ['shared', `unique${i}`]);
|
|
48
|
+
}
|
|
49
|
+
// 'unique0' only in doc 0, so has high IDF — query with it plus 'shared'
|
|
50
|
+
const results = index.query(['unique0', 'shared'], 5);
|
|
51
|
+
expect(results.length).toBeLessThanOrEqual(5);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it('getIdf returns a map', () => {
|
|
55
|
+
const index = new TfIdfIndex();
|
|
56
|
+
// IDF = log(N/df), only recomputed for terms in the just-added doc.
|
|
57
|
+
// Add 'frequent' in all 3 docs, 'rare' in last doc only.
|
|
58
|
+
// When doc3 is added (N=3), 'rare' df=1 → IDF=log(3)=1.099
|
|
59
|
+
// 'frequent' df=3 → IDF=log(3/3)=0
|
|
60
|
+
index.addDocument(1, ['frequent', 'alpha']);
|
|
61
|
+
index.addDocument(2, ['frequent', 'beta']);
|
|
62
|
+
index.addDocument(3, ['frequent', 'rare']);
|
|
63
|
+
|
|
64
|
+
const idf = index.getIdf();
|
|
65
|
+
expect(idf).toBeInstanceOf(Map);
|
|
66
|
+
expect(idf.has('rare')).toBe(true);
|
|
67
|
+
expect(idf.has('frequent')).toBe(true);
|
|
68
|
+
// rare appears in 1/3 docs, frequent in 3/3
|
|
69
|
+
expect(idf.get('rare')!).toBeGreaterThan(idf.get('frequent')!);
|
|
70
|
+
});
|
|
71
|
+
});
|
|
@@ -1,83 +1,83 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
2
|
-
import { splitCamelCase, splitSnakeCase, removeStopwords, tokenize } from '../../../src/matching/tokenizer.js';
|
|
3
|
-
|
|
4
|
-
describe('splitCamelCase', () => {
|
|
5
|
-
it('splits camelCase words (preserves case)', () => {
|
|
6
|
-
const result = splitCamelCase('camelCase');
|
|
7
|
-
expect(result).toContain('camel');
|
|
8
|
-
expect(result).toContain('Case');
|
|
9
|
-
});
|
|
10
|
-
|
|
11
|
-
it('splits PascalCase words', () => {
|
|
12
|
-
const result = splitCamelCase('PascalCase');
|
|
13
|
-
expect(result).toContain('Pascal');
|
|
14
|
-
expect(result).toContain('Case');
|
|
15
|
-
});
|
|
16
|
-
|
|
17
|
-
it('handles consecutive uppercase (acronyms)', () => {
|
|
18
|
-
const result = splitCamelCase('parseHTMLResponse');
|
|
19
|
-
expect(result).toContain('parse');
|
|
20
|
-
expect(result).toContain('HTML');
|
|
21
|
-
expect(result).toContain('Response');
|
|
22
|
-
});
|
|
23
|
-
|
|
24
|
-
it('returns single word unchanged', () => {
|
|
25
|
-
expect(splitCamelCase('hello')).toEqual(['hello']);
|
|
26
|
-
});
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
describe('splitSnakeCase', () => {
|
|
30
|
-
it('splits snake_case words', () => {
|
|
31
|
-
expect(splitSnakeCase('snake_case')).toEqual(['snake', 'case']);
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
it('splits kebab-case words', () => {
|
|
35
|
-
expect(splitSnakeCase('kebab-case')).toEqual(['kebab', 'case']);
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
it('returns single word unchanged', () => {
|
|
39
|
-
expect(splitSnakeCase('hello')).toEqual(['hello']);
|
|
40
|
-
});
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
describe('removeStopwords', () => {
|
|
44
|
-
it('removes common stopwords', () => {
|
|
45
|
-
const tokens = ['the', 'is', 'in', 'module'];
|
|
46
|
-
const result = removeStopwords(tokens);
|
|
47
|
-
expect(result).toContain('module');
|
|
48
|
-
expect(result).not.toContain('the');
|
|
49
|
-
expect(result).not.toContain('is');
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
it('error is treated as stopword', () => {
|
|
53
|
-
// 'error' is in the stopword list
|
|
54
|
-
const tokens = ['error', 'module'];
|
|
55
|
-
const result = removeStopwords(tokens);
|
|
56
|
-
expect(result).toContain('module');
|
|
57
|
-
});
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
describe('tokenize', () => {
|
|
61
|
-
it('tokenizes and lowercases text', () => {
|
|
62
|
-
const tokens = tokenize('Cannot read property map of undefined');
|
|
63
|
-
expect(tokens).toContain('read');
|
|
64
|
-
expect(tokens).toContain('property');
|
|
65
|
-
expect(tokens).toContain('map');
|
|
66
|
-
expect(tokens).toContain('undefined');
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
it('handles empty string', () => {
|
|
70
|
-
expect(tokenize('')).toEqual([]);
|
|
71
|
-
});
|
|
72
|
-
|
|
73
|
-
it('lowercases all tokens', () => {
|
|
74
|
-
const tokens = tokenize('Something FAILED');
|
|
75
|
-
tokens.forEach(t => expect(t).toBe(t.toLowerCase()));
|
|
76
|
-
});
|
|
77
|
-
|
|
78
|
-
it('returns unique tokens', () => {
|
|
79
|
-
const tokens = tokenize('error error error');
|
|
80
|
-
const unique = new Set(tokens);
|
|
81
|
-
expect(tokens.length).toBe(unique.size);
|
|
82
|
-
});
|
|
83
|
-
});
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { splitCamelCase, splitSnakeCase, removeStopwords, tokenize } from '../../../src/matching/tokenizer.js';
|
|
3
|
+
|
|
4
|
+
describe('splitCamelCase', () => {
|
|
5
|
+
it('splits camelCase words (preserves case)', () => {
|
|
6
|
+
const result = splitCamelCase('camelCase');
|
|
7
|
+
expect(result).toContain('camel');
|
|
8
|
+
expect(result).toContain('Case');
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
it('splits PascalCase words', () => {
|
|
12
|
+
const result = splitCamelCase('PascalCase');
|
|
13
|
+
expect(result).toContain('Pascal');
|
|
14
|
+
expect(result).toContain('Case');
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
it('handles consecutive uppercase (acronyms)', () => {
|
|
18
|
+
const result = splitCamelCase('parseHTMLResponse');
|
|
19
|
+
expect(result).toContain('parse');
|
|
20
|
+
expect(result).toContain('HTML');
|
|
21
|
+
expect(result).toContain('Response');
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it('returns single word unchanged', () => {
|
|
25
|
+
expect(splitCamelCase('hello')).toEqual(['hello']);
|
|
26
|
+
});
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
describe('splitSnakeCase', () => {
|
|
30
|
+
it('splits snake_case words', () => {
|
|
31
|
+
expect(splitSnakeCase('snake_case')).toEqual(['snake', 'case']);
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it('splits kebab-case words', () => {
|
|
35
|
+
expect(splitSnakeCase('kebab-case')).toEqual(['kebab', 'case']);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
it('returns single word unchanged', () => {
|
|
39
|
+
expect(splitSnakeCase('hello')).toEqual(['hello']);
|
|
40
|
+
});
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
describe('removeStopwords', () => {
|
|
44
|
+
it('removes common stopwords', () => {
|
|
45
|
+
const tokens = ['the', 'is', 'in', 'module'];
|
|
46
|
+
const result = removeStopwords(tokens);
|
|
47
|
+
expect(result).toContain('module');
|
|
48
|
+
expect(result).not.toContain('the');
|
|
49
|
+
expect(result).not.toContain('is');
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it('error is treated as stopword', () => {
|
|
53
|
+
// 'error' is in the stopword list
|
|
54
|
+
const tokens = ['error', 'module'];
|
|
55
|
+
const result = removeStopwords(tokens);
|
|
56
|
+
expect(result).toContain('module');
|
|
57
|
+
});
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
describe('tokenize', () => {
|
|
61
|
+
it('tokenizes and lowercases text', () => {
|
|
62
|
+
const tokens = tokenize('Cannot read property map of undefined');
|
|
63
|
+
expect(tokens).toContain('read');
|
|
64
|
+
expect(tokens).toContain('property');
|
|
65
|
+
expect(tokens).toContain('map');
|
|
66
|
+
expect(tokens).toContain('undefined');
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
it('handles empty string', () => {
|
|
70
|
+
expect(tokenize('')).toEqual([]);
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
it('lowercases all tokens', () => {
|
|
74
|
+
const tokens = tokenize('Something FAILED');
|
|
75
|
+
tokens.forEach(t => expect(t).toBe(t.toLowerCase()));
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
it('returns unique tokens', () => {
|
|
79
|
+
const tokens = tokenize('error error error');
|
|
80
|
+
const unique = new Set(tokens);
|
|
81
|
+
expect(tokens.length).toBe(unique.size);
|
|
82
|
+
});
|
|
83
|
+
});
|