@timmeck/brain 1.8.1 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/BRAIN_PLAN.md +3324 -3324
  2. package/LICENSE +21 -21
  3. package/dist/cli/commands/dashboard.js +595 -595
  4. package/dist/dashboard/server.js +25 -25
  5. package/dist/db/migrations/001_core_schema.js +115 -115
  6. package/dist/db/migrations/002_learning_schema.js +33 -33
  7. package/dist/db/migrations/003_code_schema.js +48 -48
  8. package/dist/db/migrations/004_synapses_schema.js +52 -52
  9. package/dist/db/migrations/005_fts_indexes.js +73 -73
  10. package/dist/db/migrations/007_feedback.js +8 -8
  11. package/dist/db/migrations/008_git_integration.js +33 -33
  12. package/dist/db/migrations/009_embeddings.js +3 -3
  13. package/dist/db/repositories/antipattern.repository.js +3 -3
  14. package/dist/db/repositories/code-module.repository.js +32 -32
  15. package/dist/db/repositories/notification.repository.js +3 -3
  16. package/dist/db/repositories/project.repository.js +21 -21
  17. package/dist/db/repositories/rule.repository.js +24 -24
  18. package/dist/db/repositories/solution.repository.js +50 -50
  19. package/dist/db/repositories/synapse.repository.js +18 -18
  20. package/dist/db/repositories/terminal.repository.js +24 -24
  21. package/dist/ipc/server.d.ts +8 -0
  22. package/dist/ipc/server.js +67 -1
  23. package/dist/ipc/server.js.map +1 -1
  24. package/dist/matching/error-matcher.js +5 -5
  25. package/dist/matching/fingerprint.js +6 -1
  26. package/dist/matching/fingerprint.js.map +1 -1
  27. package/dist/services/error.service.js +4 -3
  28. package/dist/services/error.service.js.map +1 -1
  29. package/dist/services/git.service.js +14 -14
  30. package/package.json +49 -49
  31. package/src/api/server.ts +395 -395
  32. package/src/brain.ts +266 -266
  33. package/src/cli/colors.ts +116 -116
  34. package/src/cli/commands/config.ts +169 -169
  35. package/src/cli/commands/dashboard.ts +755 -755
  36. package/src/cli/commands/doctor.ts +118 -118
  37. package/src/cli/commands/explain.ts +83 -83
  38. package/src/cli/commands/export.ts +31 -31
  39. package/src/cli/commands/import.ts +199 -199
  40. package/src/cli/commands/insights.ts +65 -65
  41. package/src/cli/commands/learn.ts +24 -24
  42. package/src/cli/commands/modules.ts +53 -53
  43. package/src/cli/commands/network.ts +67 -67
  44. package/src/cli/commands/projects.ts +42 -42
  45. package/src/cli/commands/query.ts +120 -120
  46. package/src/cli/commands/start.ts +62 -62
  47. package/src/cli/commands/status.ts +75 -75
  48. package/src/cli/commands/stop.ts +34 -34
  49. package/src/cli/ipc-helper.ts +22 -22
  50. package/src/cli/update-check.ts +63 -63
  51. package/src/code/fingerprint.ts +87 -87
  52. package/src/code/parsers/generic.ts +29 -29
  53. package/src/code/parsers/python.ts +54 -54
  54. package/src/code/parsers/typescript.ts +65 -65
  55. package/src/code/registry.ts +60 -60
  56. package/src/dashboard/server.ts +142 -142
  57. package/src/db/connection.ts +22 -22
  58. package/src/db/migrations/001_core_schema.ts +120 -120
  59. package/src/db/migrations/002_learning_schema.ts +38 -38
  60. package/src/db/migrations/003_code_schema.ts +53 -53
  61. package/src/db/migrations/004_synapses_schema.ts +57 -57
  62. package/src/db/migrations/005_fts_indexes.ts +78 -78
  63. package/src/db/migrations/006_synapses_phase3.ts +17 -17
  64. package/src/db/migrations/007_feedback.ts +13 -13
  65. package/src/db/migrations/008_git_integration.ts +38 -38
  66. package/src/db/migrations/009_embeddings.ts +8 -8
  67. package/src/db/repositories/antipattern.repository.ts +66 -66
  68. package/src/db/repositories/code-module.repository.ts +142 -142
  69. package/src/db/repositories/notification.repository.ts +66 -66
  70. package/src/db/repositories/project.repository.ts +93 -93
  71. package/src/db/repositories/rule.repository.ts +108 -108
  72. package/src/db/repositories/solution.repository.ts +154 -154
  73. package/src/db/repositories/synapse.repository.ts +153 -153
  74. package/src/db/repositories/terminal.repository.ts +101 -101
  75. package/src/embeddings/engine.ts +238 -238
  76. package/src/index.ts +63 -63
  77. package/src/ipc/client.ts +118 -118
  78. package/src/ipc/protocol.ts +35 -35
  79. package/src/ipc/router.ts +133 -133
  80. package/src/ipc/server.ts +176 -110
  81. package/src/learning/decay.ts +46 -46
  82. package/src/learning/pattern-extractor.ts +90 -90
  83. package/src/learning/rule-generator.ts +74 -74
  84. package/src/matching/error-matcher.ts +5 -5
  85. package/src/matching/fingerprint.ts +34 -29
  86. package/src/matching/similarity.ts +61 -61
  87. package/src/matching/tfidf.ts +74 -74
  88. package/src/matching/tokenizer.ts +41 -41
  89. package/src/mcp/auto-detect.ts +93 -93
  90. package/src/mcp/http-server.ts +140 -140
  91. package/src/mcp/server.ts +73 -73
  92. package/src/parsing/error-parser.ts +28 -28
  93. package/src/parsing/parsers/compiler.ts +93 -93
  94. package/src/parsing/parsers/generic.ts +28 -28
  95. package/src/parsing/parsers/go.ts +97 -97
  96. package/src/parsing/parsers/node.ts +69 -69
  97. package/src/parsing/parsers/python.ts +62 -62
  98. package/src/parsing/parsers/rust.ts +50 -50
  99. package/src/parsing/parsers/shell.ts +42 -42
  100. package/src/parsing/types.ts +47 -47
  101. package/src/research/gap-analyzer.ts +135 -135
  102. package/src/research/insight-generator.ts +123 -123
  103. package/src/research/research-engine.ts +116 -116
  104. package/src/research/synergy-detector.ts +126 -126
  105. package/src/research/template-extractor.ts +130 -130
  106. package/src/research/trend-analyzer.ts +127 -127
  107. package/src/services/code.service.ts +271 -271
  108. package/src/services/error.service.ts +4 -3
  109. package/src/services/git.service.ts +132 -132
  110. package/src/services/notification.service.ts +41 -41
  111. package/src/services/synapse.service.ts +59 -59
  112. package/src/services/terminal.service.ts +81 -81
  113. package/src/synapses/activation.ts +80 -80
  114. package/src/synapses/decay.ts +38 -38
  115. package/src/synapses/hebbian.ts +69 -69
  116. package/src/synapses/pathfinder.ts +81 -81
  117. package/src/synapses/synapse-manager.ts +109 -109
  118. package/src/types/code.types.ts +52 -52
  119. package/src/types/error.types.ts +67 -67
  120. package/src/types/ipc.types.ts +8 -8
  121. package/src/types/mcp.types.ts +53 -53
  122. package/src/types/research.types.ts +28 -28
  123. package/src/types/solution.types.ts +30 -30
  124. package/src/utils/events.ts +45 -45
  125. package/src/utils/hash.ts +5 -5
  126. package/src/utils/logger.ts +48 -48
  127. package/src/utils/paths.ts +19 -19
  128. package/tests/e2e/test_code_intelligence.py +1015 -0
  129. package/tests/e2e/test_error_memory.py +451 -0
  130. package/tests/e2e/test_full_integration.py +534 -0
  131. package/tests/fixtures/code-modules/modules.ts +83 -83
  132. package/tests/fixtures/errors/go.ts +9 -9
  133. package/tests/fixtures/errors/node.ts +24 -24
  134. package/tests/fixtures/errors/python.ts +21 -21
  135. package/tests/fixtures/errors/rust.ts +25 -25
  136. package/tests/fixtures/errors/shell.ts +15 -15
  137. package/tests/fixtures/solutions/solutions.ts +27 -27
  138. package/tests/helpers/setup-db.ts +52 -52
  139. package/tests/integration/code-flow.test.ts +86 -86
  140. package/tests/integration/error-flow.test.ts +83 -83
  141. package/tests/integration/ipc-flow.test.ts +166 -166
  142. package/tests/integration/learning-cycle.test.ts +82 -82
  143. package/tests/integration/synapse-flow.test.ts +117 -117
  144. package/tests/unit/code/analyzer.test.ts +58 -58
  145. package/tests/unit/code/fingerprint.test.ts +51 -51
  146. package/tests/unit/code/scorer.test.ts +55 -55
  147. package/tests/unit/learning/confidence-scorer.test.ts +60 -60
  148. package/tests/unit/learning/decay.test.ts +45 -45
  149. package/tests/unit/learning/pattern-extractor.test.ts +50 -50
  150. package/tests/unit/matching/error-matcher.test.ts +69 -69
  151. package/tests/unit/matching/fingerprint.test.ts +47 -47
  152. package/tests/unit/matching/similarity.test.ts +65 -65
  153. package/tests/unit/matching/tfidf.test.ts +71 -71
  154. package/tests/unit/matching/tokenizer.test.ts +83 -83
  155. package/tests/unit/parsing/parsers.test.ts +113 -113
  156. package/tests/unit/research/gap-analyzer.test.ts +45 -45
  157. package/tests/unit/research/trend-analyzer.test.ts +45 -45
  158. package/tests/unit/synapses/activation.test.ts +80 -80
  159. package/tests/unit/synapses/decay.test.ts +27 -27
  160. package/tests/unit/synapses/hebbian.test.ts +96 -96
  161. package/tests/unit/synapses/pathfinder.test.ts +72 -72
  162. package/tsconfig.json +18 -18
@@ -1,74 +1,74 @@
1
- import type { LearningConfig } from '../types/config.types.js';
2
- import type { RuleRepository } from '../db/repositories/rule.repository.js';
3
- import type { ErrorPattern } from './pattern-extractor.js';
4
- import { getLogger } from '../utils/logger.js';
5
-
6
- export interface GeneratedRule {
7
- pattern: string;
8
- action: string;
9
- description: string;
10
- confidence: number;
11
- sourceErrorIds: number[];
12
- }
13
-
14
- /**
15
- * Generate prevention rules from extracted patterns.
16
- */
17
- export function generateRules(
18
- patterns: ErrorPattern[],
19
- config: LearningConfig,
20
- ): GeneratedRule[] {
21
- return patterns
22
- .filter(p =>
23
- p.occurrences >= config.minOccurrences &&
24
- p.confidence >= config.minConfidence,
25
- )
26
- .map(pattern => ({
27
- pattern: pattern.messageRegex,
28
- action: pattern.confidence >= 0.90
29
- ? `Auto-fix available for ${pattern.errorType}`
30
- : `Suggestion: check ${pattern.errorType} pattern (${pattern.occurrences} occurrences)`,
31
- description: `Auto-generated from ${pattern.occurrences} occurrences of ${pattern.errorType}`,
32
- confidence: pattern.confidence,
33
- sourceErrorIds: pattern.errorIds,
34
- }));
35
- }
36
-
37
- /**
38
- * Persist generated rules to the database.
39
- */
40
- export function persistRules(
41
- rules: GeneratedRule[],
42
- ruleRepo: RuleRepository,
43
- projectId?: number,
44
- ): number {
45
- const logger = getLogger();
46
- let created = 0;
47
-
48
- for (const rule of rules) {
49
- // Check if similar rule already exists
50
- const existing = ruleRepo.findByPattern(rule.pattern);
51
- if (existing.length > 0) {
52
- // Update confidence of existing rule
53
- const best = existing[0]!;
54
- if (rule.confidence > best.confidence) {
55
- ruleRepo.update(best.id, { confidence: rule.confidence });
56
- }
57
- continue;
58
- }
59
-
60
- ruleRepo.create({
61
- pattern: rule.pattern,
62
- action: rule.action,
63
- description: rule.description,
64
- confidence: rule.confidence,
65
- occurrences: 0,
66
- active: 1,
67
- project_id: projectId ?? null,
68
- });
69
- created++;
70
- logger.info(`New rule generated: ${rule.pattern.substring(0, 50)}...`);
71
- }
72
-
73
- return created;
74
- }
1
+ import type { LearningConfig } from '../types/config.types.js';
2
+ import type { RuleRepository } from '../db/repositories/rule.repository.js';
3
+ import type { ErrorPattern } from './pattern-extractor.js';
4
+ import { getLogger } from '../utils/logger.js';
5
+
6
+ export interface GeneratedRule {
7
+ pattern: string;
8
+ action: string;
9
+ description: string;
10
+ confidence: number;
11
+ sourceErrorIds: number[];
12
+ }
13
+
14
+ /**
15
+ * Generate prevention rules from extracted patterns.
16
+ */
17
+ export function generateRules(
18
+ patterns: ErrorPattern[],
19
+ config: LearningConfig,
20
+ ): GeneratedRule[] {
21
+ return patterns
22
+ .filter(p =>
23
+ p.occurrences >= config.minOccurrences &&
24
+ p.confidence >= config.minConfidence,
25
+ )
26
+ .map(pattern => ({
27
+ pattern: pattern.messageRegex,
28
+ action: pattern.confidence >= 0.90
29
+ ? `Auto-fix available for ${pattern.errorType}`
30
+ : `Suggestion: check ${pattern.errorType} pattern (${pattern.occurrences} occurrences)`,
31
+ description: `Auto-generated from ${pattern.occurrences} occurrences of ${pattern.errorType}`,
32
+ confidence: pattern.confidence,
33
+ sourceErrorIds: pattern.errorIds,
34
+ }));
35
+ }
36
+
37
+ /**
38
+ * Persist generated rules to the database.
39
+ */
40
+ export function persistRules(
41
+ rules: GeneratedRule[],
42
+ ruleRepo: RuleRepository,
43
+ projectId?: number,
44
+ ): number {
45
+ const logger = getLogger();
46
+ let created = 0;
47
+
48
+ for (const rule of rules) {
49
+ // Check if similar rule already exists
50
+ const existing = ruleRepo.findByPattern(rule.pattern);
51
+ if (existing.length > 0) {
52
+ // Update confidence of existing rule
53
+ const best = existing[0]!;
54
+ if (rule.confidence > best.confidence) {
55
+ ruleRepo.update(best.id, { confidence: rule.confidence });
56
+ }
57
+ continue;
58
+ }
59
+
60
+ ruleRepo.create({
61
+ pattern: rule.pattern,
62
+ action: rule.action,
63
+ description: rule.description,
64
+ confidence: rule.confidence,
65
+ occurrences: 0,
66
+ active: 1,
67
+ project_id: projectId ?? null,
68
+ });
69
+ created++;
70
+ logger.info(`New rule generated: ${rule.pattern.substring(0, 50)}...`);
71
+ }
72
+
73
+ return created;
74
+ }
@@ -23,12 +23,12 @@ interface MatchSignal {
23
23
 
24
24
  // Base signals (used when vector search is NOT available)
25
25
  const SIGNALS_BASE: MatchSignal[] = [
26
- { name: 'fingerprint', weight: 0.30, compute: fingerprintMatch },
27
- { name: 'message_similarity', weight: 0.20, compute: messageSimilarity },
26
+ { name: 'fingerprint', weight: 0.20, compute: fingerprintMatch },
27
+ { name: 'message_similarity', weight: 0.25, compute: messageSimilarity },
28
28
  { name: 'type_match', weight: 0.15, compute: typeMatch },
29
29
  { name: 'stack_similarity', weight: 0.15, compute: stackSimilarity },
30
- { name: 'file_similarity', weight: 0.10, compute: fileSimilarity },
31
- { name: 'context_similarity', weight: 0.10, compute: contextSimilarity },
30
+ { name: 'file_similarity', weight: 0.12, compute: fileSimilarity },
31
+ { name: 'context_similarity', weight: 0.13, compute: contextSimilarity },
32
32
  ];
33
33
 
34
34
  // Hybrid signals (used when vector search IS available — vector gets 20% weight)
@@ -42,7 +42,7 @@ const SIGNALS_HYBRID: MatchSignal[] = [
42
42
  ];
43
43
 
44
44
  const VECTOR_WEIGHT = 0.20;
45
- const MATCH_THRESHOLD = 0.70;
45
+ const MATCH_THRESHOLD = 0.55;
46
46
  const STRONG_MATCH_THRESHOLD = 0.90;
47
47
 
48
48
  /**
@@ -1,29 +1,34 @@
1
- import path from 'node:path';
2
- import { sha256 } from '../utils/hash.js';
3
- import type { StackFrame } from '../parsing/types.js';
4
-
5
- export function templateMessage(msg: string): string {
6
- return msg
7
- .replace(/[A-Z]:\\[\w\-.\\ ]+\.\w+/g, '<PATH>')
8
- .replace(/\/[\w\-./ ]+\.\w+/g, '<PATH>')
9
- .replace(/:(\d+):(\d+)/g, ':<LINE>:<COL>')
10
- .replace(/line \d+/gi, 'line <LINE>')
11
- .replace(/0x[0-9a-fA-F]+/g, '<ADDR>')
12
- .replace(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, '<UUID>')
13
- .replace(/https?:\/\/[^\s]+/g, '<URL>')
14
- .replace(/\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/g, '<TIMESTAMP>');
15
- }
16
-
17
- export function generateFingerprint(
18
- errorType: string,
19
- message: string,
20
- frames: StackFrame[],
21
- ): string {
22
- const template = templateMessage(message);
23
- const topFrames = frames
24
- .slice(0, 3)
25
- .map(f => `${f.function_name || '<anon>'}@${path.basename(f.file_path || '<unknown>')}`)
26
- .join('|');
27
- const input = `${errorType}::${template}::${topFrames}`;
28
- return sha256(input);
29
- }
1
+ import path from 'node:path';
2
+ import { sha256 } from '../utils/hash.js';
3
+ import type { StackFrame } from '../parsing/types.js';
4
+
5
+ export function templateMessage(msg: string): string {
6
+ return msg
7
+ .replace(/[A-Z]:\\[\w\-.\\ ]+\.\w+/g, '<PATH>')
8
+ .replace(/\/[\w\-./ ]+\.\w+/g, '<PATH>')
9
+ .replace(/:(\d+):(\d+)/g, ':<LINE>:<COL>')
10
+ .replace(/line \d+/gi, 'line <LINE>')
11
+ .replace(/0x[0-9a-fA-F]+/g, '<ADDR>')
12
+ .replace(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, '<UUID>')
13
+ .replace(/https?:\/\/[^\s]+/g, '<URL>')
14
+ .replace(/\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/g, '<TIMESTAMP>')
15
+ // Normalize JS/TS property access patterns so "reading 'map'" ≈ "reading 'forEach'"
16
+ .replace(/\(reading ['"][^'"]*['"]\)/g, "(reading '<PROP>')")
17
+ .replace(/\(writing ['"][^'"]*['"]\)/g, "(writing '<PROP>')")
18
+ // Normalize quoted identifiers (e.g., 'someVar', "someFunc")
19
+ .replace(/['"][a-zA-Z_$][\w$]*['"]/g, "'<IDENT>'");
20
+ }
21
+
22
+ export function generateFingerprint(
23
+ errorType: string,
24
+ message: string,
25
+ frames: StackFrame[],
26
+ ): string {
27
+ const template = templateMessage(message);
28
+ const topFrames = frames
29
+ .slice(0, 3)
30
+ .map(f => `${f.function_name || '<anon>'}@${path.basename(f.file_path || '<unknown>')}`)
31
+ .join('|');
32
+ const input = `${errorType}::${template}::${topFrames}`;
33
+ return sha256(input);
34
+ }
@@ -1,61 +1,61 @@
1
- export function levenshteinDistance(a: string, b: string): number {
2
- if (a === b) return 1.0;
3
- if (a.length === 0 || b.length === 0) return 0.0;
4
-
5
- const dp: number[][] = Array(b.length + 1)
6
- .fill(0)
7
- .map(() => Array(a.length + 1).fill(0) as number[]);
8
-
9
- for (let i = 0; i <= a.length; i++) dp[0]![i] = i;
10
- for (let j = 0; j <= b.length; j++) dp[j]![0] = j;
11
-
12
- for (let i = 1; i <= b.length; i++) {
13
- for (let j = 1; j <= a.length; j++) {
14
- const cost = a[j - 1] === b[i - 1] ? 0 : 1;
15
- dp[i]![j] = Math.min(
16
- dp[i - 1]![j]! + 1,
17
- dp[i]![j - 1]! + 1,
18
- dp[i - 1]![j - 1]! + cost,
19
- );
20
- }
21
- }
22
-
23
- return 1 - dp[b.length]![a.length]! / Math.max(a.length, b.length);
24
- }
25
-
26
- export function cosineSimilarity(tokensA: string[], tokensB: string[]): number {
27
- if (tokensA.length === 0 || tokensB.length === 0) return 0.0;
28
-
29
- const vocab = new Set([...tokensA, ...tokensB]);
30
- const vecA = new Map<string, number>();
31
- const vecB = new Map<string, number>();
32
-
33
- for (const t of tokensA) vecA.set(t, (vecA.get(t) ?? 0) + 1);
34
- for (const t of tokensB) vecB.set(t, (vecB.get(t) ?? 0) + 1);
35
-
36
- let dot = 0;
37
- let magA = 0;
38
- let magB = 0;
39
-
40
- for (const word of vocab) {
41
- const a = vecA.get(word) ?? 0;
42
- const b = vecB.get(word) ?? 0;
43
- dot += a * b;
44
- magA += a * a;
45
- magB += b * b;
46
- }
47
-
48
- const denom = Math.sqrt(magA) * Math.sqrt(magB);
49
- return denom === 0 ? 0 : dot / denom;
50
- }
51
-
52
- export function jaccardSimilarity(tokensA: string[], tokensB: string[]): number {
53
- if (tokensA.length === 0 && tokensB.length === 0) return 0.0;
54
-
55
- const setA = new Set(tokensA);
56
- const setB = new Set(tokensB);
57
- const intersection = new Set([...setA].filter(x => setB.has(x)));
58
- const union = new Set([...setA, ...setB]);
59
-
60
- return union.size === 0 ? 0 : intersection.size / union.size;
61
- }
1
+ export function levenshteinDistance(a: string, b: string): number {
2
+ if (a === b) return 1.0;
3
+ if (a.length === 0 || b.length === 0) return 0.0;
4
+
5
+ const dp: number[][] = Array(b.length + 1)
6
+ .fill(0)
7
+ .map(() => Array(a.length + 1).fill(0) as number[]);
8
+
9
+ for (let i = 0; i <= a.length; i++) dp[0]![i] = i;
10
+ for (let j = 0; j <= b.length; j++) dp[j]![0] = j;
11
+
12
+ for (let i = 1; i <= b.length; i++) {
13
+ for (let j = 1; j <= a.length; j++) {
14
+ const cost = a[j - 1] === b[i - 1] ? 0 : 1;
15
+ dp[i]![j] = Math.min(
16
+ dp[i - 1]![j]! + 1,
17
+ dp[i]![j - 1]! + 1,
18
+ dp[i - 1]![j - 1]! + cost,
19
+ );
20
+ }
21
+ }
22
+
23
+ return 1 - dp[b.length]![a.length]! / Math.max(a.length, b.length);
24
+ }
25
+
26
+ export function cosineSimilarity(tokensA: string[], tokensB: string[]): number {
27
+ if (tokensA.length === 0 || tokensB.length === 0) return 0.0;
28
+
29
+ const vocab = new Set([...tokensA, ...tokensB]);
30
+ const vecA = new Map<string, number>();
31
+ const vecB = new Map<string, number>();
32
+
33
+ for (const t of tokensA) vecA.set(t, (vecA.get(t) ?? 0) + 1);
34
+ for (const t of tokensB) vecB.set(t, (vecB.get(t) ?? 0) + 1);
35
+
36
+ let dot = 0;
37
+ let magA = 0;
38
+ let magB = 0;
39
+
40
+ for (const word of vocab) {
41
+ const a = vecA.get(word) ?? 0;
42
+ const b = vecB.get(word) ?? 0;
43
+ dot += a * b;
44
+ magA += a * a;
45
+ magB += b * b;
46
+ }
47
+
48
+ const denom = Math.sqrt(magA) * Math.sqrt(magB);
49
+ return denom === 0 ? 0 : dot / denom;
50
+ }
51
+
52
+ export function jaccardSimilarity(tokensA: string[], tokensB: string[]): number {
53
+ if (tokensA.length === 0 && tokensB.length === 0) return 0.0;
54
+
55
+ const setA = new Set(tokensA);
56
+ const setB = new Set(tokensB);
57
+ const intersection = new Set([...setA].filter(x => setB.has(x)));
58
+ const union = new Set([...setA, ...setB]);
59
+
60
+ return union.size === 0 ? 0 : intersection.size / union.size;
61
+ }
@@ -1,74 +1,74 @@
1
- export class TfIdfIndex {
2
- private documents = new Map<number, string[]>();
3
- private df = new Map<string, number>();
4
- private idf = new Map<string, number>();
5
- private documentCount = 0;
6
-
7
- addDocument(id: number, tokens: string[]): void {
8
- if (this.documents.has(id)) {
9
- this.removeDocument(id);
10
- }
11
- const unique = new Set(tokens);
12
- for (const token of unique) {
13
- this.df.set(token, (this.df.get(token) ?? 0) + 1);
14
- }
15
- this.documents.set(id, tokens);
16
- this.documentCount++;
17
- this.recomputeIdfForTerms(unique);
18
- }
19
-
20
- removeDocument(id: number): void {
21
- const tokens = this.documents.get(id);
22
- if (!tokens) return;
23
-
24
- const unique = new Set(tokens);
25
- for (const token of unique) {
26
- const count = this.df.get(token) ?? 0;
27
- if (count <= 1) {
28
- this.df.delete(token);
29
- this.idf.delete(token);
30
- } else {
31
- this.df.set(token, count - 1);
32
- }
33
- }
34
- this.documents.delete(id);
35
- this.documentCount--;
36
- }
37
-
38
- query(tokens: string[], topK: number = 10): Array<{ id: number; score: number }> {
39
- const scores = new Map<number, number>();
40
-
41
- for (const token of tokens) {
42
- const idfVal = this.idf.get(token) ?? 0;
43
- if (idfVal === 0) continue;
44
-
45
- for (const [docId, docTokens] of this.documents) {
46
- const tf = docTokens.filter(t => t === token).length / docTokens.length;
47
- const score = (scores.get(docId) ?? 0) + tf * idfVal;
48
- scores.set(docId, score);
49
- }
50
- }
51
-
52
- return Array.from(scores.entries())
53
- .map(([id, score]) => ({ id, score }))
54
- .sort((a, b) => b.score - a.score)
55
- .slice(0, topK);
56
- }
57
-
58
- getDocumentCount(): number {
59
- return this.documentCount;
60
- }
61
-
62
- getIdf(): ReadonlyMap<string, number> {
63
- return this.idf;
64
- }
65
-
66
- private recomputeIdfForTerms(terms: Set<string>): void {
67
- for (const term of terms) {
68
- const dfVal = this.df.get(term) ?? 0;
69
- if (dfVal > 0 && this.documentCount > 0) {
70
- this.idf.set(term, Math.log(this.documentCount / dfVal));
71
- }
72
- }
73
- }
74
- }
1
+ export class TfIdfIndex {
2
+ private documents = new Map<number, string[]>();
3
+ private df = new Map<string, number>();
4
+ private idf = new Map<string, number>();
5
+ private documentCount = 0;
6
+
7
+ addDocument(id: number, tokens: string[]): void {
8
+ if (this.documents.has(id)) {
9
+ this.removeDocument(id);
10
+ }
11
+ const unique = new Set(tokens);
12
+ for (const token of unique) {
13
+ this.df.set(token, (this.df.get(token) ?? 0) + 1);
14
+ }
15
+ this.documents.set(id, tokens);
16
+ this.documentCount++;
17
+ this.recomputeIdfForTerms(unique);
18
+ }
19
+
20
+ removeDocument(id: number): void {
21
+ const tokens = this.documents.get(id);
22
+ if (!tokens) return;
23
+
24
+ const unique = new Set(tokens);
25
+ for (const token of unique) {
26
+ const count = this.df.get(token) ?? 0;
27
+ if (count <= 1) {
28
+ this.df.delete(token);
29
+ this.idf.delete(token);
30
+ } else {
31
+ this.df.set(token, count - 1);
32
+ }
33
+ }
34
+ this.documents.delete(id);
35
+ this.documentCount--;
36
+ }
37
+
38
+ query(tokens: string[], topK: number = 10): Array<{ id: number; score: number }> {
39
+ const scores = new Map<number, number>();
40
+
41
+ for (const token of tokens) {
42
+ const idfVal = this.idf.get(token) ?? 0;
43
+ if (idfVal === 0) continue;
44
+
45
+ for (const [docId, docTokens] of this.documents) {
46
+ const tf = docTokens.filter(t => t === token).length / docTokens.length;
47
+ const score = (scores.get(docId) ?? 0) + tf * idfVal;
48
+ scores.set(docId, score);
49
+ }
50
+ }
51
+
52
+ return Array.from(scores.entries())
53
+ .map(([id, score]) => ({ id, score }))
54
+ .sort((a, b) => b.score - a.score)
55
+ .slice(0, topK);
56
+ }
57
+
58
+ getDocumentCount(): number {
59
+ return this.documentCount;
60
+ }
61
+
62
+ getIdf(): ReadonlyMap<string, number> {
63
+ return this.idf;
64
+ }
65
+
66
+ private recomputeIdfForTerms(terms: Set<string>): void {
67
+ for (const term of terms) {
68
+ const dfVal = this.df.get(term) ?? 0;
69
+ if (dfVal > 0 && this.documentCount > 0) {
70
+ this.idf.set(term, Math.log(this.documentCount / dfVal));
71
+ }
72
+ }
73
+ }
74
+ }
@@ -1,41 +1,41 @@
1
- const STOPWORDS = new Set([
2
- 'the', 'is', 'are', 'a', 'an', 'and', 'or', 'not', 'in', 'at', 'by', 'for',
3
- 'from', 'of', 'on', 'to', 'with', 'as', 'error', 'exception', 'throw', 'catch',
4
- 'was', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
5
- 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'it', 'its',
6
- 'this', 'that', 'these', 'those', 'i', 'we', 'you', 'he', 'she', 'they',
7
- ]);
8
-
9
- export function splitCamelCase(text: string): string[] {
10
- return text
11
- .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
12
- .replace(/([a-z\d])([A-Z])/g, '$1 $2')
13
- .split(/\s+/)
14
- .filter(t => t.length > 0);
15
- }
16
-
17
- export function splitSnakeCase(text: string): string[] {
18
- return text.split(/[_\-]+/).filter(t => t.length > 0);
19
- }
20
-
21
- export function removeStopwords(tokens: string[]): string[] {
22
- return tokens.filter(t => !STOPWORDS.has(t.toLowerCase()));
23
- }
24
-
25
- export function tokenize(text: string): string[] {
26
- const words = text
27
- .replace(/[^\w\s]/g, ' ')
28
- .split(/\s+/)
29
- .filter(t => t.length > 0);
30
-
31
- const tokens: string[] = [];
32
- for (const word of words) {
33
- tokens.push(...splitCamelCase(word));
34
- if (word.includes('_') || word.includes('-')) {
35
- tokens.push(...splitSnakeCase(word));
36
- }
37
- }
38
-
39
- const cleaned = removeStopwords(tokens);
40
- return [...new Set(cleaned.map(t => t.toLowerCase()))];
41
- }
1
+ const STOPWORDS = new Set([
2
+ 'the', 'is', 'are', 'a', 'an', 'and', 'or', 'not', 'in', 'at', 'by', 'for',
3
+ 'from', 'of', 'on', 'to', 'with', 'as', 'error', 'exception', 'throw', 'catch',
4
+ 'was', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
5
+ 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'it', 'its',
6
+ 'this', 'that', 'these', 'those', 'i', 'we', 'you', 'he', 'she', 'they',
7
+ ]);
8
+
9
+ export function splitCamelCase(text: string): string[] {
10
+ return text
11
+ .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
12
+ .replace(/([a-z\d])([A-Z])/g, '$1 $2')
13
+ .split(/\s+/)
14
+ .filter(t => t.length > 0);
15
+ }
16
+
17
+ export function splitSnakeCase(text: string): string[] {
18
+ return text.split(/[_\-]+/).filter(t => t.length > 0);
19
+ }
20
+
21
+ export function removeStopwords(tokens: string[]): string[] {
22
+ return tokens.filter(t => !STOPWORDS.has(t.toLowerCase()));
23
+ }
24
+
25
+ export function tokenize(text: string): string[] {
26
+ const words = text
27
+ .replace(/[^\w\s]/g, ' ')
28
+ .split(/\s+/)
29
+ .filter(t => t.length > 0);
30
+
31
+ const tokens: string[] = [];
32
+ for (const word of words) {
33
+ tokens.push(...splitCamelCase(word));
34
+ if (word.includes('_') || word.includes('-')) {
35
+ tokens.push(...splitSnakeCase(word));
36
+ }
37
+ }
38
+
39
+ const cleaned = removeStopwords(tokens);
40
+ return [...new Set(cleaned.map(t => t.toLowerCase()))];
41
+ }