@timmeck/brain 1.8.1 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/BRAIN_PLAN.md +3324 -3324
  2. package/LICENSE +21 -21
  3. package/dist/cli/commands/dashboard.js +595 -595
  4. package/dist/dashboard/server.js +25 -25
  5. package/dist/db/migrations/001_core_schema.js +115 -115
  6. package/dist/db/migrations/002_learning_schema.js +33 -33
  7. package/dist/db/migrations/003_code_schema.js +48 -48
  8. package/dist/db/migrations/004_synapses_schema.js +52 -52
  9. package/dist/db/migrations/005_fts_indexes.js +73 -73
  10. package/dist/db/migrations/007_feedback.js +8 -8
  11. package/dist/db/migrations/008_git_integration.js +33 -33
  12. package/dist/db/migrations/009_embeddings.js +3 -3
  13. package/dist/db/repositories/antipattern.repository.js +3 -3
  14. package/dist/db/repositories/code-module.repository.js +32 -32
  15. package/dist/db/repositories/notification.repository.js +3 -3
  16. package/dist/db/repositories/project.repository.js +21 -21
  17. package/dist/db/repositories/rule.repository.js +24 -24
  18. package/dist/db/repositories/solution.repository.js +50 -50
  19. package/dist/db/repositories/synapse.repository.js +18 -18
  20. package/dist/db/repositories/terminal.repository.js +24 -24
  21. package/dist/ipc/server.d.ts +8 -0
  22. package/dist/ipc/server.js +67 -1
  23. package/dist/ipc/server.js.map +1 -1
  24. package/dist/matching/error-matcher.js +5 -5
  25. package/dist/matching/fingerprint.js +6 -1
  26. package/dist/matching/fingerprint.js.map +1 -1
  27. package/dist/services/error.service.js +4 -3
  28. package/dist/services/error.service.js.map +1 -1
  29. package/dist/services/git.service.js +14 -14
  30. package/package.json +49 -49
  31. package/src/api/server.ts +395 -395
  32. package/src/brain.ts +266 -266
  33. package/src/cli/colors.ts +116 -116
  34. package/src/cli/commands/config.ts +169 -169
  35. package/src/cli/commands/dashboard.ts +755 -755
  36. package/src/cli/commands/doctor.ts +118 -118
  37. package/src/cli/commands/explain.ts +83 -83
  38. package/src/cli/commands/export.ts +31 -31
  39. package/src/cli/commands/import.ts +199 -199
  40. package/src/cli/commands/insights.ts +65 -65
  41. package/src/cli/commands/learn.ts +24 -24
  42. package/src/cli/commands/modules.ts +53 -53
  43. package/src/cli/commands/network.ts +67 -67
  44. package/src/cli/commands/projects.ts +42 -42
  45. package/src/cli/commands/query.ts +120 -120
  46. package/src/cli/commands/start.ts +62 -62
  47. package/src/cli/commands/status.ts +75 -75
  48. package/src/cli/commands/stop.ts +34 -34
  49. package/src/cli/ipc-helper.ts +22 -22
  50. package/src/cli/update-check.ts +63 -63
  51. package/src/code/fingerprint.ts +87 -87
  52. package/src/code/parsers/generic.ts +29 -29
  53. package/src/code/parsers/python.ts +54 -54
  54. package/src/code/parsers/typescript.ts +65 -65
  55. package/src/code/registry.ts +60 -60
  56. package/src/dashboard/server.ts +142 -142
  57. package/src/db/connection.ts +22 -22
  58. package/src/db/migrations/001_core_schema.ts +120 -120
  59. package/src/db/migrations/002_learning_schema.ts +38 -38
  60. package/src/db/migrations/003_code_schema.ts +53 -53
  61. package/src/db/migrations/004_synapses_schema.ts +57 -57
  62. package/src/db/migrations/005_fts_indexes.ts +78 -78
  63. package/src/db/migrations/006_synapses_phase3.ts +17 -17
  64. package/src/db/migrations/007_feedback.ts +13 -13
  65. package/src/db/migrations/008_git_integration.ts +38 -38
  66. package/src/db/migrations/009_embeddings.ts +8 -8
  67. package/src/db/repositories/antipattern.repository.ts +66 -66
  68. package/src/db/repositories/code-module.repository.ts +142 -142
  69. package/src/db/repositories/notification.repository.ts +66 -66
  70. package/src/db/repositories/project.repository.ts +93 -93
  71. package/src/db/repositories/rule.repository.ts +108 -108
  72. package/src/db/repositories/solution.repository.ts +154 -154
  73. package/src/db/repositories/synapse.repository.ts +153 -153
  74. package/src/db/repositories/terminal.repository.ts +101 -101
  75. package/src/embeddings/engine.ts +238 -238
  76. package/src/index.ts +63 -63
  77. package/src/ipc/client.ts +118 -118
  78. package/src/ipc/protocol.ts +35 -35
  79. package/src/ipc/router.ts +133 -133
  80. package/src/ipc/server.ts +176 -110
  81. package/src/learning/decay.ts +46 -46
  82. package/src/learning/pattern-extractor.ts +90 -90
  83. package/src/learning/rule-generator.ts +74 -74
  84. package/src/matching/error-matcher.ts +5 -5
  85. package/src/matching/fingerprint.ts +34 -29
  86. package/src/matching/similarity.ts +61 -61
  87. package/src/matching/tfidf.ts +74 -74
  88. package/src/matching/tokenizer.ts +41 -41
  89. package/src/mcp/auto-detect.ts +93 -93
  90. package/src/mcp/http-server.ts +140 -140
  91. package/src/mcp/server.ts +73 -73
  92. package/src/parsing/error-parser.ts +28 -28
  93. package/src/parsing/parsers/compiler.ts +93 -93
  94. package/src/parsing/parsers/generic.ts +28 -28
  95. package/src/parsing/parsers/go.ts +97 -97
  96. package/src/parsing/parsers/node.ts +69 -69
  97. package/src/parsing/parsers/python.ts +62 -62
  98. package/src/parsing/parsers/rust.ts +50 -50
  99. package/src/parsing/parsers/shell.ts +42 -42
  100. package/src/parsing/types.ts +47 -47
  101. package/src/research/gap-analyzer.ts +135 -135
  102. package/src/research/insight-generator.ts +123 -123
  103. package/src/research/research-engine.ts +116 -116
  104. package/src/research/synergy-detector.ts +126 -126
  105. package/src/research/template-extractor.ts +130 -130
  106. package/src/research/trend-analyzer.ts +127 -127
  107. package/src/services/code.service.ts +271 -271
  108. package/src/services/error.service.ts +4 -3
  109. package/src/services/git.service.ts +132 -132
  110. package/src/services/notification.service.ts +41 -41
  111. package/src/services/synapse.service.ts +59 -59
  112. package/src/services/terminal.service.ts +81 -81
  113. package/src/synapses/activation.ts +80 -80
  114. package/src/synapses/decay.ts +38 -38
  115. package/src/synapses/hebbian.ts +69 -69
  116. package/src/synapses/pathfinder.ts +81 -81
  117. package/src/synapses/synapse-manager.ts +109 -109
  118. package/src/types/code.types.ts +52 -52
  119. package/src/types/error.types.ts +67 -67
  120. package/src/types/ipc.types.ts +8 -8
  121. package/src/types/mcp.types.ts +53 -53
  122. package/src/types/research.types.ts +28 -28
  123. package/src/types/solution.types.ts +30 -30
  124. package/src/utils/events.ts +45 -45
  125. package/src/utils/hash.ts +5 -5
  126. package/src/utils/logger.ts +48 -48
  127. package/src/utils/paths.ts +19 -19
  128. package/tests/e2e/test_code_intelligence.py +1015 -0
  129. package/tests/e2e/test_error_memory.py +451 -0
  130. package/tests/e2e/test_full_integration.py +534 -0
  131. package/tests/fixtures/code-modules/modules.ts +83 -83
  132. package/tests/fixtures/errors/go.ts +9 -9
  133. package/tests/fixtures/errors/node.ts +24 -24
  134. package/tests/fixtures/errors/python.ts +21 -21
  135. package/tests/fixtures/errors/rust.ts +25 -25
  136. package/tests/fixtures/errors/shell.ts +15 -15
  137. package/tests/fixtures/solutions/solutions.ts +27 -27
  138. package/tests/helpers/setup-db.ts +52 -52
  139. package/tests/integration/code-flow.test.ts +86 -86
  140. package/tests/integration/error-flow.test.ts +83 -83
  141. package/tests/integration/ipc-flow.test.ts +166 -166
  142. package/tests/integration/learning-cycle.test.ts +82 -82
  143. package/tests/integration/synapse-flow.test.ts +117 -117
  144. package/tests/unit/code/analyzer.test.ts +58 -58
  145. package/tests/unit/code/fingerprint.test.ts +51 -51
  146. package/tests/unit/code/scorer.test.ts +55 -55
  147. package/tests/unit/learning/confidence-scorer.test.ts +60 -60
  148. package/tests/unit/learning/decay.test.ts +45 -45
  149. package/tests/unit/learning/pattern-extractor.test.ts +50 -50
  150. package/tests/unit/matching/error-matcher.test.ts +69 -69
  151. package/tests/unit/matching/fingerprint.test.ts +47 -47
  152. package/tests/unit/matching/similarity.test.ts +65 -65
  153. package/tests/unit/matching/tfidf.test.ts +71 -71
  154. package/tests/unit/matching/tokenizer.test.ts +83 -83
  155. package/tests/unit/parsing/parsers.test.ts +113 -113
  156. package/tests/unit/research/gap-analyzer.test.ts +45 -45
  157. package/tests/unit/research/trend-analyzer.test.ts +45 -45
  158. package/tests/unit/synapses/activation.test.ts +80 -80
  159. package/tests/unit/synapses/decay.test.ts +27 -27
  160. package/tests/unit/synapses/hebbian.test.ts +96 -96
  161. package/tests/unit/synapses/pathfinder.test.ts +72 -72
  162. package/tsconfig.json +18 -18
@@ -1,74 +1,74 @@
1
- import type { LearningConfig } from '../types/config.types.js';
2
- import type { RuleRepository } from '../db/repositories/rule.repository.js';
3
- import type { ErrorPattern } from './pattern-extractor.js';
4
- import { getLogger } from '../utils/logger.js';
5
-
6
- export interface GeneratedRule {
7
- pattern: string;
8
- action: string;
9
- description: string;
10
- confidence: number;
11
- sourceErrorIds: number[];
12
- }
13
-
14
- /**
15
- * Generate prevention rules from extracted patterns.
16
- */
17
- export function generateRules(
18
- patterns: ErrorPattern[],
19
- config: LearningConfig,
20
- ): GeneratedRule[] {
21
- return patterns
22
- .filter(p =>
23
- p.occurrences >= config.minOccurrences &&
24
- p.confidence >= config.minConfidence,
25
- )
26
- .map(pattern => ({
27
- pattern: pattern.messageRegex,
28
- action: pattern.confidence >= 0.90
29
- ? `Auto-fix available for ${pattern.errorType}`
30
- : `Suggestion: check ${pattern.errorType} pattern (${pattern.occurrences} occurrences)`,
31
- description: `Auto-generated from ${pattern.occurrences} occurrences of ${pattern.errorType}`,
32
- confidence: pattern.confidence,
33
- sourceErrorIds: pattern.errorIds,
34
- }));
35
- }
36
-
37
- /**
38
- * Persist generated rules to the database.
39
- */
40
- export function persistRules(
41
- rules: GeneratedRule[],
42
- ruleRepo: RuleRepository,
43
- projectId?: number,
44
- ): number {
45
- const logger = getLogger();
46
- let created = 0;
47
-
48
- for (const rule of rules) {
49
- // Check if similar rule already exists
50
- const existing = ruleRepo.findByPattern(rule.pattern);
51
- if (existing.length > 0) {
52
- // Update confidence of existing rule
53
- const best = existing[0]!;
54
- if (rule.confidence > best.confidence) {
55
- ruleRepo.update(best.id, { confidence: rule.confidence });
56
- }
57
- continue;
58
- }
59
-
60
- ruleRepo.create({
61
- pattern: rule.pattern,
62
- action: rule.action,
63
- description: rule.description,
64
- confidence: rule.confidence,
65
- occurrences: 0,
66
- active: 1,
67
- project_id: projectId ?? null,
68
- });
69
- created++;
70
- logger.info(`New rule generated: ${rule.pattern.substring(0, 50)}...`);
71
- }
72
-
73
- return created;
74
- }
1
+ import type { LearningConfig } from '../types/config.types.js';
2
+ import type { RuleRepository } from '../db/repositories/rule.repository.js';
3
+ import type { ErrorPattern } from './pattern-extractor.js';
4
+ import { getLogger } from '../utils/logger.js';
5
+
6
+ export interface GeneratedRule {
7
+ pattern: string;
8
+ action: string;
9
+ description: string;
10
+ confidence: number;
11
+ sourceErrorIds: number[];
12
+ }
13
+
14
+ /**
15
+ * Generate prevention rules from extracted patterns.
16
+ */
17
+ export function generateRules(
18
+ patterns: ErrorPattern[],
19
+ config: LearningConfig,
20
+ ): GeneratedRule[] {
21
+ return patterns
22
+ .filter(p =>
23
+ p.occurrences >= config.minOccurrences &&
24
+ p.confidence >= config.minConfidence,
25
+ )
26
+ .map(pattern => ({
27
+ pattern: pattern.messageRegex,
28
+ action: pattern.confidence >= 0.90
29
+ ? `Auto-fix available for ${pattern.errorType}`
30
+ : `Suggestion: check ${pattern.errorType} pattern (${pattern.occurrences} occurrences)`,
31
+ description: `Auto-generated from ${pattern.occurrences} occurrences of ${pattern.errorType}`,
32
+ confidence: pattern.confidence,
33
+ sourceErrorIds: pattern.errorIds,
34
+ }));
35
+ }
36
+
37
+ /**
38
+ * Persist generated rules to the database.
39
+ */
40
+ export function persistRules(
41
+ rules: GeneratedRule[],
42
+ ruleRepo: RuleRepository,
43
+ projectId?: number,
44
+ ): number {
45
+ const logger = getLogger();
46
+ let created = 0;
47
+
48
+ for (const rule of rules) {
49
+ // Check if similar rule already exists
50
+ const existing = ruleRepo.findByPattern(rule.pattern);
51
+ if (existing.length > 0) {
52
+ // Update confidence of existing rule
53
+ const best = existing[0]!;
54
+ if (rule.confidence > best.confidence) {
55
+ ruleRepo.update(best.id, { confidence: rule.confidence });
56
+ }
57
+ continue;
58
+ }
59
+
60
+ ruleRepo.create({
61
+ pattern: rule.pattern,
62
+ action: rule.action,
63
+ description: rule.description,
64
+ confidence: rule.confidence,
65
+ occurrences: 0,
66
+ active: 1,
67
+ project_id: projectId ?? null,
68
+ });
69
+ created++;
70
+ logger.info(`New rule generated: ${rule.pattern.substring(0, 50)}...`);
71
+ }
72
+
73
+ return created;
74
+ }
@@ -23,12 +23,12 @@ interface MatchSignal {
23
23
 
24
24
  // Base signals (used when vector search is NOT available)
25
25
  const SIGNALS_BASE: MatchSignal[] = [
26
- { name: 'fingerprint', weight: 0.30, compute: fingerprintMatch },
27
- { name: 'message_similarity', weight: 0.20, compute: messageSimilarity },
26
+ { name: 'fingerprint', weight: 0.20, compute: fingerprintMatch },
27
+ { name: 'message_similarity', weight: 0.25, compute: messageSimilarity },
28
28
  { name: 'type_match', weight: 0.15, compute: typeMatch },
29
29
  { name: 'stack_similarity', weight: 0.15, compute: stackSimilarity },
30
- { name: 'file_similarity', weight: 0.10, compute: fileSimilarity },
31
- { name: 'context_similarity', weight: 0.10, compute: contextSimilarity },
30
+ { name: 'file_similarity', weight: 0.12, compute: fileSimilarity },
31
+ { name: 'context_similarity', weight: 0.13, compute: contextSimilarity },
32
32
  ];
33
33
 
34
34
  // Hybrid signals (used when vector search IS available — vector gets 20% weight)
@@ -42,7 +42,7 @@ const SIGNALS_HYBRID: MatchSignal[] = [
42
42
  ];
43
43
 
44
44
  const VECTOR_WEIGHT = 0.20;
45
- const MATCH_THRESHOLD = 0.70;
45
+ const MATCH_THRESHOLD = 0.55;
46
46
  const STRONG_MATCH_THRESHOLD = 0.90;
47
47
 
48
48
  /**
@@ -1,29 +1,34 @@
1
- import path from 'node:path';
2
- import { sha256 } from '../utils/hash.js';
3
- import type { StackFrame } from '../parsing/types.js';
4
-
5
- export function templateMessage(msg: string): string {
6
- return msg
7
- .replace(/[A-Z]:\\[\w\-.\\ ]+\.\w+/g, '<PATH>')
8
- .replace(/\/[\w\-./ ]+\.\w+/g, '<PATH>')
9
- .replace(/:(\d+):(\d+)/g, ':<LINE>:<COL>')
10
- .replace(/line \d+/gi, 'line <LINE>')
11
- .replace(/0x[0-9a-fA-F]+/g, '<ADDR>')
12
- .replace(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, '<UUID>')
13
- .replace(/https?:\/\/[^\s]+/g, '<URL>')
14
- .replace(/\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/g, '<TIMESTAMP>');
15
- }
16
-
17
- export function generateFingerprint(
18
- errorType: string,
19
- message: string,
20
- frames: StackFrame[],
21
- ): string {
22
- const template = templateMessage(message);
23
- const topFrames = frames
24
- .slice(0, 3)
25
- .map(f => `${f.function_name || '<anon>'}@${path.basename(f.file_path || '<unknown>')}`)
26
- .join('|');
27
- const input = `${errorType}::${template}::${topFrames}`;
28
- return sha256(input);
29
- }
1
+ import path from 'node:path';
2
+ import { sha256 } from '../utils/hash.js';
3
+ import type { StackFrame } from '../parsing/types.js';
4
+
5
+ export function templateMessage(msg: string): string {
6
+ return msg
7
+ .replace(/[A-Z]:\\[\w\-.\\ ]+\.\w+/g, '<PATH>')
8
+ .replace(/\/[\w\-./ ]+\.\w+/g, '<PATH>')
9
+ .replace(/:(\d+):(\d+)/g, ':<LINE>:<COL>')
10
+ .replace(/line \d+/gi, 'line <LINE>')
11
+ .replace(/0x[0-9a-fA-F]+/g, '<ADDR>')
12
+ .replace(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, '<UUID>')
13
+ .replace(/https?:\/\/[^\s]+/g, '<URL>')
14
+ .replace(/\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/g, '<TIMESTAMP>')
15
+ // Normalize JS/TS property access patterns so "reading 'map'" ≈ "reading 'forEach'"
16
+ .replace(/\(reading ['"][^'"]*['"]\)/g, "(reading '<PROP>')")
17
+ .replace(/\(writing ['"][^'"]*['"]\)/g, "(writing '<PROP>')")
18
+ // Normalize quoted identifiers (e.g., 'someVar', "someFunc")
19
+ .replace(/['"][a-zA-Z_$][\w$]*['"]/g, "'<IDENT>'");
20
+ }
21
+
22
+ export function generateFingerprint(
23
+ errorType: string,
24
+ message: string,
25
+ frames: StackFrame[],
26
+ ): string {
27
+ const template = templateMessage(message);
28
+ const topFrames = frames
29
+ .slice(0, 3)
30
+ .map(f => `${f.function_name || '<anon>'}@${path.basename(f.file_path || '<unknown>')}`)
31
+ .join('|');
32
+ const input = `${errorType}::${template}::${topFrames}`;
33
+ return sha256(input);
34
+ }
@@ -1,61 +1,61 @@
1
- export function levenshteinDistance(a: string, b: string): number {
2
- if (a === b) return 1.0;
3
- if (a.length === 0 || b.length === 0) return 0.0;
4
-
5
- const dp: number[][] = Array(b.length + 1)
6
- .fill(0)
7
- .map(() => Array(a.length + 1).fill(0) as number[]);
8
-
9
- for (let i = 0; i <= a.length; i++) dp[0]![i] = i;
10
- for (let j = 0; j <= b.length; j++) dp[j]![0] = j;
11
-
12
- for (let i = 1; i <= b.length; i++) {
13
- for (let j = 1; j <= a.length; j++) {
14
- const cost = a[j - 1] === b[i - 1] ? 0 : 1;
15
- dp[i]![j] = Math.min(
16
- dp[i - 1]![j]! + 1,
17
- dp[i]![j - 1]! + 1,
18
- dp[i - 1]![j - 1]! + cost,
19
- );
20
- }
21
- }
22
-
23
- return 1 - dp[b.length]![a.length]! / Math.max(a.length, b.length);
24
- }
25
-
26
- export function cosineSimilarity(tokensA: string[], tokensB: string[]): number {
27
- if (tokensA.length === 0 || tokensB.length === 0) return 0.0;
28
-
29
- const vocab = new Set([...tokensA, ...tokensB]);
30
- const vecA = new Map<string, number>();
31
- const vecB = new Map<string, number>();
32
-
33
- for (const t of tokensA) vecA.set(t, (vecA.get(t) ?? 0) + 1);
34
- for (const t of tokensB) vecB.set(t, (vecB.get(t) ?? 0) + 1);
35
-
36
- let dot = 0;
37
- let magA = 0;
38
- let magB = 0;
39
-
40
- for (const word of vocab) {
41
- const a = vecA.get(word) ?? 0;
42
- const b = vecB.get(word) ?? 0;
43
- dot += a * b;
44
- magA += a * a;
45
- magB += b * b;
46
- }
47
-
48
- const denom = Math.sqrt(magA) * Math.sqrt(magB);
49
- return denom === 0 ? 0 : dot / denom;
50
- }
51
-
52
- export function jaccardSimilarity(tokensA: string[], tokensB: string[]): number {
53
- if (tokensA.length === 0 && tokensB.length === 0) return 0.0;
54
-
55
- const setA = new Set(tokensA);
56
- const setB = new Set(tokensB);
57
- const intersection = new Set([...setA].filter(x => setB.has(x)));
58
- const union = new Set([...setA, ...setB]);
59
-
60
- return union.size === 0 ? 0 : intersection.size / union.size;
61
- }
1
+ export function levenshteinDistance(a: string, b: string): number {
2
+ if (a === b) return 1.0;
3
+ if (a.length === 0 || b.length === 0) return 0.0;
4
+
5
+ const dp: number[][] = Array(b.length + 1)
6
+ .fill(0)
7
+ .map(() => Array(a.length + 1).fill(0) as number[]);
8
+
9
+ for (let i = 0; i <= a.length; i++) dp[0]![i] = i;
10
+ for (let j = 0; j <= b.length; j++) dp[j]![0] = j;
11
+
12
+ for (let i = 1; i <= b.length; i++) {
13
+ for (let j = 1; j <= a.length; j++) {
14
+ const cost = a[j - 1] === b[i - 1] ? 0 : 1;
15
+ dp[i]![j] = Math.min(
16
+ dp[i - 1]![j]! + 1,
17
+ dp[i]![j - 1]! + 1,
18
+ dp[i - 1]![j - 1]! + cost,
19
+ );
20
+ }
21
+ }
22
+
23
+ return 1 - dp[b.length]![a.length]! / Math.max(a.length, b.length);
24
+ }
25
+
26
+ export function cosineSimilarity(tokensA: string[], tokensB: string[]): number {
27
+ if (tokensA.length === 0 || tokensB.length === 0) return 0.0;
28
+
29
+ const vocab = new Set([...tokensA, ...tokensB]);
30
+ const vecA = new Map<string, number>();
31
+ const vecB = new Map<string, number>();
32
+
33
+ for (const t of tokensA) vecA.set(t, (vecA.get(t) ?? 0) + 1);
34
+ for (const t of tokensB) vecB.set(t, (vecB.get(t) ?? 0) + 1);
35
+
36
+ let dot = 0;
37
+ let magA = 0;
38
+ let magB = 0;
39
+
40
+ for (const word of vocab) {
41
+ const a = vecA.get(word) ?? 0;
42
+ const b = vecB.get(word) ?? 0;
43
+ dot += a * b;
44
+ magA += a * a;
45
+ magB += b * b;
46
+ }
47
+
48
+ const denom = Math.sqrt(magA) * Math.sqrt(magB);
49
+ return denom === 0 ? 0 : dot / denom;
50
+ }
51
+
52
+ export function jaccardSimilarity(tokensA: string[], tokensB: string[]): number {
53
+ if (tokensA.length === 0 && tokensB.length === 0) return 0.0;
54
+
55
+ const setA = new Set(tokensA);
56
+ const setB = new Set(tokensB);
57
+ const intersection = new Set([...setA].filter(x => setB.has(x)));
58
+ const union = new Set([...setA, ...setB]);
59
+
60
+ return union.size === 0 ? 0 : intersection.size / union.size;
61
+ }
@@ -1,74 +1,74 @@
1
- export class TfIdfIndex {
2
- private documents = new Map<number, string[]>();
3
- private df = new Map<string, number>();
4
- private idf = new Map<string, number>();
5
- private documentCount = 0;
6
-
7
- addDocument(id: number, tokens: string[]): void {
8
- if (this.documents.has(id)) {
9
- this.removeDocument(id);
10
- }
11
- const unique = new Set(tokens);
12
- for (const token of unique) {
13
- this.df.set(token, (this.df.get(token) ?? 0) + 1);
14
- }
15
- this.documents.set(id, tokens);
16
- this.documentCount++;
17
- this.recomputeIdfForTerms(unique);
18
- }
19
-
20
- removeDocument(id: number): void {
21
- const tokens = this.documents.get(id);
22
- if (!tokens) return;
23
-
24
- const unique = new Set(tokens);
25
- for (const token of unique) {
26
- const count = this.df.get(token) ?? 0;
27
- if (count <= 1) {
28
- this.df.delete(token);
29
- this.idf.delete(token);
30
- } else {
31
- this.df.set(token, count - 1);
32
- }
33
- }
34
- this.documents.delete(id);
35
- this.documentCount--;
36
- }
37
-
38
- query(tokens: string[], topK: number = 10): Array<{ id: number; score: number }> {
39
- const scores = new Map<number, number>();
40
-
41
- for (const token of tokens) {
42
- const idfVal = this.idf.get(token) ?? 0;
43
- if (idfVal === 0) continue;
44
-
45
- for (const [docId, docTokens] of this.documents) {
46
- const tf = docTokens.filter(t => t === token).length / docTokens.length;
47
- const score = (scores.get(docId) ?? 0) + tf * idfVal;
48
- scores.set(docId, score);
49
- }
50
- }
51
-
52
- return Array.from(scores.entries())
53
- .map(([id, score]) => ({ id, score }))
54
- .sort((a, b) => b.score - a.score)
55
- .slice(0, topK);
56
- }
57
-
58
- getDocumentCount(): number {
59
- return this.documentCount;
60
- }
61
-
62
- getIdf(): ReadonlyMap<string, number> {
63
- return this.idf;
64
- }
65
-
66
- private recomputeIdfForTerms(terms: Set<string>): void {
67
- for (const term of terms) {
68
- const dfVal = this.df.get(term) ?? 0;
69
- if (dfVal > 0 && this.documentCount > 0) {
70
- this.idf.set(term, Math.log(this.documentCount / dfVal));
71
- }
72
- }
73
- }
74
- }
1
+ export class TfIdfIndex {
2
+ private documents = new Map<number, string[]>();
3
+ private df = new Map<string, number>();
4
+ private idf = new Map<string, number>();
5
+ private documentCount = 0;
6
+
7
+ addDocument(id: number, tokens: string[]): void {
8
+ if (this.documents.has(id)) {
9
+ this.removeDocument(id);
10
+ }
11
+ const unique = new Set(tokens);
12
+ for (const token of unique) {
13
+ this.df.set(token, (this.df.get(token) ?? 0) + 1);
14
+ }
15
+ this.documents.set(id, tokens);
16
+ this.documentCount++;
17
+ this.recomputeIdfForTerms(unique);
18
+ }
19
+
20
+ removeDocument(id: number): void {
21
+ const tokens = this.documents.get(id);
22
+ if (!tokens) return;
23
+
24
+ const unique = new Set(tokens);
25
+ for (const token of unique) {
26
+ const count = this.df.get(token) ?? 0;
27
+ if (count <= 1) {
28
+ this.df.delete(token);
29
+ this.idf.delete(token);
30
+ } else {
31
+ this.df.set(token, count - 1);
32
+ }
33
+ }
34
+ this.documents.delete(id);
35
+ this.documentCount--;
36
+ }
37
+
38
+ query(tokens: string[], topK: number = 10): Array<{ id: number; score: number }> {
39
+ const scores = new Map<number, number>();
40
+
41
+ for (const token of tokens) {
42
+ const idfVal = this.idf.get(token) ?? 0;
43
+ if (idfVal === 0) continue;
44
+
45
+ for (const [docId, docTokens] of this.documents) {
46
+ const tf = docTokens.filter(t => t === token).length / docTokens.length;
47
+ const score = (scores.get(docId) ?? 0) + tf * idfVal;
48
+ scores.set(docId, score);
49
+ }
50
+ }
51
+
52
+ return Array.from(scores.entries())
53
+ .map(([id, score]) => ({ id, score }))
54
+ .sort((a, b) => b.score - a.score)
55
+ .slice(0, topK);
56
+ }
57
+
58
+ getDocumentCount(): number {
59
+ return this.documentCount;
60
+ }
61
+
62
+ getIdf(): ReadonlyMap<string, number> {
63
+ return this.idf;
64
+ }
65
+
66
+ private recomputeIdfForTerms(terms: Set<string>): void {
67
+ for (const term of terms) {
68
+ const dfVal = this.df.get(term) ?? 0;
69
+ if (dfVal > 0 && this.documentCount > 0) {
70
+ this.idf.set(term, Math.log(this.documentCount / dfVal));
71
+ }
72
+ }
73
+ }
74
+ }
@@ -1,41 +1,41 @@
1
- const STOPWORDS = new Set([
2
- 'the', 'is', 'are', 'a', 'an', 'and', 'or', 'not', 'in', 'at', 'by', 'for',
3
- 'from', 'of', 'on', 'to', 'with', 'as', 'error', 'exception', 'throw', 'catch',
4
- 'was', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
5
- 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'it', 'its',
6
- 'this', 'that', 'these', 'those', 'i', 'we', 'you', 'he', 'she', 'they',
7
- ]);
8
-
9
- export function splitCamelCase(text: string): string[] {
10
- return text
11
- .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
12
- .replace(/([a-z\d])([A-Z])/g, '$1 $2')
13
- .split(/\s+/)
14
- .filter(t => t.length > 0);
15
- }
16
-
17
- export function splitSnakeCase(text: string): string[] {
18
- return text.split(/[_\-]+/).filter(t => t.length > 0);
19
- }
20
-
21
- export function removeStopwords(tokens: string[]): string[] {
22
- return tokens.filter(t => !STOPWORDS.has(t.toLowerCase()));
23
- }
24
-
25
- export function tokenize(text: string): string[] {
26
- const words = text
27
- .replace(/[^\w\s]/g, ' ')
28
- .split(/\s+/)
29
- .filter(t => t.length > 0);
30
-
31
- const tokens: string[] = [];
32
- for (const word of words) {
33
- tokens.push(...splitCamelCase(word));
34
- if (word.includes('_') || word.includes('-')) {
35
- tokens.push(...splitSnakeCase(word));
36
- }
37
- }
38
-
39
- const cleaned = removeStopwords(tokens);
40
- return [...new Set(cleaned.map(t => t.toLowerCase()))];
41
- }
1
+ const STOPWORDS = new Set([
2
+ 'the', 'is', 'are', 'a', 'an', 'and', 'or', 'not', 'in', 'at', 'by', 'for',
3
+ 'from', 'of', 'on', 'to', 'with', 'as', 'error', 'exception', 'throw', 'catch',
4
+ 'was', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
5
+ 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'it', 'its',
6
+ 'this', 'that', 'these', 'those', 'i', 'we', 'you', 'he', 'she', 'they',
7
+ ]);
8
+
9
+ export function splitCamelCase(text: string): string[] {
10
+ return text
11
+ .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
12
+ .replace(/([a-z\d])([A-Z])/g, '$1 $2')
13
+ .split(/\s+/)
14
+ .filter(t => t.length > 0);
15
+ }
16
+
17
+ export function splitSnakeCase(text: string): string[] {
18
+ return text.split(/[_\-]+/).filter(t => t.length > 0);
19
+ }
20
+
21
+ export function removeStopwords(tokens: string[]): string[] {
22
+ return tokens.filter(t => !STOPWORDS.has(t.toLowerCase()));
23
+ }
24
+
25
+ export function tokenize(text: string): string[] {
26
+ const words = text
27
+ .replace(/[^\w\s]/g, ' ')
28
+ .split(/\s+/)
29
+ .filter(t => t.length > 0);
30
+
31
+ const tokens: string[] = [];
32
+ for (const word of words) {
33
+ tokens.push(...splitCamelCase(word));
34
+ if (word.includes('_') || word.includes('-')) {
35
+ tokens.push(...splitSnakeCase(word));
36
+ }
37
+ }
38
+
39
+ const cleaned = removeStopwords(tokens);
40
+ return [...new Set(cleaned.map(t => t.toLowerCase()))];
41
+ }