@comfanion/usethis_search 3.0.0-dev.16 → 3.0.0-dev.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ import { mergeResults, DEFAULT_HYBRID_CONFIG } from "./hybrid-search.ts";
 import { QueryCache, DEFAULT_CACHE_CONFIG } from "./query-cache.ts";
 import { SearchMetrics } from "./search-metrics.ts";
 import { GraphDB } from "./graph-db.ts";
-import { GraphBuilder } from "./graph-builder.ts";
+import { GraphBuilder, isStructuralPredicate } from "./graph-builder.ts";
 import { UsageTracker } from "./usage-tracker.ts";
 
 // Suppress transformers.js logs unless DEBUG is set
@@ -85,6 +85,19 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
 let METRICS_ENABLED = false;
 let CACHE_ENABLED = true;
 
+// ── Graph config (v3) ───────────────────────────────────────────────────────
+const DEFAULT_GRAPH_CONFIG = {
+  enabled: true,
+  max_related: 4,
+  min_relevance: 0.5,
+  lsp: {
+    enabled: true,
+    timeout_ms: 5000,
+  },
+  read_intercept: true,
+};
+let GRAPH_CONFIG = { ...DEFAULT_GRAPH_CONFIG, lsp: { ...DEFAULT_GRAPH_CONFIG.lsp } };
+
 function defaultVectorizerYaml() {
   return (
     `vectorizer:\n` +
@@ -121,6 +134,16 @@ function defaultVectorizerYaml() {
     `  hybrid: true\n` +
     `  bm25_weight: 0.3\n` +
     `\n` +
+    `  # Graph-based context (v3)\n` +
+    `  graph:\n` +
+    `    enabled: true\n` +
+    `    max_related: 4\n` +
+    `    min_relevance: 0.5\n` +
+    `    lsp:\n` +
+    `      enabled: true\n` +
+    `      timeout_ms: 5000\n` +
+    `    read_intercept: true\n` +
+    `\n` +
     `  # Quality monitoring\n` +
     `  quality:\n` +
     `    enable_metrics: false\n` +
@@ -282,6 +305,26 @@ async function loadConfig(projectRoot) {
    CACHE_ENABLED = parseBool(qs, "enable_cache", true);
  }
 
+  // ── Parse graph config (v3) ──────────────────────────────────────────────
+  const graphMatch = section.match(/^\s{2}graph:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
+  if (graphMatch) {
+    const gs = graphMatch[1];
+    GRAPH_CONFIG.enabled = parseBool(gs, "enabled", DEFAULT_GRAPH_CONFIG.enabled);
+    GRAPH_CONFIG.max_related = parseNumber(gs, "max_related", DEFAULT_GRAPH_CONFIG.max_related);
+    GRAPH_CONFIG.min_relevance = parseNumber(gs, "min_relevance", DEFAULT_GRAPH_CONFIG.min_relevance);
+    GRAPH_CONFIG.read_intercept = parseBool(gs, "read_intercept", DEFAULT_GRAPH_CONFIG.read_intercept);
+
+    // Nested lsp: section
+    const lspMatch = gs.match(/^\s+lsp:\s*\n([\s\S]*?)(?=^\s{4}[a-zA-Z_\-]+:|\Z)/m);
+    if (lspMatch) {
+      const ls = lspMatch[1];
+      GRAPH_CONFIG.lsp.enabled = parseBool(ls, "enabled", DEFAULT_GRAPH_CONFIG.lsp.enabled);
+      GRAPH_CONFIG.lsp.timeout_ms = parseNumber(ls, "timeout_ms", DEFAULT_GRAPH_CONFIG.lsp.timeout_ms);
+    }
+
+    if (DEBUG) console.log("[vectorizer] Graph config:", GRAPH_CONFIG);
+  }
+
   // Parse global exclude
   const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
   if (excludeMatch) {
@@ -392,11 +435,19 @@ class CodebaseIndexer {
     this.db = await lancedb.connect(path.join(this.cacheDir, "lancedb"));
     await this.loadHashes();
 
-    const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
-    const graphPath = path.join(this.root, ".opencode", "graph", graphType);
-    await fs.mkdir(path.dirname(graphPath), { recursive: true });
-    this.graphDB = await new GraphDB(graphPath).init();
-    this.graphBuilder = new GraphBuilder(this.graphDB, this.root);
+    // Graph DB only if graph is enabled in config
+    if (GRAPH_CONFIG.enabled) {
+      const graphType = this.indexName === "docs" ? "doc_graph" : "code_graph";
+      const graphPath = path.join(this.root, ".opencode", "graph", graphType);
+      await fs.mkdir(path.dirname(graphPath), { recursive: true });
+      this.graphDB = await new GraphDB(graphPath).init();
+      this.graphBuilder = new GraphBuilder(
+        this.graphDB,
+        this.root,
+        GRAPH_CONFIG.lsp.enabled,
+        GRAPH_CONFIG.lsp.timeout_ms,
+      );
+    }
 
     // Usage tracker — provenance & usage stats
     this.usageTracker = new UsageTracker(this.cacheDir);
@@ -557,18 +608,39 @@ class CodebaseIndexer {
     // Semantic chunking
     const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
 
-    // v3: Assign chunk IDs for graph tracking
-    const chunksWithIds = this.graphBuilder.assignChunkIds(relPath, chunks);
+    // v3: Assign chunk IDs for graph tracking (works without graph — just adds IDs)
+    const chunksWithIds = this.graphBuilder
+      ? this.graphBuilder.assignChunkIds(relPath, chunks)
+      : chunks.map((c, i) => ({ ...c, chunk_id: `chunk:${relPath}::_chunk_${i}` }));
 
     // v3: Delete old edges for this file and build new ones
-    await this.graphDB.deleteByFile(relPath);
-    await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
+    let graphEdgesBuilt = 0;
+    if (this.graphBuilder && this.graphDB) {
+      await this.graphDB.deleteByFile(relPath);
+      graphEdgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
+
+      // Log graph creation to indexer.log
+      if (graphEdgesBuilt > 0 || DEBUG) {
+        const timestamp = new Date().toISOString().slice(11, 19);
+        const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
+        if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
+
+        // Write to indexer.log in .opencode directory
+        try {
+          const logPath = path.join(this.root, ".opencode", "indexer.log");
+          const fsSync = await import("fs");
+          fsSync.appendFileSync(logPath, `${logMsg}\n`);
+        } catch {
+          // non-fatal — logging is advisory
+        }
+      }
 
-    // FR-054: Store graph build timestamp + file hash as metadata triple
-    try {
-      await this.graphDB.setFileMeta(relPath, hash, Date.now());
-    } catch {
-      // non-fatal — metadata is advisory
+      // FR-054: Store graph build timestamp + file hash as metadata triple
+      try {
+        await this.graphDB.setFileMeta(relPath, hash, Date.now());
+      } catch {
+        // non-fatal — metadata is advisory
+      }
     }
 
     const data = [];
@@ -590,6 +662,9 @@ class CodebaseIndexer {
         function_name: chunksWithIds[i].function_name || "",
         class_name: chunksWithIds[i].class_name || "",
         tags: (fileMeta.tags || []).join(","),
+        // Line numbers for "from-to" extraction (default to -1 when unknown)
+        start_line: chunksWithIds[i].start_line ?? -1,
+        end_line: chunksWithIds[i].end_line ?? -1,
       });
     }
 
@@ -629,7 +704,7 @@ class CodebaseIndexer {
     const table = await this.db.openTable(tableName);
     let allRows;
     try {
-      allRows = await table.filter("").limit(100000).execute();
+      allRows = await table.filter("true").limit(100000).execute();
     } catch (e) {
       if (DEBUG) console.log("[vectorizer] BM25 index build failed (corrupted table?):", e.message);
       return null;
@@ -693,10 +768,15 @@ class CodebaseIndexer {
     const bm25Results = bm25.search(query, fetchLimit);
 
     // Build score maps
+    // LanceDB _distance is L2 (euclidean). For normalized vectors,
+    // L2 ∈ [0, 2]. Convert to similarity ∈ [0, 1]:
+    //   similarity = 1 - (distance / 2)
+    const distanceToScore = (d: number | null | undefined) =>
+      d != null ? Math.max(0, 1 - d / 2) : 0.5;
+
     const vectorScores = new Map();
     for (let i = 0; i < results.length; i++) {
-      const score = results[i]._distance != null ? 1 - results[i]._distance : 0.5;
-      vectorScores.set(i, score);
+      vectorScores.set(i, distanceToScore(results[i]._distance));
     }
 
     const bm25Scores = new Map();
@@ -711,7 +791,7 @@
 
     for (let i = 0; i < results.length; i++) {
       const key = `${results[i].file}:${results[i].chunk_index}`;
-      const vs = results[i]._distance != null ? 1 - results[i]._distance : 0.5;
+      const vs = distanceToScore(results[i]._distance);
       resultMap.set(key, { row: results[i], vectorScore: vs, bm25Score: 0 });
     }
 
@@ -812,7 +892,10 @@ class CodebaseIndexer {
 
      const outgoing = await this.graphDB.getOutgoing(result.chunk_id);
      const incoming = await this.graphDB.getIncoming(result.chunk_id);
-      const allEdges = [...outgoing, ...incoming];
+      // Filter out structural and meta edges — only relation edges are useful for context
+      const allEdges = [...outgoing, ...incoming].filter(
+        e => e.predicate !== "belongs_to" && e.predicate !== "graph_built" && !isStructuralPredicate(e.predicate)
+      );
 
      const neighbors = [];
      for (const edge of allEdges) {
@@ -833,8 +916,13 @@ class CodebaseIndexer {
        });
      }
 
+      // Apply min_relevance filter, then cap at max_related
      neighbors.sort((a, b) => b.score - a.score);
-      result.relatedContext = neighbors.slice(0, 3);
+      const minRelevance = GRAPH_CONFIG.min_relevance ?? 0.5;
+      const maxRelated = GRAPH_CONFIG.max_related ?? 4;
+      result.relatedContext = neighbors
+        .filter(n => n.score >= minRelevance)
+        .slice(0, maxRelated);
 
      // FR-060: Record provenance for each attached chunk
      if (this.usageTracker) {
@@ -875,7 +963,7 @@ class CodebaseIndexer {
     const table = await this.db.openTable(tableName);
     let rows;
     try {
-      rows = await table.filter("").limit(100000).execute();
+      rows = await table.filter("true").limit(100000).execute();
     } catch (e) {
       if (DEBUG) console.log("[vectorizer] Chunk cache build failed (corrupted table?):", e.message);
       return null;
@@ -1013,7 +1101,7 @@ class CodebaseIndexer {
     const tables = await this.db.tableNames();
     if (tables.includes(tableName)) {
       const table = await this.db.openTable(tableName);
-      const allRows = await table.filter("").limit(100000).execute();
+      const allRows = await table.filter("true").limit(100000).execute();
       const chunkData = allRows
         .filter(r => r.chunk_id && r.vector)
         .map(r => ({ chunk_id: r.chunk_id, vector: Array.from(r.vector), file: r.file }));
package/vectorizer.yaml CHANGED
@@ -61,6 +61,22 @@ vectorizer:
   # Indexes to maintain - each has pattern (what to include) and ignore (what to skip)
   indexes:
 
+    # Source code index - all common programming languages
+    code:
+      enabled: true
+      pattern: "**/*.{js,ts,jsx,tsx,mjs,cjs,py,go,rs,java,kt,swift,c,cpp,h,hpp,cs,rb,php,scala,clj}"
+      ignore:
+        - "**/node_modules/**"
+        - "**/.git/**"
+        - "**/dist/**"
+        - "**/build/**"
+        - "**/.opencode/**"
+        - "**/docs/**"
+        - "**/vendor/**"
+        - "**/__pycache__/**"
+      hybrid: true
+      bm25_weight: 0.3
+
     # Documentation index - markdown, text files
     docs:
       enabled: true
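
For reference, the graph: block that the new parsing in loadConfig reads would look like the following in vectorizer.yaml. This is a minimal sketch assembled from the defaultVectorizerYaml template in the diff above; placing it at two-space indent under the top-level vectorizer: key is an assumption inferred from the parser regex, which matches graph: at exactly that indent, and the inline comments describe how each value is used elsewhere in this diff.

vectorizer:
  # Graph-based context (v3)
  graph:
    enabled: true        # gates creation of the GraphDB / GraphBuilder during init
    max_related: 4       # cap on related chunks attached to each search result
    min_relevance: 0.5   # neighbors scoring below this threshold are dropped
    lsp:
      enabled: true      # forwarded to the GraphBuilder constructor
      timeout_ms: 5000   # forwarded to the GraphBuilder constructor
    read_intercept: true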