@comfanion/usethis_search 0.2.0-dev.0 → 3.0.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ /**
2
+ * Usage Tracker — records provenance and usage statistics for chunks.
3
+ *
4
+ * FR-060: Record provenance for each attached chunk {query, main_chunk_id, attached_via_edge_type}
5
+ * FR-061: Increment usage_count when chunk appears in search results
6
+ * FR-062: API to query "where is chunk X used?" → list of referencing chunks
7
+ * FR-063: Use usage_count as additional ranking signal
8
+ *
9
+ * Storage: JSON file at .opencode/vectors/<index>/usage-stats.json
10
+ * Updated asynchronously (non-blocking to search).
11
+ */
12
+
13
+ import fs from "fs/promises"
14
+ import path from "path"
15
+
16
+ // ---------------------------------------------------------------------------
17
+ // Types
18
+ // ---------------------------------------------------------------------------
19
+
20
/** One record describing why a chunk was attached as context to a search result. */
export interface ProvenanceRecord {
  /** The search query that triggered this attachment */
  query: string
  /** The main result chunk that caused context attachment */
  mainChunkId: string
  /** The edge type that linked main → attached chunk */
  edgeType: string
  /** Timestamp (milliseconds, as produced by Date.now()) */
  timestamp: number
}

/** Usage statistics kept for a single chunk. */
export interface ChunkUsageStats {
  /** How many times this chunk appeared in search results (main or attached) */
  usageCount: number
  /** Last time this chunk was returned in a search result (0 if never) */
  lastUsed: number
  /** Recent provenance records (max 20 per chunk to limit storage) */
  provenance: ProvenanceRecord[]
}

/** The complete persisted document (serialized to usage-stats.json). */
export interface UsageData {
  /** Per-chunk usage statistics, keyed by chunk_id */
  chunks: Record<string, ChunkUsageStats>
  /** Global counters */
  totalSearches: number
  lastUpdated: number
}

const MAX_PROVENANCE_PER_CHUNK = 20

// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------

/** True for non-null, non-array objects. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value)
}

/** Returns `value` when it is a finite, non-negative number; otherwise `fallback`. */
function toNonNegativeNumber(value: unknown, fallback: number): number {
  return typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : fallback
}

/** Structural check for a provenance record read from disk. */
function isProvenanceRecord(value: unknown): value is ProvenanceRecord {
  return (
    isPlainRecord(value) &&
    typeof value.query === "string" &&
    typeof value.mainChunkId === "string" &&
    typeof value.edgeType === "string" &&
    typeof value.timestamp === "number"
  )
}

/** Own-property check, so chunk IDs never resolve to inherited Object.prototype members. */
function hasOwnChunk(chunks: Record<string, ChunkUsageStats>, chunkId: string): boolean {
  return Object.prototype.hasOwnProperty.call(chunks, chunkId)
}

/**
 * Stores `stats` under `chunkId` as an own, enumerable property.
 * defineProperty is used instead of assignment because assigning to the key
 * "__proto__" on a plain object changes its prototype rather than adding a key.
 */
function setOwnChunk(
  chunks: Record<string, ChunkUsageStats>,
  chunkId: string,
  stats: ChunkUsageStats,
): void {
  Object.defineProperty(chunks, chunkId, {
    value: stats,
    enumerable: true,
    writable: true,
    configurable: true,
  })
}

/** A fresh, empty usage document. */
function createEmptyUsageData(): UsageData {
  return { chunks: {}, totalSearches: 0, lastUpdated: Date.now() }
}

/**
 * Converts an arbitrary parsed JSON value into a well-formed UsageData.
 * Fields with an unexpected type fall back to their empty defaults, and chunk
 * entries that are not objects are dropped, so a damaged stats file cannot
 * crash the tracker later on.
 */
function normalizeUsageData(parsed: unknown): UsageData {
  const data = createEmptyUsageData()
  if (!isPlainRecord(parsed)) return data

  data.totalSearches = toNonNegativeNumber(parsed.totalSearches, 0)
  data.lastUpdated = toNonNegativeNumber(parsed.lastUpdated, data.lastUpdated)

  if (isPlainRecord(parsed.chunks)) {
    for (const [chunkId, entry] of Object.entries(parsed.chunks)) {
      if (!isPlainRecord(entry)) continue
      const provenance = Array.isArray(entry.provenance)
        ? entry.provenance.filter(isProvenanceRecord).slice(-MAX_PROVENANCE_PER_CHUNK)
        : []
      setOwnChunk(data.chunks, chunkId, {
        usageCount: toNonNegativeNumber(entry.usageCount, 0),
        lastUsed: toNonNegativeNumber(entry.lastUsed, 0),
        provenance,
      })
    }
  }
  return data
}

// ---------------------------------------------------------------------------
// UsageTracker
// ---------------------------------------------------------------------------

/**
 * Records provenance and usage statistics for chunks and persists them as
 * JSON in `<cacheDir>/usage-stats.json`.
 *
 * Until `load()` has completed, every recording method is a no-op and every
 * query method returns its empty value (0, null or []).
 */
export class UsageTracker {
  private data: UsageData | null = null
  private dirty = false
  private savePath: string

  /**
   * @param cacheDir Directory in which `usage-stats.json` is read and written.
   */
  constructor(private cacheDir: string) {
    this.savePath = path.join(cacheDir, "usage-stats.json")
  }

  // ---- lifecycle ----------------------------------------------------------

  /**
   * Reads the stats file into memory. A missing, unreadable or unparsable file
   * yields an empty data set; a parsable file of the wrong shape is normalized
   * field by field. Never rejects.
   */
  async load(): Promise<void> {
    try {
      const raw = await fs.readFile(this.savePath, "utf-8")
      this.data = normalizeUsageData(JSON.parse(raw))
    } catch {
      this.data = createEmptyUsageData()
    }
  }

  /**
   * Writes the stats file if anything changed since the last successful save.
   * Failures are non-fatal: the data stays marked dirty so a later call retries.
   */
  async save(): Promise<void> {
    if (!this.dirty || !this.data) return
    this.data.lastUpdated = Date.now()
    // Snapshot and clear the flag before the first await: anything recorded
    // while the write is in flight re-marks the tracker dirty instead of being
    // silently dropped when the write finishes.
    const payload = JSON.stringify(this.data, null, 2)
    this.dirty = false
    try {
      await fs.mkdir(path.dirname(this.savePath), { recursive: true })
      await fs.writeFile(this.savePath, payload, "utf-8")
    } catch {
      // non-fatal; keep the data dirty so the next save() tries again
      this.dirty = true
    }
  }

  // ---- FR-060: record provenance ------------------------------------------

  /**
   * Record that `attachedChunkId` was attached to `mainChunkId` as context
   * for `query`, via `edgeType` relation. Only the most recent
   * MAX_PROVENANCE_PER_CHUNK records are kept per chunk.
   */
  recordProvenance(
    query: string,
    mainChunkId: string,
    attachedChunkId: string,
    edgeType: string,
  ): void {
    if (!this.data) return
    const stats = this.ensureChunkStats(this.data, attachedChunkId)
    stats.provenance.push({
      query,
      mainChunkId,
      edgeType,
      timestamp: Date.now(),
    })
    // Cap provenance history
    if (stats.provenance.length > MAX_PROVENANCE_PER_CHUNK) {
      stats.provenance = stats.provenance.slice(-MAX_PROVENANCE_PER_CHUNK)
    }
    this.dirty = true
  }

  // ---- FR-061: increment usage_count --------------------------------------

  /**
   * Record that these chunk IDs appeared in search results.
   * Call once per search with all result chunk IDs (main + attached).
   * Increments the global search counter once and each listed chunk's
   * usage count once per occurrence in `chunkIds`.
   */
  recordSearchResults(chunkIds: string[]): void {
    if (!this.data) return
    this.data.totalSearches++
    const now = Date.now()
    for (const id of chunkIds) {
      const stats = this.ensureChunkStats(this.data, id)
      stats.usageCount++
      stats.lastUsed = now
    }
    this.dirty = true
  }

  // ---- FR-062: "where is chunk X used?" -----------------------------------

  /**
   * Get provenance info for a chunk: which queries led to it,
   * which main chunks it was attached to, via which edges.
   * Returns an empty array for unknown chunks.
   */
  getChunkProvenance(chunkId: string): ProvenanceRecord[] {
    return this.findChunkStats(chunkId)?.provenance ?? []
  }

  /**
   * Get usage stats for a chunk, or null if the chunk was never recorded.
   */
  getChunkStats(chunkId: string): ChunkUsageStats | null {
    return this.findChunkStats(chunkId) ?? null
  }

  // ---- FR-063: usage_count as ranking signal ------------------------------

  /**
   * Get usage count for a chunk (0 if never seen).
   * Used as additional ranking signal in search.
   */
  getUsageCount(chunkId: string): number {
    return this.findChunkStats(chunkId)?.usageCount ?? 0
  }

  /**
   * Get a usage boost factor for ranking (0.0 – 1.0).
   * Normalized: most-used chunk → 1.0, unused → 0.0.
   * Scans every tracked chunk on each call to find the current maximum.
   */
  getUsageBoost(chunkId: string): number {
    if (!this.data) return 0
    const stats = this.findChunkStats(chunkId)
    if (!stats || stats.usageCount === 0) return 0

    // Find max usage count across all chunks for normalization
    let maxUsage = 1
    for (const s of Object.values(this.data.chunks)) {
      if (s.usageCount > maxUsage) maxUsage = s.usageCount
    }
    return stats.usageCount / maxUsage
  }

  // ---- summary ------------------------------------------------------------

  /**
   * Get global usage summary (all zeros before load()).
   */
  getSummary(): { totalSearches: number; trackedChunks: number; lastUpdated: number } {
    if (!this.data) return { totalSearches: 0, trackedChunks: 0, lastUpdated: 0 }
    return {
      totalSearches: this.data.totalSearches,
      trackedChunks: Object.keys(this.data.chunks).length,
      lastUpdated: this.data.lastUpdated,
    }
  }

  // ---- internals ----------------------------------------------------------

  /** Own-property lookup of a chunk's stats; undefined when absent or not loaded. */
  private findChunkStats(chunkId: string): ChunkUsageStats | undefined {
    if (!this.data || !hasOwnChunk(this.data.chunks, chunkId)) return undefined
    return this.data.chunks[chunkId]
  }

  /** Returns the stats entry for `chunkId`, creating a zeroed one if needed. */
  private ensureChunkStats(data: UsageData, chunkId: string): ChunkUsageStats {
    if (hasOwnChunk(data.chunks, chunkId)) {
      const existing = data.chunks[chunkId]
      if (existing) return existing
    }
    const created: ChunkUsageStats = {
      usageCount: 0,
      lastUsed: 0,
      provenance: [],
    }
    setOwnChunk(data.chunks, chunkId, created)
    return created
  }
}
package/vectorizer.yaml CHANGED
@@ -39,6 +39,20 @@ vectorizer:
39
39
  hybrid: false # Enable hybrid search (vector + BM25)
40
40
  bm25_weight: 0.3 # BM25 weight in hybrid mode (0.0-1.0)
41
41
 
42
+ # Graph-based context (v3)
43
+ graph:
44
+ enabled: true
45
+ max_related: 3 # How many related chunks to attach
46
+ min_relevance: 0.5 # Minimum score threshold for related context
47
+
48
+ # LSP for code analysis
49
+ lsp:
50
+ enabled: true
51
+ timeout_ms: 5000 # Timeout per file
52
+
53
+ # Read() intercept
54
+ read_intercept: true
55
+
42
56
  # Quality monitoring (v2)
43
57
  quality:
44
58
  enable_metrics: false # Track search quality metrics