@comfanion/usethis_search 0.2.0-dev.0 → 3.0.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/file-indexer.ts +13 -0
- package/index.ts +7 -1
- package/package.json +12 -3
- package/tools/codeindex.ts +155 -6
- package/tools/read-interceptor.ts +127 -0
- package/tools/search.ts +14 -1
- package/vectorizer/analyzers/lsp-analyzer.ts +293 -0
- package/vectorizer/analyzers/lsp-client.ts +369 -0
- package/vectorizer/analyzers/regex-analyzer.ts +255 -0
- package/vectorizer/graph-builder.ts +198 -0
- package/vectorizer/graph-db.ts +289 -0
- package/vectorizer/index.js +167 -9
- package/vectorizer/usage-tracker.ts +204 -0
- package/vectorizer.yaml +14 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Usage Tracker — records provenance and usage statistics for chunks.
|
|
3
|
+
*
|
|
4
|
+
* FR-060: Record provenance for each attached chunk {query, main_chunk_id, attached_via_edge_type}
|
|
5
|
+
* FR-061: Increment usage_count when chunk appears in search results
|
|
6
|
+
* FR-062: API to query "where is chunk X used?" → list of referencing chunks
|
|
7
|
+
* FR-063: Use usage_count as additional ranking signal
|
|
8
|
+
*
|
|
9
|
+
* Storage: JSON file at .opencode/vectors/<index>/usage-stats.json
|
|
10
|
+
* Updated asynchronously (non-blocking to search).
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import fs from "fs/promises"
|
|
14
|
+
import path from "path"
|
|
15
|
+
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Types
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
/**
 * One provenance entry (FR-060): describes why a chunk was attached as
 * context to a search result.
 */
export interface ProvenanceRecord {
  /** Search query that led to the attachment. */
  query: string
  /** ID of the main result chunk the context chunk was attached to. */
  mainChunkId: string
  /** Type of the edge that linked the main chunk to the attached chunk. */
  edgeType: string
  /** Time the record was created, as returned by `Date.now()`. */
  timestamp: number
}
|
|
30
|
+
|
|
31
|
+
/**
 * Usage statistics kept for a single chunk.
 */
export interface ChunkUsageStats {
  /** Number of times the chunk appeared in search results (main or attached). */
  usageCount: number
  /** `Date.now()` value of the last search that returned the chunk; 0 if never. */
  lastUsed: number
  /**
   * Most recent provenance records for the chunk. The tracker trims this
   * list to a fixed maximum per chunk to limit storage.
   */
  provenance: ProvenanceRecord[]
}
|
|
39
|
+
|
|
40
|
+
/**
 * Root object persisted to `usage-stats.json`.
 */
export interface UsageData {
  /** Per-chunk usage statistics, keyed by chunk ID. */
  chunks: Record<string, ChunkUsageStats>
  /** Number of searches recorded so far. */
  totalSearches: number
  /** `Date.now()` value of the last time the data was created or saved. */
  lastUpdated: number
}
|
|
47
|
+
|
|
48
|
+
// Upper bound on provenance records kept per chunk; once a chunk exceeds it,
// recordProvenance keeps only the most recent entries.
const MAX_PROVENANCE_PER_CHUNK = 20
|
|
49
|
+
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// UsageTracker
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
/**
 * Tracks provenance and usage statistics for chunks and persists them as
 * JSON in `<cacheDir>/usage-stats.json`.
 *
 * All recording and query methods are no-ops (or return empty/zero values)
 * until `load()` has completed.
 */
export class UsageTracker {
  /** In-memory usage data; `null` until `load()` has completed. */
  private data: UsageData | null = null
  /** True while `data` holds changes that have not been written to disk. */
  private dirty = false
  /** Path of the JSON stats file: `<cacheDir>/usage-stats.json`. */
  private savePath: string
  /**
   * Memoized highest `usageCount` across all chunks (never below 1), used by
   * `getUsageBoost`. `null` means "recompute on next use"; it is reset
   * whenever usage counts change or data is (re)loaded.
   */
  private maxUsageCache: number | null = null

  /**
   * @param cacheDir Directory that holds (or will hold) `usage-stats.json`.
   */
  constructor(private cacheDir: string) {
    this.savePath = path.join(cacheDir, "usage-stats.json")
  }

  // ---- lifecycle ----------------------------------------------------------

  /**
   * Load usage data from disk.
   *
   * A missing, unreadable, or unparsable file yields fresh empty data. A file
   * that parses but has an unexpected shape is sanitized: well-formed parts
   * are kept, everything else falls back to defaults, so later calls never
   * operate on a malformed structure.
   */
  async load(): Promise<void> {
    let loaded: UsageData
    try {
      const raw = await fs.readFile(this.savePath, "utf-8")
      loaded = UsageTracker.sanitize(JSON.parse(raw))
    } catch {
      loaded = UsageTracker.createEmptyData()
    }
    this.data = loaded
    this.maxUsageCache = null
  }

  /**
   * Write usage data to disk if there are unsaved changes.
   *
   * Failures are non-fatal: the error is swallowed and the tracker stays
   * dirty so a later `save()` retries.
   */
  async save(): Promise<void> {
    if (!this.dirty || !this.data) return
    this.data.lastUpdated = Date.now()
    try {
      // Snapshot and clear the flag synchronously, before the first await.
      // Anything recorded while the write is in flight re-marks the tracker
      // dirty and is picked up by the next save() instead of being lost.
      const payload = JSON.stringify(this.data, null, 2)
      this.dirty = false
      await fs.mkdir(path.dirname(this.savePath), { recursive: true })
      await fs.writeFile(this.savePath, payload, "utf-8")
    } catch {
      // non-fatal: keep the changes marked as unsaved
      this.dirty = true
    }
  }

  // ---- FR-060: record provenance ------------------------------------------

  /**
   * Record that `attachedChunkId` was attached to `mainChunkId` as context
   * for `query`, via the `edgeType` relation.
   *
   * Only the most recent `MAX_PROVENANCE_PER_CHUNK` records are kept per chunk.
   */
  recordProvenance(
    query: string,
    mainChunkId: string,
    attachedChunkId: string,
    edgeType: string,
  ): void {
    if (!this.data) return
    const stats = this.ensureChunkStats(attachedChunkId)
    stats.provenance.push({
      query,
      mainChunkId,
      edgeType,
      timestamp: Date.now(),
    })
    // Cap provenance history, dropping the oldest entries first.
    if (stats.provenance.length > MAX_PROVENANCE_PER_CHUNK) {
      stats.provenance = stats.provenance.slice(-MAX_PROVENANCE_PER_CHUNK)
    }
    this.dirty = true
  }

  // ---- FR-061: increment usage_count --------------------------------------

  /**
   * Record that these chunk IDs appeared in search results.
   * Call once per search with all result chunk IDs (main + attached).
   * An ID listed more than once is counted once per occurrence.
   */
  recordSearchResults(chunkIds: string[]): void {
    if (!this.data) return
    this.data.totalSearches++
    const now = Date.now()
    for (const id of chunkIds) {
      const stats = this.ensureChunkStats(id)
      stats.usageCount++
      stats.lastUsed = now
    }
    this.maxUsageCache = null
    this.dirty = true
  }

  // ---- FR-062: "where is chunk X used?" -----------------------------------

  /**
   * Get provenance info for a chunk: which queries led to it, which main
   * chunks it was attached to, and via which edges.
   *
   * @returns The stored records, or an empty array for an unknown chunk.
   */
  getChunkProvenance(chunkId: string): ProvenanceRecord[] {
    if (!this.data) return []
    return this.data.chunks[chunkId]?.provenance ?? []
  }

  /**
   * Get usage stats for a chunk.
   *
   * @returns The stats object, or `null` for an unknown chunk.
   */
  getChunkStats(chunkId: string): ChunkUsageStats | null {
    if (!this.data) return null
    return this.data.chunks[chunkId] ?? null
  }

  // ---- FR-063: usage_count as ranking signal ------------------------------

  /**
   * Get usage count for a chunk (0 if never seen).
   * Used as additional ranking signal in search.
   */
  getUsageCount(chunkId: string): number {
    if (!this.data) return 0
    return this.data.chunks[chunkId]?.usageCount ?? 0
  }

  /**
   * Get a usage boost factor for ranking (0.0 – 1.0).
   * Normalized: most-used chunk → 1.0, unused → 0.0.
   */
  getUsageBoost(chunkId: string): number {
    const data = this.data
    if (!data) return 0
    const stats = data.chunks[chunkId]
    if (!stats || stats.usageCount === 0) return 0

    // The normalization maximum is the same for every chunk, so compute it
    // once and reuse it until usage counts change.
    if (this.maxUsageCache === null) {
      let maxUsage = 1
      for (const s of Object.values(data.chunks)) {
        if (s.usageCount > maxUsage) maxUsage = s.usageCount
      }
      this.maxUsageCache = maxUsage
    }
    return stats.usageCount / this.maxUsageCache
  }

  // ---- summary ------------------------------------------------------------

  /**
   * Get global usage summary. All fields are 0 before `load()`.
   */
  getSummary(): { totalSearches: number; trackedChunks: number; lastUpdated: number } {
    if (!this.data) return { totalSearches: 0, trackedChunks: 0, lastUpdated: 0 }
    return {
      totalSearches: this.data.totalSearches,
      trackedChunks: Object.keys(this.data.chunks).length,
      lastUpdated: this.data.lastUpdated,
    }
  }

  // ---- internals ----------------------------------------------------------

  /**
   * Return the stats entry for `chunkId`, creating a zeroed one if needed.
   * Callers must have checked that data is loaded.
   */
  private ensureChunkStats(chunkId: string): ChunkUsageStats {
    const data = this.data
    if (!data) throw new Error("UsageTracker: usage data is not loaded")
    let stats = data.chunks[chunkId]
    if (!stats) {
      stats = { usageCount: 0, lastUsed: 0, provenance: [] }
      data.chunks[chunkId] = stats
    }
    return stats
  }

  /**
   * Create empty usage data. The chunk map has no prototype, so chunk IDs
   * such as "constructor" or "__proto__" are ordinary keys and can never
   * resolve to (or overwrite) inherited Object members.
   */
  private static createEmptyData(): UsageData {
    return {
      chunks: Object.create(null) as Record<string, ChunkUsageStats>,
      totalSearches: 0,
      lastUpdated: Date.now(),
    }
  }

  /**
   * Build well-formed usage data from an arbitrary parsed JSON value.
   * Fields with an unexpected type fall back to their defaults; chunk
   * entries that are not objects and malformed provenance records are dropped.
   */
  private static sanitize(parsed: unknown): UsageData {
    const data = UsageTracker.createEmptyData()
    if (!UsageTracker.isPlainObject(parsed)) return data

    if (UsageTracker.isCount(parsed.totalSearches)) data.totalSearches = parsed.totalSearches
    if (UsageTracker.isCount(parsed.lastUpdated)) data.lastUpdated = parsed.lastUpdated

    const chunks = parsed.chunks
    if (!UsageTracker.isPlainObject(chunks)) return data

    for (const [id, entry] of Object.entries(chunks)) {
      if (!UsageTracker.isPlainObject(entry)) continue
      data.chunks[id] = {
        usageCount: UsageTracker.isCount(entry.usageCount) ? entry.usageCount : 0,
        lastUsed: UsageTracker.isCount(entry.lastUsed) ? entry.lastUsed : 0,
        provenance: Array.isArray(entry.provenance)
          ? entry.provenance.filter(UsageTracker.isProvenanceRecord)
          : [],
      }
    }
    return data
  }

  /** True for non-null, non-array objects. */
  private static isPlainObject(value: unknown): value is Record<string, unknown> {
    return typeof value === "object" && value !== null && !Array.isArray(value)
  }

  /** True for finite, non-negative numbers (counters and timestamps). */
  private static isCount(value: unknown): value is number {
    return typeof value === "number" && Number.isFinite(value) && value >= 0
  }

  /** True when `value` has every ProvenanceRecord field with the right type. */
  private static isProvenanceRecord(value: unknown): value is ProvenanceRecord {
    return (
      UsageTracker.isPlainObject(value) &&
      typeof value.query === "string" &&
      typeof value.mainChunkId === "string" &&
      typeof value.edgeType === "string" &&
      typeof value.timestamp === "number"
    )
  }
}
|
package/vectorizer.yaml
CHANGED
|
@@ -39,6 +39,20 @@ vectorizer:
|
|
|
39
39
|
hybrid: false # Enable hybrid search (vector + BM25)
|
|
40
40
|
bm25_weight: 0.3 # BM25 weight in hybrid mode (0.0-1.0)
|
|
41
41
|
|
|
42
|
+
# Graph-based context (v3)
|
|
43
|
+
graph:
|
|
44
|
+
enabled: true
|
|
45
|
+
max_related: 3 # How many related chunks to attach
|
|
46
|
+
min_relevance: 0.5 # Minimum score threshold for related context
|
|
47
|
+
|
|
48
|
+
# LSP for code analysis
|
|
49
|
+
lsp:
|
|
50
|
+
enabled: true
|
|
51
|
+
timeout_ms: 5000 # Timeout per file
|
|
52
|
+
|
|
53
|
+
# Read() intercept
|
|
54
|
+
read_intercept: true
|
|
55
|
+
|
|
42
56
|
# Quality monitoring (v2)
|
|
43
57
|
quality:
|
|
44
58
|
enable_metrics: false # Track search quality metrics
|