@comfanion/usethis_search 0.1.5 → 0.2.0-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
+ /**
+  * Query Embedding Cache — LRU cache for query embeddings.
+  *
+  * Avoids recomputing embeddings for identical/recent queries.
+  * Uses a simple Map-based LRU with TTL eviction.
+  */
+
+ export interface QueryCacheConfig {
+   maxSize: number // max entries (default: 100)
+   ttl: number // time-to-live in ms (default: 1 hour)
+ }
+
+ export const DEFAULT_CACHE_CONFIG: QueryCacheConfig = {
+   maxSize: 100,
+   ttl: 3600000, // 1 hour
+ }
+
+ interface CacheEntry {
+   embedding: number[]
+   timestamp: number
+ }
+
+ export class QueryCache {
+   private cache: Map<string, CacheEntry> = new Map()
+   private config: QueryCacheConfig
+   private cleanupInterval?: ReturnType<typeof setInterval>
+
+   constructor(config: QueryCacheConfig = DEFAULT_CACHE_CONFIG) {
+     this.config = config
+     // Periodic eviction of expired entries every 5 minutes
+     this.cleanupInterval = setInterval(() => {
+       this.evictExpired()
+     }, 300_000)
+     // Allow Node to exit even if interval is running
+     if (this.cleanupInterval && typeof this.cleanupInterval === "object" && "unref" in this.cleanupInterval) {
+       this.cleanupInterval.unref()
+     }
+   }
+
+   /**
+    * Get cached embedding for a query string.
+    * Returns `null` if not found or expired.
+    */
+   get(query: string): number[] | null {
+     const key = this.normalizeKey(query)
+     const entry = this.cache.get(key)
+
+     if (!entry) return null
+
+     // Check TTL
+     if (Date.now() - entry.timestamp > this.config.ttl) {
+       this.cache.delete(key)
+       return null
+     }
+
+     // Move to end (most recently used) — delete & re-insert
+     this.cache.delete(key)
+     this.cache.set(key, entry)
+
+     return entry.embedding
+   }
+
+   /**
+    * Store embedding for a query string.
+    */
+   set(query: string, embedding: number[]): void {
+     const key = this.normalizeKey(query)
+
+     // If already exists, delete first (to update position)
+     this.cache.delete(key)
+
+     // Evict oldest if at capacity
+     if (this.cache.size >= this.config.maxSize) {
+       const oldest = this.cache.keys().next().value
+       if (oldest !== undefined) {
+         this.cache.delete(oldest)
+       }
+     }
+
+     this.cache.set(key, { embedding, timestamp: Date.now() })
+   }
+
+   /** Check if a query is cached (and not expired). */
+   has(query: string): boolean {
+     return this.get(query) !== null
+   }
+
+   /** Remove all entries. */
+   clear(): void {
+     this.cache.clear()
+   }
+
+   /** Stop periodic cleanup and release all memory. */
+   destroy(): void {
+     if (this.cleanupInterval) {
+       clearInterval(this.cleanupInterval)
+       this.cleanupInterval = undefined
+     }
+     this.cache.clear()
+   }
+
+   /** Current number of (non-expired) entries. */
+   get size(): number {
+     // Lazy: don't evict expired on size check
+     return this.cache.size
+   }
+
+   /** Evict all expired entries. */
+   evictExpired(): number {
+     const now = Date.now()
+     let evicted = 0
+     for (const [key, entry] of this.cache) {
+       if (now - entry.timestamp > this.config.ttl) {
+         this.cache.delete(key)
+         evicted++
+       }
+     }
+     return evicted
+   }
+
+   // ── Internal ──────────────────────────────────────────────────────────────
+
+   private normalizeKey(query: string): string {
+     return query.trim().toLowerCase()
+   }
+ }
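
A minimal usage sketch of the cache above, not part of the package diff: the `embedQuery` function and the `./query-cache` import path are hypothetical placeholders, while the `QueryCache` API (`get`, `set`, `destroy`, `DEFAULT_CACHE_CONFIG`) comes from the file itself.

```ts
// Sketch: wrap an embedding call with the LRU cache. `embedQuery` and the
// "./query-cache" import path are placeholders for illustration only.
import { QueryCache } from "./query-cache"

declare function embedQuery(query: string): Promise<number[]> // hypothetical embedder

const cache = new QueryCache({ maxSize: 200, ttl: 10 * 60 * 1000 }) // 200 entries, 10 min TTL

async function getEmbedding(query: string): Promise<number[]> {
  // Cache hit: identical/recent queries skip the embedding model entirely
  const cached = cache.get(query)
  if (cached) return cached

  // Cache miss: compute once, then store for subsequent lookups
  const embedding = await embedQuery(query)
  cache.set(query, embedding)
  return embedding
}

// On shutdown, stop the periodic eviction timer and drop cached entries:
// cache.destroy()
```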
@@ -0,0 +1,155 @@
+ /**
+  * Search Metrics — tracks search quality and usage patterns.
+  *
+  * Logs queries, scores, result counts, and computes aggregate stats.
+  * Persists to .opencode/vectors/metrics.json.
+  */
+
+ import fs from "fs/promises"
+ import path from "path"
+
+ // ── Types ───────────────────────────────────────────────────────────────────
+
+ export interface QueryRecord {
+   query: string
+   timestamp: string // ISO
+   index: string
+   results_count: number
+   avg_score: number
+   top_score: number
+   hybrid: boolean
+ }
+
+ export interface MetricsSummary {
+   total_queries: number
+   avg_results_per_query: number
+   zero_results_rate: number
+   avg_relevance: number
+   avg_top_score: number
+ }
+
+ export interface MetricsData {
+   queries: QueryRecord[]
+   summary: MetricsSummary
+ }
+
+ // ── Constants ───────────────────────────────────────────────────────────────
+
+ const MAX_STORED_QUERIES = 500 // keep last N queries
+ const HIGH_RELEVANCE_THRESHOLD = 0.7
+
+ // ── SearchMetrics class ─────────────────────────────────────────────────────
+
+ export class SearchMetrics {
+   private metricsPath: string
+   private data: MetricsData
+
+   constructor(projectRoot: string) {
+     this.metricsPath = path.join(projectRoot, ".opencode", "vectors", "metrics.json")
+     this.data = { queries: [], summary: this.emptySummary() }
+   }
+
+   /** Load metrics from disk. */
+   async load(): Promise<void> {
+     try {
+       const raw = await fs.readFile(this.metricsPath, "utf8")
+       this.data = JSON.parse(raw)
+       // Trim if loaded data exceeds limit (e.g. from an older version)
+       if (this.data.queries && this.data.queries.length > MAX_STORED_QUERIES) {
+         this.data.queries = this.data.queries.slice(-MAX_STORED_QUERIES)
+         this.data.summary = this.computeSummary()
+       }
+     } catch {
+       this.data = { queries: [], summary: this.emptySummary() }
+     }
+   }
+
+   /** Save metrics to disk. */
+   async save(): Promise<void> {
+     try {
+       const dir = path.dirname(this.metricsPath)
+       await fs.mkdir(dir, { recursive: true })
+       await fs.writeFile(this.metricsPath, JSON.stringify(this.data, null, 2))
+     } catch {
+       // non-fatal
+     }
+   }
+
+   /**
+    * Record a search query and its results.
+    * @param scores Array of relevance scores (1 - distance) for each result
+    */
+   recordQuery(
+     query: string,
+     index: string,
+     scores: number[],
+     hybrid: boolean = false,
+   ): void {
+     const record: QueryRecord = {
+       query,
+       timestamp: new Date().toISOString(),
+       index,
+       results_count: scores.length,
+       avg_score: scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0,
+       top_score: scores.length > 0 ? Math.max(...scores) : 0,
+       hybrid,
+     }
+
+     this.data.queries.push(record)
+
+     // Trim to max
+     if (this.data.queries.length > MAX_STORED_QUERIES) {
+       this.data.queries = this.data.queries.slice(-MAX_STORED_QUERIES)
+     }
+
+     // Recompute summary
+     this.data.summary = this.computeSummary()
+   }
+
+   /** Get current summary. */
+   getSummary(): MetricsSummary {
+     return this.data.summary
+   }
+
+   /** Get raw query records (last N). */
+   getQueries(limit: number = 50): QueryRecord[] {
+     return this.data.queries.slice(-limit)
+   }
+
+   /** Compute context relevance: % of queries where top_score >= threshold. */
+   getContextRelevance(): number {
+     if (this.data.queries.length === 0) return 0
+     const relevant = this.data.queries.filter((q) => q.top_score >= HIGH_RELEVANCE_THRESHOLD)
+     return relevant.length / this.data.queries.length
+   }
+
+   // ── Internal ──────────────────────────────────────────────────────────────
+
+   private computeSummary(): MetricsSummary {
+     const queries = this.data.queries
+     if (queries.length === 0) return this.emptySummary()
+
+     const totalResults = queries.reduce((sum, q) => sum + q.results_count, 0)
+     const zeroResults = queries.filter((q) => q.results_count === 0).length
+     const avgRelevance = queries.reduce((sum, q) => sum + q.avg_score, 0) / queries.length
+     const avgTopScore = queries.reduce((sum, q) => sum + q.top_score, 0) / queries.length
+
+     return {
+       total_queries: queries.length,
+       avg_results_per_query: totalResults / queries.length,
+       zero_results_rate: zeroResults / queries.length,
+       avg_relevance: Math.round(avgRelevance * 1000) / 1000,
+       avg_top_score: Math.round(avgTopScore * 1000) / 1000,
+     }
+   }
+
+   private emptySummary(): MetricsSummary {
+     return {
+       total_queries: 0,
+       avg_results_per_query: 0,
+       zero_results_rate: 0,
+       avg_relevance: 0,
+       avg_top_score: 0,
+     }
+   }
+ }
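
A minimal usage sketch of the metrics class above, not part of the package diff: the `./search-metrics` import path, the example query, and the distance values are hypothetical; the `SearchMetrics` API and the "relevance = 1 - distance" convention come from the file itself.

```ts
// Sketch: record one search and read back aggregate stats. Import path and
// sample values are placeholders for illustration only.
import { SearchMetrics } from "./search-metrics"

async function main() {
  const metrics = new SearchMetrics(process.cwd())
  await metrics.load()

  // Hypothetical vector-search results with distances in [0, 1]
  const results = [{ distance: 0.18 }, { distance: 0.35 }, { distance: 0.61 }]
  const scores = results.map((r) => 1 - r.distance) // relevance = 1 - distance, per recordQuery's doc comment

  metrics.recordQuery("how does hybrid search work", "docs", scores, false)
  await metrics.save()

  console.log(metrics.getSummary()) // aggregate stats over the last 500 stored queries
  console.log(metrics.getContextRelevance()) // share of queries with top_score >= 0.7
}

main()
```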
@@ -0,0 +1,81 @@
+ vectorizer:
+   # Enable/disable vectorizer functionality
+   enabled: true
+
+   # Auto-index files when they change (requires file-indexer plugin)
+   auto_index: true
+
+   # Embedding model
+   model: "Xenova/all-MiniLM-L6-v2"
+
+   # Debounce time in ms (wait before indexing after file change)
+   debounce_ms: 1000
+
+   # Content cleaning before chunking (v2)
+   cleaning:
+     remove_toc: true
+     remove_frontmatter_metadata: false
+     remove_imports: false
+     remove_comments: false
+
+   # Chunking strategy (v2)
+   chunking:
+     strategy: "semantic" # fixed | semantic
+     markdown:
+       split_by_headings: true
+       min_chunk_size: 200
+       max_chunk_size: 2000
+       preserve_heading_hierarchy: true
+     code:
+       split_by_functions: true
+       include_function_signature: true
+       min_chunk_size: 300
+       max_chunk_size: 1500
+     fixed:
+       max_chars: 1500
+
+   # Search configuration (v2)
+   search:
+     hybrid: false # Enable hybrid search (vector + BM25)
+     bm25_weight: 0.3 # BM25 weight in hybrid mode (0.0-1.0)
+
+   # Quality monitoring (v2)
+   quality:
+     enable_metrics: false # Track search quality metrics
+     enable_cache: true # LRU cache for query embeddings
+
+   # Indexes to maintain - each has pattern (what to include) and ignore (what to skip)
+   indexes:
+
+     # Documentation index - markdown, text files
+     docs:
+       enabled: true
+       pattern: "docs/**/*.{md,mdx,txt,rst,adoc}"
+       ignore: []
+
+     # Configuration index - yaml, json, toml
+     config:
+       enabled: false
+       pattern: "**/*.{yaml,yml,json,toml,ini}"
+       ignore:
+         - "**/node_modules/**"
+         - "**/.git/**"
+         - "**/dist/**"
+         - "**/build/**"
+         - "**/.opencode/**"
+         - "**/docs/**"
+         - "**/vendor/**"
+         - "**/__pycache__/**"
+         - "**/*.min.js"
+         - "**/*.bundle.js"
+         - "**/package-lock.json"
+         - "**/yarn.lock"
+
+   # Global exclude patterns (applied to ALL indexes, in addition to per-index ignore)
+   exclude:
+     - node_modules
+     - vendor
+     - dist
+     - build
+     - out
+     - __pycache__
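
A minimal sketch of reading a few of the settings above, not part of the package diff: the config file name, the `js-yaml` dependency, and the fallback defaults are assumptions for illustration; the plugin's actual config-loading code does not appear in this diff.

```ts
// Sketch: parse the vectorizer YAML and pull out the search options. The
// "vectorizer.yaml" path is a placeholder; only the key names and the sample
// defaults (hybrid: false, bm25_weight: 0.3) come from the config above.
import fs from "fs/promises"
import { load as loadYaml } from "js-yaml"

interface VectorizerSearchConfig {
  hybrid: boolean
  bm25_weight: number
}

async function readSearchConfig(configPath = "vectorizer.yaml"): Promise<VectorizerSearchConfig> {
  const raw = loadYaml(await fs.readFile(configPath, "utf8")) as any
  const search = raw?.vectorizer?.search ?? {}
  return {
    hybrid: search.hybrid ?? false, // fallbacks mirror the sample config
    bm25_weight: search.bm25_weight ?? 0.3,
  }
}

readSearchConfig().then((cfg) => console.log(cfg))
```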