agentikit 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
+ import type { StashEntry } from "./metadata"
2
+
3
+ // ── Adapter Interface ───────────────────────────────────────────────────────
4
+
5
/**
 * A single indexable item: the searchable text alongside the stash entry
 * it was derived from.
 */
export interface ScoredEntry {
  // Unique id; serialize()/deserialize() use it to re-link index docs to entries
  id: string
  // Raw searchable text; tokenized by buildIndex(), substring-matched by the fallback
  text: string
  // Underlying stash entry; search() reads its type, tags and name for filtering/boosts
  entry: StashEntry
  // Copied verbatim into ScoredResult.path for each hit
  path: string
}
11
+
12
/** One search hit: the matched entry, its path, and a relevance score. */
export interface ScoredResult {
  entry: StashEntry
  path: string
  // Relevance: cosine similarity plus boosts (rounded to 3 decimals);
  // 1 for empty queries, 0.5 for substring-fallback matches
  score: number
}
17
+
18
/**
 * Pluggable search backend: index a set of entries, then query them.
 * Implemented by TfIdfAdapter in this module.
 */
export interface SearchAdapter {
  // Replace the index contents with `entries`
  buildIndex(entries: ScoredEntry[]): void
  // Return up to `limit` results; a `typeFilter` of undefined or "any" matches all types
  search(query: string, limit: number, typeFilter?: string): ScoredResult[]
}
22
+
23
+ // ── TF-IDF Implementation ───────────────────────────────────────────────────
24
+
25
/** Internal per-entry index record for the TF-IDF adapter. */
interface TfIdfDocument {
  entry: ScoredEntry
  // Token -> occurrence count within this document's text
  termFreqs: Map<string, number>
  // Euclidean norm of the TF-IDF vector, precomputed by buildIndex() for cosine similarity
  magnitude: number
}
30
+
31
/**
 * JSON-friendly snapshot of a TfIdfAdapter's index state (Maps flattened to
 * plain records); produced by serialize(), consumed by deserialize().
 */
interface SerializedTfIdf {
  idf: Record<string, number>
  docs: Array<{
    // ScoredEntry id used to re-attach the doc to its entry on deserialize
    id: string
    termFreqs: Record<string, number>
    magnitude: number
  }>
}
39
+
40
+ export class TfIdfAdapter implements SearchAdapter {
41
+ private documents: TfIdfDocument[] = []
42
+ private idf: Map<string, number> = new Map()
43
+ private entries: ScoredEntry[] = []
44
+
45
+ buildIndex(entries: ScoredEntry[]): void {
46
+ this.entries = entries
47
+ const docCount = entries.length
48
+ if (docCount === 0) return
49
+
50
+ // Compute term frequencies per document
51
+ const docFreqs = new Map<string, number>()
52
+ this.documents = entries.map((entry) => {
53
+ const tokens = tokenize(entry.text)
54
+ const termFreqs = new Map<string, number>()
55
+
56
+ for (const token of tokens) {
57
+ termFreqs.set(token, (termFreqs.get(token) || 0) + 1)
58
+ }
59
+
60
+ // Track document frequency for IDF
61
+ for (const term of termFreqs.keys()) {
62
+ docFreqs.set(term, (docFreqs.get(term) || 0) + 1)
63
+ }
64
+
65
+ return { entry, termFreqs, magnitude: 0 }
66
+ })
67
+
68
+ // Compute IDF: log(N / df)
69
+ this.idf = new Map()
70
+ for (const [term, df] of docFreqs) {
71
+ this.idf.set(term, Math.log(docCount / df))
72
+ }
73
+
74
+ // Compute document magnitudes for cosine similarity
75
+ for (const doc of this.documents) {
76
+ let sumSq = 0
77
+ for (const [term, tf] of doc.termFreqs) {
78
+ const idf = this.idf.get(term) || 0
79
+ const tfidf = tf * idf
80
+ sumSq += tfidf * tfidf
81
+ }
82
+ doc.magnitude = Math.sqrt(sumSq)
83
+ }
84
+ }
85
+
86
+ search(query: string, limit: number, typeFilter?: string): ScoredResult[] {
87
+ if (this.documents.length === 0) return []
88
+
89
+ const queryTokens = tokenize(query.toLowerCase())
90
+ if (queryTokens.length === 0) {
91
+ // Empty query: return all, sorted by type
92
+ return this.documents
93
+ .filter((d) => !typeFilter || typeFilter === "any" || d.entry.entry.type === typeFilter)
94
+ .slice(0, limit)
95
+ .map((d) => ({
96
+ entry: d.entry.entry,
97
+ path: d.entry.path,
98
+ score: 1,
99
+ }))
100
+ }
101
+
102
+ // Build query TF-IDF vector
103
+ const queryTermFreqs = new Map<string, number>()
104
+ for (const token of queryTokens) {
105
+ queryTermFreqs.set(token, (queryTermFreqs.get(token) || 0) + 1)
106
+ }
107
+
108
+ let queryMagnitude = 0
109
+ const queryVector = new Map<string, number>()
110
+ for (const [term, tf] of queryTermFreqs) {
111
+ const idf = this.idf.get(term) || 0
112
+ const tfidf = tf * idf
113
+ queryVector.set(term, tfidf)
114
+ queryMagnitude += tfidf * tfidf
115
+ }
116
+ queryMagnitude = Math.sqrt(queryMagnitude)
117
+
118
+ if (queryMagnitude === 0) {
119
+ // All query terms are unknown — fallback to substring match
120
+ return this.substringFallback(query, limit, typeFilter)
121
+ }
122
+
123
+ const results: ScoredResult[] = []
124
+ const querySet = new Set(queryTokens)
125
+
126
+ for (const doc of this.documents) {
127
+ if (typeFilter && typeFilter !== "any" && doc.entry.entry.type !== typeFilter) continue
128
+
129
+ // Cosine similarity
130
+ let dotProduct = 0
131
+ for (const [term, queryTfidf] of queryVector) {
132
+ const docTf = doc.termFreqs.get(term) || 0
133
+ if (docTf === 0) continue
134
+ const docIdf = this.idf.get(term) || 0
135
+ dotProduct += queryTfidf * (docTf * docIdf)
136
+ }
137
+
138
+ let score = doc.magnitude > 0 && queryMagnitude > 0
139
+ ? dotProduct / (doc.magnitude * queryMagnitude)
140
+ : 0
141
+
142
+ // Boost: tag exact match
143
+ const tags = doc.entry.entry.tags || []
144
+ for (const tag of tags) {
145
+ if (querySet.has(tag.toLowerCase())) {
146
+ score += 0.15
147
+ }
148
+ }
149
+
150
+ // Boost: name contains query token
151
+ const nameLower = doc.entry.entry.name.toLowerCase().replace(/[-_]/g, " ")
152
+ for (const token of queryTokens) {
153
+ if (nameLower.includes(token)) {
154
+ score += 0.1
155
+ break
156
+ }
157
+ }
158
+
159
+ if (score > 0) {
160
+ results.push({
161
+ entry: doc.entry.entry,
162
+ path: doc.entry.path,
163
+ score: Math.round(score * 1000) / 1000,
164
+ })
165
+ }
166
+ }
167
+
168
+ results.sort((a, b) => b.score - a.score)
169
+ return results.slice(0, limit)
170
+ }
171
+
172
+ serialize(): SerializedTfIdf {
173
+ const idf: Record<string, number> = {}
174
+ for (const [term, val] of this.idf) {
175
+ idf[term] = val
176
+ }
177
+ const docs = this.documents.map((d) => {
178
+ const termFreqs: Record<string, number> = {}
179
+ for (const [term, tf] of d.termFreqs) {
180
+ termFreqs[term] = tf
181
+ }
182
+ return { id: d.entry.id, termFreqs, magnitude: d.magnitude }
183
+ })
184
+ return { idf, docs }
185
+ }
186
+
187
+ static deserialize(data: SerializedTfIdf, entries: ScoredEntry[]): TfIdfAdapter {
188
+ const adapter = new TfIdfAdapter()
189
+ adapter.entries = entries
190
+
191
+ adapter.idf = new Map(Object.entries(data.idf))
192
+
193
+ const entryMap = new Map(entries.map((e) => [e.id, e]))
194
+ adapter.documents = data.docs
195
+ .map((d) => {
196
+ const entry = entryMap.get(d.id)
197
+ if (!entry) return null
198
+ return {
199
+ entry,
200
+ termFreqs: new Map(Object.entries(d.termFreqs)),
201
+ magnitude: d.magnitude,
202
+ }
203
+ })
204
+ .filter((d): d is TfIdfDocument => d !== null)
205
+
206
+ return adapter
207
+ }
208
+
209
+ private substringFallback(query: string, limit: number, typeFilter?: string): ScoredResult[] {
210
+ const q = query.toLowerCase()
211
+ return this.documents
212
+ .filter((d) => {
213
+ if (typeFilter && typeFilter !== "any" && d.entry.entry.type !== typeFilter) return false
214
+ return d.entry.text.includes(q) || d.entry.entry.name.toLowerCase().includes(q)
215
+ })
216
+ .slice(0, limit)
217
+ .map((d) => ({
218
+ entry: d.entry.entry,
219
+ path: d.entry.path,
220
+ score: 0.5,
221
+ }))
222
+ }
223
+ }
224
+
225
+ // ── Tokenization ────────────────────────────────────────────────────────────
226
+
227
+ const STOP_WORDS = new Set([
228
+ "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
229
+ "have", "has", "had", "do", "does", "did", "will", "would", "could",
230
+ "should", "may", "might", "shall", "can", "need", "dare", "ought",
231
+ "to", "of", "in", "for", "on", "with", "at", "by", "from", "as",
232
+ "into", "through", "during", "before", "after", "above", "below",
233
+ "and", "but", "or", "nor", "not", "so", "yet", "both", "either",
234
+ "neither", "each", "every", "all", "any", "few", "more", "most",
235
+ "other", "some", "such", "no", "only", "own", "same", "than",
236
+ "too", "very", "just", "because", "if", "when", "where", "how",
237
+ "what", "which", "who", "whom", "this", "that", "these", "those",
238
+ "it", "its",
239
+ ])
240
+
241
+ function tokenize(text: string): string[] {
242
+ return text
243
+ .toLowerCase()
244
+ .replace(/[^a-z0-9]+/g, " ")
245
+ .split(/\s+/)
246
+ .filter((t) => t.length > 1 && !STOP_WORDS.has(t))
247
+ }