searchsocket 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
/** Allowed values for the `mode` field of the `scope` configuration section. */
type ScopeMode =
  | "fixed"
  | "git"
  | "env";
/**
 * Allowed values for the `mode` field of the `source` configuration section;
 * also the type of `IndexOptions.sourceOverride`.
 */
type SourceMode =
  | "static-output"
  | "crawl"
  | "content-files"
  | "build";
/**
 * Allowed values for `embeddings.provider`.
 * Currently a single-member union: only `"openai"` is accepted.
 */
type EmbeddingProvider = "openai";
/** Allowed values for `rerank.provider`. */
type RerankProvider =
  | "none"
  | "jina";
/**
 * Input configuration shape.
 *
 * Every top-level section is optional, and so is almost every field inside
 * them. The only required fields are `source.crawl.baseUrl` and
 * `source.contentFiles.globs`, which must be present whenever their parent
 * object is supplied.
 *
 * NOTE(review): fields ending in `Env` / `EnvVar` presumably hold the *name*
 * of an environment variable rather than its value — confirm against the
 * config loader.
 */
interface SearchSocketConfig {
  /** `project` section: optional id and base URL. */
  project?: {
    id?: string;
    baseUrl?: string;
  };

  /** `scope` section: mode selector plus per-mode settings. */
  scope?: {
    mode?: ScopeMode;
    fixed?: string;
    envVar?: string;
    sanitize?: boolean;
  };

  /** `source` section: mode selector plus one optional sub-object per mode. */
  source?: {
    mode?: SourceMode;
    staticOutputDir?: string;
    strictRouteMapping?: boolean;
    crawl?: {
      /** Required whenever `crawl` is provided. */
      baseUrl: string;
      routes?: string[];
      sitemapUrl?: string;
    };
    contentFiles?: {
      /** Required whenever `contentFiles` is provided. */
      globs: string[];
      baseDir?: string;
    };
    build?: {
      outputDir?: string;
      paramValues?: Record<string, string[]>;
      exclude?: string[];
      previewTimeout?: number;
    };
  };

  /** `extract` section: selector, tag and attribute options. */
  extract?: {
    mainSelector?: string;
    dropTags?: string[];
    dropSelectors?: string[];
    ignoreAttr?: string;
    noindexAttr?: string;
    respectRobotsNoindex?: boolean;
  };

  /** `transform` section: `"markdown"` is the only accepted `output` value. */
  transform?: {
    output?: "markdown";
    preserveCodeBlocks?: boolean;
    preserveTables?: boolean;
  };

  /** `chunking` section: `"hybrid"` is the only accepted `strategy` value. */
  chunking?: {
    strategy?: "hybrid";
    maxChars?: number;
    overlapChars?: number;
    minChars?: number;
    headingPathDepth?: number;
    dontSplitInside?: ("code" | "table" | "blockquote")[];
    prependTitle?: boolean;
    pageSummaryChunk?: boolean;
  };

  /** `embeddings` section. */
  embeddings?: {
    provider?: EmbeddingProvider;
    model?: string;
    apiKeyEnv?: string;
    batchSize?: number;
    concurrency?: number;
    pricePer1kTokens?: number;
  };

  /** `vector` section: optional dimension plus `turso` settings. */
  vector?: {
    dimension?: number;
    turso?: {
      urlEnv?: string;
      authTokenEnv?: string;
      localPath?: string;
    };
  };

  /** `rerank` section: provider selector plus `jina` settings. */
  rerank?: {
    provider?: RerankProvider;
    topN?: number;
    jina?: {
      apiKeyEnv?: string;
      model?: string;
    };
  };

  /** `ranking` section: boolean toggles, numeric knobs and a `weights` object. */
  ranking?: {
    enableIncomingLinkBoost?: boolean;
    enableDepthBoost?: boolean;
    pageWeights?: Record<string, number>;
    aggregationCap?: number;
    aggregationDecay?: number;
    minChunkScoreRatio?: number;
    weights?: {
      incomingLinks?: number;
      depth?: number;
      rerank?: number;
      aggregation?: number;
    };
  };

  /** `api` section: path, CORS origins and rate-limit settings. */
  api?: {
    path?: string;
    cors?: {
      allowOrigins?: string[];
    };
    rateLimit?: {
      windowMs?: number;
      max?: number;
    };
  };

  /** `mcp` section: enable flag, transport selector and HTTP settings. */
  mcp?: {
    enable?: boolean;
    transport?: "stdio" | "http";
    http?: {
      port?: number;
      path?: string;
    };
  };

  /** `state` section. */
  state?: {
    dir?: string;
    writeMirror?: boolean;
  };
}
/**
 * Configuration shape with the same sections as `SearchSocketConfig`, but
 * with nearly every field required.
 *
 * Fields that remain optional here: `project.baseUrl`, `source.crawl`,
 * `source.crawl.sitemapUrl`, `source.contentFiles`, `source.build`,
 * `embeddings.pricePer1kTokens`, `vector.dimension` and `api.rateLimit`.
 *
 * NOTE(review): presumably the result of merging user config with defaults —
 * confirm against the code that constructs it.
 */
interface ResolvedSearchSocketConfig {
  /** `project` section: `id` is required, `baseUrl` stays optional. */
  project: {
    id: string;
    baseUrl?: string;
  };

  /** `scope` section: all fields required. */
  scope: {
    mode: ScopeMode;
    fixed: string;
    envVar: string;
    sanitize: boolean;
  };

  /** `source` section: the per-mode sub-objects stay optional. */
  source: {
    mode: SourceMode;
    staticOutputDir: string;
    strictRouteMapping: boolean;
    crawl?: {
      baseUrl: string;
      routes: string[];
      sitemapUrl?: string;
    };
    contentFiles?: {
      globs: string[];
      baseDir: string;
    };
    build?: {
      outputDir: string;
      paramValues: Record<string, string[]>;
      exclude: string[];
      previewTimeout: number;
    };
  };

  /** `extract` section: all fields required. */
  extract: {
    mainSelector: string;
    dropTags: string[];
    dropSelectors: string[];
    ignoreAttr: string;
    noindexAttr: string;
    respectRobotsNoindex: boolean;
  };

  /** `transform` section: `"markdown"` is the only accepted `output` value. */
  transform: {
    output: "markdown";
    preserveCodeBlocks: boolean;
    preserveTables: boolean;
  };

  /** `chunking` section: `"hybrid"` is the only accepted `strategy` value. */
  chunking: {
    strategy: "hybrid";
    maxChars: number;
    overlapChars: number;
    minChars: number;
    headingPathDepth: number;
    dontSplitInside: ("code" | "table" | "blockquote")[];
    prependTitle: boolean;
    pageSummaryChunk: boolean;
  };

  /** `embeddings` section: only `pricePer1kTokens` stays optional. */
  embeddings: {
    provider: EmbeddingProvider;
    model: string;
    apiKeyEnv: string;
    batchSize: number;
    concurrency: number;
    pricePer1kTokens?: number;
  };

  /** `vector` section: `dimension` stays optional, `turso` is fully required. */
  vector: {
    dimension?: number;
    turso: {
      urlEnv: string;
      authTokenEnv: string;
      localPath: string;
    };
  };

  /** `rerank` section: all fields required. */
  rerank: {
    provider: RerankProvider;
    topN: number;
    jina: {
      apiKeyEnv: string;
      model: string;
    };
  };

  /** `ranking` section: all fields required. */
  ranking: {
    enableIncomingLinkBoost: boolean;
    enableDepthBoost: boolean;
    pageWeights: Record<string, number>;
    aggregationCap: number;
    aggregationDecay: number;
    minChunkScoreRatio: number;
    weights: {
      incomingLinks: number;
      depth: number;
      rerank: number;
      aggregation: number;
    };
  };

  /** `api` section: `rateLimit` stays optional; its fields are required when present. */
  api: {
    path: string;
    cors: {
      allowOrigins: string[];
    };
    rateLimit?: {
      windowMs: number;
      max: number;
    };
  };

  /** `mcp` section: all fields required. */
  mcp: {
    enable: boolean;
    transport: "stdio" | "http";
    http: {
      port: number;
      path: string;
    };
  };

  /** `state` section: all fields required. */
  state: {
    dir: string;
    writeMirror: boolean;
  };
}
/**
 * Scope descriptor made of three required string identifiers.
 * Passed as the `scope` argument to most `VectorStore` methods.
 */
interface Scope {
  projectId: string;
  scopeName: string;
  scopeId: string;
}
/**
 * A single chunk record: a key and ordinal, page-level fields (url, path,
 * title, route file, tags), the chunk text with its snippet, and numeric
 * `depth` / `incomingLinks` values.
 *
 * `sectionTitle` is the only optional field.
 */
interface Chunk {
  chunkKey: string;
  ordinal: number;

  url: string;
  path: string;
  title: string;
  sectionTitle?: string;
  headingPath: string[];

  chunkText: string;
  snippet: string;

  depth: number;
  incomingLinks: number;
  routeFile: string;
  tags: string[];
  contentHash: string;
}
/**
 * A vector paired with an id and a metadata object.
 *
 * The `metadata` shape is reused by `VectorHit` through the indexed access
 * type `VectorRecord["metadata"]`, so changes here affect both.
 * Unlike `Chunk.sectionTitle`, `metadata.sectionTitle` is required.
 */
interface VectorRecord {
  id: string;
  vector: number[];
  metadata: {
    projectId: string;
    scopeName: string;

    url: string;
    path: string;
    title: string;
    sectionTitle: string;
    headingPath: string[];
    snippet: string;

    contentHash: string;
    modelId: string;

    depth: number;
    incomingLinks: number;
    routeFile: string;
    tags: string[];
  };
}
/**
 * Options argument of `VectorStore.query`: a required `topK` plus optional
 * `pathPrefix` and `tags`.
 */
interface QueryOpts {
  topK: number;
  pathPrefix?: string;
  tags?: string[];
}
/**
 * Element type of the array returned by `VectorStore.query`: an id, a numeric
 * score, and the same metadata shape that `VectorRecord` carries.
 */
interface VectorHit {
  id: string;
  score: number;
  metadata: VectorRecord["metadata"];
}
/**
 * Page-level record accepted by `VectorStore.upsertPages` and returned by
 * `VectorStore.getPage`. All fields are required.
 */
interface PageRecord {
  url: string;
  title: string;
  markdown: string;

  projectId: string;
  scopeName: string;

  routeFile: string;
  /** One of the two literal values `"exact"` or `"best-effort"`. */
  routeResolution: "exact" | "best-effort";

  incomingLinks: number;
  outgoingLinks: number;
  depth: number;
  tags: string[];

  /** Typed as a plain string. NOTE(review): presumably a timestamp — format not visible here. */
  indexedAt: string;
}
/**
 * Per-scope record accepted by `VectorStore.recordScope` and returned (as an
 * array) by `VectorStore.listScopes`.
 *
 * The four identifying/timestamp fields are required; the count and the
 * `lastEstimate*` figures are optional.
 */
interface ScopeInfo {
  projectId: string;
  scopeName: string;
  modelId: string;
  lastIndexedAt: string;

  vectorCount?: number;
  lastEstimateTokens?: number;
  lastEstimateCostUSD?: number;
  lastEstimateChangedChunks?: number;
}
/**
 * Contract for a vector store backend. Every method is asynchronous and
 * returns a `Promise`.
 *
 * Apart from `listScopes`, `recordScope` and `health`, every method takes a
 * `Scope` as its last argument.
 */
interface VectorStore {
  // --- vector records ---

  /** Accepts a batch of records for the given scope. */
  upsert(records: VectorRecord[], scope: Scope): Promise<void>;

  /** Resolves to the hits for `queryVector` under the given options and scope. */
  query(queryVector: number[], opts: QueryOpts, scope: Scope): Promise<VectorHit[]>;

  /** Takes a list of record ids and a scope. */
  deleteByIds(ids: string[], scope: Scope): Promise<void>;

  /** Takes only the scope. */
  deleteScope(scope: Scope): Promise<void>;

  // --- scope bookkeeping ---

  /** Resolves to the `ScopeInfo` entries for a project id. */
  listScopes(scopeProjectId: string): Promise<ScopeInfo[]>;

  /** Accepts a single `ScopeInfo` entry. */
  recordScope(info: ScopeInfo): Promise<void>;

  /** Resolves to an `ok` flag with an optional `details` string. */
  health(): Promise<{
    ok: boolean;
    details?: string;
  }>;

  /**
   * Resolves to a string-to-string map for the scope.
   * NOTE(review): key/value meaning (presumably id → content hash) is not
   * visible in this declaration — confirm against an implementation.
   */
  getContentHashes(scope: Scope): Promise<Map<string, string>>;

  // --- page records ---

  /** Accepts a batch of page records for the given scope. */
  upsertPages(pages: PageRecord[], scope: Scope): Promise<void>;

  /** Resolves to the page record for `url`, or `null`. */
  getPage(url: string, scope: Scope): Promise<PageRecord | null>;

  /** Takes only the scope. */
  deletePages(scope: Scope): Promise<void>;

  /** Resolves to a model id string for the scope, or `null`. */
  getScopeModelId(scope: Scope): Promise<string | null>;
}
/** Contract for an embeddings backend. */
interface EmbeddingsProvider {
  /**
   * Asynchronously maps a list of texts to a list of numeric vectors for the
   * given model id.
   */
  embedTexts(texts: string[], modelId: string): Promise<number[][]>;

  /** Synchronously returns a numeric token estimate for one text. */
  estimateTokens(text: string): number;
}
/** Input item for `Reranker.rerank`: an id paired with its text. */
interface RerankCandidate {
  id: string;
  text: string;
}
/** Contract for a reranking backend. */
interface Reranker {
  /**
   * Takes a query, the candidates and an optional `topN`, and resolves to a
   * list of `{ id, score }` pairs.
   */
  rerank(
    query: string,
    candidates: RerankCandidate[],
    topN?: number,
  ): Promise<{ id: string; score: number }[]>;
}
/**
 * Search request shape. Only `q` is required; every other field is optional.
 */
interface SearchRequest {
  q: string;
  topK?: number;
  scope?: string;
  pathPrefix?: string;
  tags?: string[];
  rerank?: boolean;
  /** One of the two literal values `"page"` or `"chunk"`. */
  groupBy?: "page" | "chunk";
}
/**
 * Element type of the optional `SearchResult.chunks` array.
 * `sectionTitle` is the only optional field.
 */
interface SearchResultChunk {
  sectionTitle?: string;
  snippet: string;
  headingPath: string[];
  score: number;
}
/**
 * Element type of `SearchResponse.results`.
 * `sectionTitle` and `chunks` are optional; all other fields are required.
 */
interface SearchResult {
  url: string;
  title: string;
  sectionTitle?: string;
  snippet: string;
  score: number;
  routeFile: string;
  chunks?: SearchResultChunk[];
}
/**
 * Search response shape: the query string, a scope string, the result list,
 * and a `meta` object. All fields are required.
 */
interface SearchResponse {
  q: string;
  scope: string;
  results: SearchResult[];
  meta: {
    /** Four numeric timing fields; the `Ms` suffix suggests milliseconds. */
    timingsMs: {
      embed: number;
      vector: number;
      rerank: number;
      total: number;
    };
    usedRerank: boolean;
    modelId: string;
  };
}
/**
 * Numeric statistics for an indexing run, plus a name-to-number map of stage
 * timings. All fields are required.
 */
interface IndexStats {
  pagesProcessed: number;

  chunksTotal: number;
  chunksChanged: number;
  newEmbeddings: number;
  deletes: number;

  estimatedTokens: number;
  estimatedCostUSD: number;

  routeExact: number;
  routeBestEffort: number;

  /** Keyed by arbitrary string; the set of keys is not fixed by this type. */
  stageTimingsMs: Record<string, number>;
}
/**
 * Options for an indexing run. Every field is optional.
 */
interface IndexOptions {
  // Overrides
  scopeOverride?: string;
  sourceOverride?: SourceMode;

  // Boolean flags
  changedOnly?: boolean;
  force?: boolean;
  dryRun?: boolean;
  verbose?: boolean;

  // Numeric limits
  maxPages?: number;
  maxChunks?: number;
}
402
+
// Type-only exports. Each declaration is exposed under a single-letter alias;
// consumers must import by alias, not by the declaration name.
export type {
  Chunk as C,
  EmbeddingsProvider as E,
  IndexOptions as I,
  QueryOpts as Q,
  ResolvedSearchSocketConfig as R,
  SearchRequest as S,
  VectorStore as V,
  SearchResponse as a,
  SearchSocketConfig as b,
  Scope as c,
  Reranker as d,
  RerankCandidate as e,
  IndexStats as f,
  VectorHit as g,
  VectorRecord as h,
};