searchsocket 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
1
+ type ScopeMode = "fixed" | "git" | "env"; // Values accepted by the config's scope.mode field. NOTE(review): "fixed" / "env" presumably pair with scope.fixed / scope.envVar — confirm.
2
+ type SourceMode = "static-output" | "crawl" | "content-files" | "build"; // Values accepted by the config's source.mode field and by IndexOptions.sourceOverride.
3
+ type EmbeddingProvider = "openai"; // Type of embeddings.provider; "openai" is the only member declared here.
4
+ type RerankProvider = "none" | "jina"; // Type of rerank.provider. NOTE(review): "none" presumably disables reranking — confirm.
5
+ interface SearchSocketConfig { // Partial configuration shape: every top-level section is optional, as are nearly all fields inside them.
6
+ project?: {
7
+ id?: string;
8
+ baseUrl?: string;
9
+ };
10
+ scope?: {
11
+ mode?: ScopeMode;
12
+ fixed?: string;
13
+ envVar?: string;
14
+ sanitize?: boolean;
15
+ };
16
+ source?: {
17
+ mode?: SourceMode;
18
+ staticOutputDir?: string;
19
+ strictRouteMapping?: boolean;
20
+ crawl?: {
21
+ baseUrl: string; // Required whenever a crawl section is supplied.
22
+ routes?: string[];
23
+ sitemapUrl?: string;
24
+ };
25
+ contentFiles?: {
26
+ globs: string[]; // Required whenever a contentFiles section is supplied.
27
+ baseDir?: string;
28
+ };
29
+ build?: {
30
+ outputDir?: string;
31
+ paramValues?: Record<string, string[]>;
32
+ exclude?: string[];
33
+ previewTimeout?: number;
34
+ };
35
+ };
36
+ extract?: {
37
+ mainSelector?: string;
38
+ dropTags?: string[];
39
+ dropSelectors?: string[];
40
+ ignoreAttr?: string;
41
+ noindexAttr?: string;
42
+ respectRobotsNoindex?: boolean;
43
+ };
44
+ transform?: {
45
+ output?: "markdown"; // "markdown" is the only value the type permits.
46
+ preserveCodeBlocks?: boolean;
47
+ preserveTables?: boolean;
48
+ };
49
+ chunking?: {
50
+ strategy?: "hybrid"; // "hybrid" is the only value the type permits.
51
+ maxChars?: number;
52
+ overlapChars?: number;
53
+ minChars?: number;
54
+ headingPathDepth?: number;
55
+ dontSplitInside?: Array<"code" | "table" | "blockquote">;
56
+ prependTitle?: boolean;
57
+ pageSummaryChunk?: boolean;
58
+ };
59
+ embeddings?: {
60
+ provider?: EmbeddingProvider;
61
+ model?: string;
62
+ apiKeyEnv?: string; // NOTE(review): name suggests an environment-variable name rather than the key itself — confirm.
63
+ batchSize?: number;
64
+ concurrency?: number;
65
+ pricePer1kTokens?: number;
66
+ };
67
+ vector?: {
68
+ dimension?: number;
69
+ turso?: {
70
+ urlEnv?: string;
71
+ authTokenEnv?: string;
72
+ localPath?: string;
73
+ };
74
+ };
75
+ rerank?: {
76
+ provider?: RerankProvider;
77
+ topN?: number;
78
+ jina?: {
79
+ apiKeyEnv?: string;
80
+ model?: string;
81
+ };
82
+ };
83
+ ranking?: {
84
+ enableIncomingLinkBoost?: boolean;
85
+ enableDepthBoost?: boolean;
86
+ pageWeights?: Record<string, number>;
87
+ aggregationCap?: number;
88
+ aggregationDecay?: number;
89
+ minChunkScoreRatio?: number;
90
+ weights?: {
91
+ incomingLinks?: number;
92
+ depth?: number;
93
+ rerank?: number;
94
+ aggregation?: number;
95
+ };
96
+ };
97
+ api?: {
98
+ path?: string;
99
+ cors?: {
100
+ allowOrigins?: string[];
101
+ };
102
+ rateLimit?: {
103
+ windowMs?: number;
104
+ max?: number;
105
+ };
106
+ };
107
+ mcp?: {
108
+ enable?: boolean;
109
+ transport?: "stdio" | "http";
110
+ http?: {
111
+ port?: number;
112
+ path?: string;
113
+ };
114
+ };
115
+ state?: {
116
+ dir?: string;
117
+ writeMirror?: boolean;
118
+ };
119
+ }
120
+ interface ResolvedSearchSocketConfig { // Same sections as SearchSocketConfig, but every top-level section and most fields are required. NOTE(review): presumably the result of applying defaults — confirm.
121
+ project: {
122
+ id: string;
123
+ baseUrl?: string; // Still optional in the resolved shape.
124
+ };
125
+ scope: {
126
+ mode: ScopeMode;
127
+ fixed: string;
128
+ envVar: string;
129
+ sanitize: boolean;
130
+ };
131
+ source: {
132
+ mode: SourceMode;
133
+ staticOutputDir: string;
134
+ strictRouteMapping: boolean;
135
+ crawl?: { // Optional section; when present, routes is required and sitemapUrl stays optional.
136
+ baseUrl: string;
137
+ routes: string[];
138
+ sitemapUrl?: string;
139
+ };
140
+ contentFiles?: { // Optional section; when present, baseDir is required.
141
+ globs: string[];
142
+ baseDir: string;
143
+ };
144
+ build?: { // Optional section; when present, all four fields are required.
145
+ outputDir: string;
146
+ paramValues: Record<string, string[]>;
147
+ exclude: string[];
148
+ previewTimeout: number;
149
+ };
150
+ };
151
+ extract: {
152
+ mainSelector: string;
153
+ dropTags: string[];
154
+ dropSelectors: string[];
155
+ ignoreAttr: string;
156
+ noindexAttr: string;
157
+ respectRobotsNoindex: boolean;
158
+ };
159
+ transform: {
160
+ output: "markdown";
161
+ preserveCodeBlocks: boolean;
162
+ preserveTables: boolean;
163
+ };
164
+ chunking: {
165
+ strategy: "hybrid";
166
+ maxChars: number;
167
+ overlapChars: number;
168
+ minChars: number;
169
+ headingPathDepth: number;
170
+ dontSplitInside: Array<"code" | "table" | "blockquote">;
171
+ prependTitle: boolean;
172
+ pageSummaryChunk: boolean;
173
+ };
174
+ embeddings: {
175
+ provider: EmbeddingProvider;
176
+ model: string;
177
+ apiKeyEnv: string;
178
+ batchSize: number;
179
+ concurrency: number;
180
+ pricePer1kTokens?: number; // Still optional in the resolved shape.
181
+ };
182
+ vector: {
183
+ dimension?: number; // Still optional in the resolved shape.
184
+ turso: {
185
+ urlEnv: string;
186
+ authTokenEnv: string;
187
+ localPath: string;
188
+ };
189
+ };
190
+ rerank: {
191
+ provider: RerankProvider;
192
+ topN: number;
193
+ jina: {
194
+ apiKeyEnv: string;
195
+ model: string;
196
+ };
197
+ };
198
+ ranking: {
199
+ enableIncomingLinkBoost: boolean;
200
+ enableDepthBoost: boolean;
201
+ pageWeights: Record<string, number>;
202
+ aggregationCap: number;
203
+ aggregationDecay: number;
204
+ minChunkScoreRatio: number;
205
+ weights: {
206
+ incomingLinks: number;
207
+ depth: number;
208
+ rerank: number;
209
+ aggregation: number;
210
+ };
211
+ };
212
+ api: {
213
+ path: string;
214
+ cors: {
215
+ allowOrigins: string[];
216
+ };
217
+ rateLimit?: { // Optional section; when present, windowMs and max are both required.
218
+ windowMs: number;
219
+ max: number;
220
+ };
221
+ };
222
+ mcp: {
223
+ enable: boolean;
224
+ transport: "stdio" | "http";
225
+ http: {
226
+ port: number;
227
+ path: string;
228
+ };
229
+ };
230
+ state: {
231
+ dir: string;
232
+ writeMirror: boolean;
233
+ };
234
+ }
235
+ interface Scope { // Three string identifiers; passed as the scope argument to the VectorStore methods that operate on a single scope.
236
+ projectId: string;
237
+ scopeName: string;
238
+ scopeId: string;
239
+ }
240
+ interface Chunk { // Chunk record: text (chunkText, snippet), location (url, path, headingPath) and ranking/bookkeeping metadata.
241
+ chunkKey: string;
242
+ ordinal: number;
243
+ url: string;
244
+ path: string;
245
+ title: string;
246
+ sectionTitle?: string; // The only optional field of Chunk.
247
+ headingPath: string[];
248
+ chunkText: string;
249
+ snippet: string;
250
+ depth: number;
251
+ incomingLinks: number;
252
+ routeFile: string;
253
+ tags: string[];
254
+ contentHash: string;
255
+ }
256
+ interface VectorRecord { // An id, a numeric vector and a metadata object; the element type accepted by VectorStore.upsert.
257
+ id: string;
258
+ vector: number[];
259
+ metadata: { // This shape is reused by VectorHit through VectorRecord["metadata"].
260
+ projectId: string;
261
+ scopeName: string;
262
+ url: string;
263
+ path: string;
264
+ title: string;
265
+ sectionTitle: string; // Required here, whereas Chunk.sectionTitle is optional.
266
+ headingPath: string[];
267
+ snippet: string;
268
+ contentHash: string;
269
+ modelId: string;
270
+ depth: number;
271
+ incomingLinks: number;
272
+ routeFile: string;
273
+ tags: string[];
274
+ };
275
+ }
276
+ interface QueryOpts { // Options argument of VectorStore.query; topK is the only required field.
277
+ topK: number;
278
+ pathPrefix?: string;
279
+ tags?: string[];
280
+ }
281
+ interface VectorHit { // Element type of the array that VectorStore.query resolves to.
282
+ id: string;
283
+ score: number;
284
+ metadata: VectorRecord["metadata"]; // Indexed-access type: identical to the metadata shape declared on VectorRecord.
285
+ }
286
+ interface PageRecord { // Page-level record accepted by VectorStore.upsertPages and resolved by getPage; not in this file's export list.
287
+ url: string;
288
+ title: string;
289
+ markdown: string;
290
+ projectId: string;
291
+ scopeName: string;
292
+ routeFile: string;
293
+ routeResolution: "exact" | "best-effort"; // NOTE(review): IndexStats.routeExact / routeBestEffort presumably count these two outcomes — confirm.
294
+ incomingLinks: number;
295
+ outgoingLinks: number;
296
+ depth: number;
297
+ tags: string[];
298
+ indexedAt: string; // Typed as string, not Date.
299
+ }
300
+ interface ScopeInfo { // Per-scope summary resolved by VectorStore.listScopes and accepted by recordScope; not in this file's export list.
301
+ projectId: string;
302
+ scopeName: string;
303
+ modelId: string;
304
+ lastIndexedAt: string; // Typed as string, not Date.
305
+ vectorCount?: number; // This and the three lastEstimate* fields below are optional.
306
+ lastEstimateTokens?: number;
307
+ lastEstimateCostUSD?: number;
308
+ lastEstimateChangedChunks?: number;
309
+ }
310
+ interface VectorStore { // Storage contract: every method returns a Promise.
311
+ upsert(records: VectorRecord[], scope: Scope): Promise<void>;
312
+ query(queryVector: number[], opts: QueryOpts, scope: Scope): Promise<VectorHit[]>;
313
+ deleteByIds(ids: string[], scope: Scope): Promise<void>;
314
+ deleteScope(scope: Scope): Promise<void>;
315
+ listScopes(scopeProjectId: string): Promise<ScopeInfo[]>; // Takes a project id string rather than a Scope object.
316
+ recordScope(info: ScopeInfo): Promise<void>;
317
+ health(): Promise<{
318
+ ok: boolean;
319
+ details?: string;
320
+ }>;
321
+ getContentHashes(scope: Scope): Promise<Map<string, string>>; // NOTE(review): presumably maps a chunk key/id to its contentHash — confirm against the implementation.
322
+ upsertPages(pages: PageRecord[], scope: Scope): Promise<void>;
323
+ getPage(url: string, scope: Scope): Promise<PageRecord | null>; // null is part of the resolved type. NOTE(review): presumably means "no such page" — confirm.
324
+ deletePages(scope: Scope): Promise<void>;
325
+ getScopeModelId(scope: Scope): Promise<string | null>; // null is part of the resolved type. NOTE(review): presumably means "scope not recorded" — confirm.
326
+ }
327
+ interface EmbeddingsProvider { // Embedding backend contract: asynchronous embedTexts plus synchronous estimateTokens.
328
+ embedTexts(texts: string[], modelId: string): Promise<number[][]>; // NOTE(review): presumably one vector per input text, in input order — confirm.
329
+ estimateTokens(text: string): number; // Returns a plain number, not a Promise.
330
+ }
331
+ interface RerankCandidate { // Element type of the candidates array passed to Reranker.rerank: an id and its text.
332
+ id: string;
333
+ text: string;
334
+ }
335
+ interface Reranker { // Reranking contract with a single asynchronous method.
336
+ rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<Array<{ // topN is optional; resolves to an array of { id, score } objects.
337
+ id: string;
338
+ score: number;
339
+ }>>;
340
+ }
341
+ interface SearchRequest { // Search request parameters; q is the only required field.
342
+ q: string;
343
+ topK?: number;
344
+ scope?: string; // A plain string here, not a Scope object.
345
+ pathPrefix?: string;
346
+ tags?: string[];
347
+ rerank?: boolean;
348
+ groupBy?: "page" | "chunk";
349
+ }
350
+ interface SearchResultChunk { // Element type of SearchResult.chunks; not in this file's export list.
351
+ sectionTitle?: string;
352
+ snippet: string;
353
+ headingPath: string[];
354
+ score: number;
355
+ }
356
+ interface SearchResult { // Element type of SearchResponse.results; not in this file's export list.
357
+ url: string;
358
+ title: string;
359
+ sectionTitle?: string;
360
+ snippet: string;
361
+ score: number;
362
+ routeFile: string;
363
+ chunks?: SearchResultChunk[]; // Optional. NOTE(review): presumably populated when SearchRequest.groupBy is "page" — confirm.
364
+ }
365
+ interface SearchResponse { // Search response: q and scope as strings, the results array, and a meta object; all fields required.
366
+ q: string;
367
+ scope: string;
368
+ results: SearchResult[];
369
+ meta: {
370
+ timingsMs: { // Four numeric timings; the field name indicates milliseconds.
371
+ embed: number;
372
+ vector: number;
373
+ rerank: number;
374
+ total: number;
375
+ };
376
+ usedRerank: boolean;
377
+ modelId: string;
378
+ };
379
+ }
380
+ interface IndexStats { // Numeric counters and estimates plus a timings map; every field is required. NOTE(review): presumably the summary of one indexing run — confirm.
381
+ pagesProcessed: number;
382
+ chunksTotal: number;
383
+ chunksChanged: number;
384
+ newEmbeddings: number;
385
+ deletes: number;
386
+ estimatedTokens: number;
387
+ estimatedCostUSD: number;
388
+ routeExact: number;
389
+ routeBestEffort: number;
390
+ stageTimingsMs: Record<string, number>; // Keyed by arbitrary string. NOTE(review): presumably stage name -> milliseconds — confirm.
391
+ }
392
+ interface IndexOptions { // Options bag in which every field is optional.
393
+ scopeOverride?: string;
394
+ changedOnly?: boolean;
395
+ force?: boolean;
396
+ dryRun?: boolean;
397
+ sourceOverride?: SourceMode; // Same union as the config's source.mode field.
398
+ maxPages?: number;
399
+ maxChunks?: number;
400
+ verbose?: boolean;
401
+ }
402
+
403
+ export type { Chunk as C, EmbeddingsProvider as E, IndexOptions as I, QueryOpts as Q, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorStore as V, SearchResponse as a, SearchSocketConfig as b, Scope as c, Reranker as d, RerankCandidate as e, IndexStats as f, VectorHit as g, VectorRecord as h }; // Type-only export under single-letter aliases. ScopeMode, SourceMode, EmbeddingProvider, RerankProvider, PageRecord, ScopeInfo, SearchResultChunk and SearchResult are declared above but not listed here.
package/package.json ADDED
@@ -0,0 +1,86 @@
1
+ {
2
+ "name": "searchsocket",
3
+ "version": "0.2.0",
4
+ "description": "Semantic site search and MCP retrieval for SvelteKit static sites",
5
+ "license": "MIT",
6
+ "author": "Greg Priday <greg@siteorigin.com>",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/gregpriday/searchsocket.git"
10
+ },
11
+ "homepage": "https://github.com/gregpriday/searchsocket",
12
+ "bugs": {
13
+ "url": "https://github.com/gregpriday/searchsocket/issues"
14
+ },
15
+ "keywords": [
16
+ "search",
17
+ "semantic-search",
18
+ "sveltekit",
19
+ "mcp",
20
+ "embeddings",
21
+ "vector-search",
22
+ "site-search",
23
+ "static-site"
24
+ ],
25
+ "type": "module",
26
+ "files": [
27
+ "dist",
28
+ "README.md"
29
+ ],
30
+ "bin": {
31
+ "searchsocket": "dist/cli.js"
32
+ },
33
+ "exports": {
34
+ ".": {
35
+ "types": "./dist/index.d.ts",
36
+ "import": "./dist/index.js",
37
+ "require": "./dist/index.cjs"
38
+ },
39
+ "./sveltekit": {
40
+ "types": "./dist/sveltekit.d.ts",
41
+ "import": "./dist/sveltekit.js",
42
+ "require": "./dist/sveltekit.cjs"
43
+ },
44
+ "./client": {
45
+ "types": "./dist/client.d.ts",
46
+ "import": "./dist/client.js",
47
+ "require": "./dist/client.cjs"
48
+ }
49
+ },
50
+ "scripts": {
51
+ "build": "tsup",
52
+ "clean": "rm -rf dist",
53
+ "typecheck": "tsc --noEmit",
54
+ "test": "vitest run",
55
+ "test:watch": "vitest"
56
+ },
57
+ "engines": {
58
+ "node": ">=20"
59
+ },
60
+ "packageManager": "pnpm@10.29.2",
61
+ "dependencies": {
62
+ "@libsql/client": "^0.17.0",
63
+ "@modelcontextprotocol/sdk": "^1.26.0",
64
+ "cheerio": "^1.2.0",
65
+ "chokidar": "^5.0.0",
66
+ "commander": "^14.0.3",
67
+ "dotenv": "^17.3.1",
68
+ "express": "^5.2.1",
69
+ "fast-glob": "^3.3.3",
70
+ "gray-matter": "^4.0.3",
71
+ "jiti": "^2.6.1",
72
+ "openai": "^6.19.0",
73
+ "p-limit": "^7.3.0",
74
+ "turndown": "^7.2.2",
75
+ "turndown-plugin-gfm": "^1.0.2",
76
+ "zod": "^4.3.6"
77
+ },
78
+ "devDependencies": {
79
+ "@types/express": "^5.0.6",
80
+ "@types/node": "^25.2.2",
81
+ "@types/turndown": "^5.0.6",
82
+ "tsup": "^8.5.1",
83
+ "typescript": "^5.9.3",
84
+ "vitest": "^4.0.18"
85
+ }
86
+ }