searchsocket 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,5 @@
1
1
  type ScopeMode = "fixed" | "git" | "env";
2
2
  type SourceMode = "static-output" | "crawl" | "content-files" | "build";
3
- type EmbeddingProvider = "jina";
4
3
  interface SearchSocketConfig {
5
4
  project?: {
6
5
  id?: string;
@@ -61,29 +60,18 @@ interface SearchSocketConfig {
61
60
  prependTitle?: boolean;
62
61
  pageSummaryChunk?: boolean;
63
62
  };
64
- embeddings?: {
65
- provider?: EmbeddingProvider;
66
- model?: string;
67
- apiKey?: string;
68
- apiKeyEnv?: string;
69
- batchSize?: number;
70
- concurrency?: number;
71
- pricePer1kTokens?: number;
63
+ upstash?: {
64
+ url?: string;
65
+ token?: string;
66
+ urlEnv?: string;
67
+ tokenEnv?: string;
72
68
  };
73
- vector?: {
74
- dimension?: number;
75
- turso?: {
76
- url?: string;
77
- authToken?: string;
78
- urlEnv?: string;
79
- authTokenEnv?: string;
80
- localPath?: string;
81
- };
82
- };
83
- rerank?: {
84
- enabled?: boolean;
85
- topN?: number;
86
- model?: string;
69
+ search?: {
70
+ semanticWeight?: number;
71
+ inputEnrichment?: boolean;
72
+ reranking?: boolean;
73
+ dualSearch?: boolean;
74
+ pageSearchWeight?: number;
87
75
  };
88
76
  ranking?: {
89
77
  enableIncomingLinkBoost?: boolean;
@@ -93,11 +81,12 @@ interface SearchSocketConfig {
93
81
  aggregationDecay?: number;
94
82
  minChunkScoreRatio?: number;
95
83
  minScore?: number;
84
+ scoreGapThreshold?: number;
96
85
  weights?: {
97
86
  incomingLinks?: number;
98
87
  depth?: number;
99
- rerank?: number;
100
88
  aggregation?: number;
89
+ titleMatch?: number;
101
90
  };
102
91
  };
103
92
  api?: {
@@ -120,7 +109,6 @@ interface SearchSocketConfig {
120
109
  };
121
110
  state?: {
122
111
  dir?: string;
123
- writeMirror?: boolean;
124
112
  };
125
113
  }
126
114
  interface ResolvedSearchSocketConfig {
@@ -183,29 +171,18 @@ interface ResolvedSearchSocketConfig {
183
171
  prependTitle: boolean;
184
172
  pageSummaryChunk: boolean;
185
173
  };
186
- embeddings: {
187
- provider: EmbeddingProvider;
188
- model: string;
189
- apiKey?: string;
190
- apiKeyEnv: string;
191
- batchSize: number;
192
- concurrency: number;
193
- pricePer1kTokens?: number;
174
+ upstash: {
175
+ url?: string;
176
+ token?: string;
177
+ urlEnv: string;
178
+ tokenEnv: string;
194
179
  };
195
- vector: {
196
- dimension?: number;
197
- turso: {
198
- url?: string;
199
- authToken?: string;
200
- urlEnv: string;
201
- authTokenEnv: string;
202
- localPath: string;
203
- };
204
- };
205
- rerank: {
206
- enabled: boolean;
207
- topN: number;
208
- model: string;
180
+ search: {
181
+ semanticWeight: number;
182
+ inputEnrichment: boolean;
183
+ reranking: boolean;
184
+ dualSearch: boolean;
185
+ pageSearchWeight: number;
209
186
  };
210
187
  ranking: {
211
188
  enableIncomingLinkBoost: boolean;
@@ -215,11 +192,12 @@ interface ResolvedSearchSocketConfig {
215
192
  aggregationDecay: number;
216
193
  minChunkScoreRatio: number;
217
194
  minScore: number;
195
+ scoreGapThreshold: number;
218
196
  weights: {
219
197
  incomingLinks: number;
220
198
  depth: number;
221
- rerank: number;
222
199
  aggregation: number;
200
+ titleMatch: number;
223
201
  };
224
202
  };
225
203
  api: {
@@ -242,7 +220,6 @@ interface ResolvedSearchSocketConfig {
242
220
  };
243
221
  state: {
244
222
  dir: string;
245
- writeMirror: boolean;
246
223
  };
247
224
  }
248
225
  interface Scope {
@@ -268,9 +245,9 @@ interface Chunk {
268
245
  description?: string;
269
246
  keywords?: string[];
270
247
  }
271
- interface VectorRecord {
248
+ interface VectorHit {
272
249
  id: string;
273
- vector: number[];
250
+ score: number;
274
251
  metadata: {
275
252
  projectId: string;
276
253
  scopeName: string;
@@ -283,7 +260,6 @@ interface VectorRecord {
283
260
  chunkText: string;
284
261
  ordinal: number;
285
262
  contentHash: string;
286
- modelId: string;
287
263
  depth: number;
288
264
  incomingLinks: number;
289
265
  routeFile: string;
@@ -292,16 +268,6 @@ interface VectorRecord {
292
268
  keywords?: string[];
293
269
  };
294
270
  }
295
- interface QueryOpts {
296
- topK: number;
297
- pathPrefix?: string;
298
- tags?: string[];
299
- }
300
- interface VectorHit {
301
- id: string;
302
- score: number;
303
- metadata: VectorRecord["metadata"];
304
- }
305
271
  interface PageRecord {
306
272
  url: string;
307
273
  title: string;
@@ -315,48 +281,26 @@ interface PageRecord {
315
281
  depth: number;
316
282
  tags: string[];
317
283
  indexedAt: string;
284
+ summary?: string;
285
+ description?: string;
286
+ keywords?: string[];
287
+ }
288
+ interface PageHit {
289
+ id: string;
290
+ score: number;
291
+ title: string;
292
+ url: string;
293
+ description: string;
294
+ tags: string[];
295
+ depth: number;
296
+ incomingLinks: number;
297
+ routeFile: string;
318
298
  }
319
299
  interface ScopeInfo {
320
300
  projectId: string;
321
301
  scopeName: string;
322
- modelId: string;
323
302
  lastIndexedAt: string;
324
- vectorCount?: number;
325
- lastEstimateTokens?: number;
326
- lastEstimateCostUSD?: number;
327
- lastEstimateChangedChunks?: number;
328
- }
329
- interface VectorStore {
330
- upsert(records: VectorRecord[], scope: Scope): Promise<void>;
331
- query(queryVector: number[], opts: QueryOpts, scope: Scope): Promise<VectorHit[]>;
332
- deleteByIds(ids: string[], scope: Scope): Promise<void>;
333
- deleteScope(scope: Scope): Promise<void>;
334
- listScopes(scopeProjectId: string): Promise<ScopeInfo[]>;
335
- recordScope(info: ScopeInfo): Promise<void>;
336
- health(): Promise<{
337
- ok: boolean;
338
- details?: string;
339
- }>;
340
- getContentHashes(scope: Scope): Promise<Map<string, string>>;
341
- upsertPages(pages: PageRecord[], scope: Scope): Promise<void>;
342
- getPage(url: string, scope: Scope): Promise<PageRecord | null>;
343
- deletePages(scope: Scope): Promise<void>;
344
- getScopeModelId(scope: Scope): Promise<string | null>;
345
- dropAllTables(): Promise<void>;
346
- }
347
- interface EmbeddingsProvider {
348
- embedTexts(texts: string[], modelId: string, task?: string): Promise<number[][]>;
349
- estimateTokens(text: string): number;
350
- }
351
- interface RerankCandidate {
352
- id: string;
353
- text: string;
354
- }
355
- interface Reranker {
356
- rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<Array<{
357
- id: string;
358
- score: number;
359
- }>>;
303
+ documentCount?: number;
360
304
  }
361
305
  interface SearchRequest {
362
306
  q: string;
@@ -364,9 +308,7 @@ interface SearchRequest {
364
308
  scope?: string;
365
309
  pathPrefix?: string;
366
310
  tags?: string[];
367
- rerank?: boolean;
368
311
  groupBy?: "page" | "chunk";
369
- stream?: boolean;
370
312
  }
371
313
  interface SearchResultChunk {
372
314
  sectionTitle?: string;
@@ -389,23 +331,17 @@ interface SearchResponse {
389
331
  results: SearchResult[];
390
332
  meta: {
391
333
  timingsMs: {
392
- embed: number;
393
- vector: number;
394
- rerank: number;
334
+ search: number;
395
335
  total: number;
396
336
  };
397
- usedRerank: boolean;
398
- modelId: string;
399
337
  };
400
338
  }
401
339
  interface IndexStats {
402
340
  pagesProcessed: number;
403
341
  chunksTotal: number;
404
342
  chunksChanged: number;
405
- newEmbeddings: number;
343
+ documentsUpserted: number;
406
344
  deletes: number;
407
- estimatedTokens: number;
408
- estimatedCostUSD: number;
409
345
  routeExact: number;
410
346
  routeBestEffort: number;
411
347
  stageTimingsMs: Record<string, number>;
@@ -420,36 +356,5 @@ interface IndexOptions {
420
356
  maxChunks?: number;
421
357
  verbose?: boolean;
422
358
  }
423
- interface StreamSearchEvent {
424
- phase: "initial" | "reranked";
425
- data: SearchResponse;
426
- }
427
- interface StreamSearchErrorEvent {
428
- phase: "error";
429
- data: {
430
- error: {
431
- code: string;
432
- message: string;
433
- };
434
- };
435
- }
436
- type StreamEvent = StreamSearchEvent | StreamSearchErrorEvent;
437
- interface MergeSearchOptions {
438
- /**
439
- * If any single result moved more than this many positions, adopt
440
- * the reranked order. The reranker is semantic — if it strongly
441
- * disagrees with vector similarity on even one result, trust it.
442
- * @default 3
443
- */
444
- maxDisplacement?: number;
445
- }
446
- interface MergeSearchResult {
447
- response: SearchResponse;
448
- usedRerankedOrder: boolean;
449
- displacements: Array<{
450
- url: string;
451
- displacement: number;
452
- }>;
453
- }
454
359
 
455
- export type { Chunk as C, EmbeddingsProvider as E, IndexOptions as I, MergeSearchOptions as M, QueryOpts as Q, ResolvedSearchSocketConfig as R, SearchResponse as S, VectorStore as V, MergeSearchResult as a, SearchRequest as b, StreamSearchEvent as c, SearchSocketConfig as d, Scope as e, Reranker as f, RerankCandidate as g, IndexStats as h, StreamEvent as i, StreamSearchErrorEvent as j, VectorHit as k, VectorRecord as l };
360
+ export type { Chunk as C, IndexOptions as I, PageHit as P, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorHit as V, SearchResponse as a, SearchResult as b, SearchSocketConfig as c, Scope as d, ScopeInfo as e, PageRecord as f, IndexStats as g };
@@ -1,6 +1,5 @@
1
1
  type ScopeMode = "fixed" | "git" | "env";
2
2
  type SourceMode = "static-output" | "crawl" | "content-files" | "build";
3
- type EmbeddingProvider = "jina";
4
3
  interface SearchSocketConfig {
5
4
  project?: {
6
5
  id?: string;
@@ -61,29 +60,18 @@ interface SearchSocketConfig {
61
60
  prependTitle?: boolean;
62
61
  pageSummaryChunk?: boolean;
63
62
  };
64
- embeddings?: {
65
- provider?: EmbeddingProvider;
66
- model?: string;
67
- apiKey?: string;
68
- apiKeyEnv?: string;
69
- batchSize?: number;
70
- concurrency?: number;
71
- pricePer1kTokens?: number;
63
+ upstash?: {
64
+ url?: string;
65
+ token?: string;
66
+ urlEnv?: string;
67
+ tokenEnv?: string;
72
68
  };
73
- vector?: {
74
- dimension?: number;
75
- turso?: {
76
- url?: string;
77
- authToken?: string;
78
- urlEnv?: string;
79
- authTokenEnv?: string;
80
- localPath?: string;
81
- };
82
- };
83
- rerank?: {
84
- enabled?: boolean;
85
- topN?: number;
86
- model?: string;
69
+ search?: {
70
+ semanticWeight?: number;
71
+ inputEnrichment?: boolean;
72
+ reranking?: boolean;
73
+ dualSearch?: boolean;
74
+ pageSearchWeight?: number;
87
75
  };
88
76
  ranking?: {
89
77
  enableIncomingLinkBoost?: boolean;
@@ -93,11 +81,12 @@ interface SearchSocketConfig {
93
81
  aggregationDecay?: number;
94
82
  minChunkScoreRatio?: number;
95
83
  minScore?: number;
84
+ scoreGapThreshold?: number;
96
85
  weights?: {
97
86
  incomingLinks?: number;
98
87
  depth?: number;
99
- rerank?: number;
100
88
  aggregation?: number;
89
+ titleMatch?: number;
101
90
  };
102
91
  };
103
92
  api?: {
@@ -120,7 +109,6 @@ interface SearchSocketConfig {
120
109
  };
121
110
  state?: {
122
111
  dir?: string;
123
- writeMirror?: boolean;
124
112
  };
125
113
  }
126
114
  interface ResolvedSearchSocketConfig {
@@ -183,29 +171,18 @@ interface ResolvedSearchSocketConfig {
183
171
  prependTitle: boolean;
184
172
  pageSummaryChunk: boolean;
185
173
  };
186
- embeddings: {
187
- provider: EmbeddingProvider;
188
- model: string;
189
- apiKey?: string;
190
- apiKeyEnv: string;
191
- batchSize: number;
192
- concurrency: number;
193
- pricePer1kTokens?: number;
174
+ upstash: {
175
+ url?: string;
176
+ token?: string;
177
+ urlEnv: string;
178
+ tokenEnv: string;
194
179
  };
195
- vector: {
196
- dimension?: number;
197
- turso: {
198
- url?: string;
199
- authToken?: string;
200
- urlEnv: string;
201
- authTokenEnv: string;
202
- localPath: string;
203
- };
204
- };
205
- rerank: {
206
- enabled: boolean;
207
- topN: number;
208
- model: string;
180
+ search: {
181
+ semanticWeight: number;
182
+ inputEnrichment: boolean;
183
+ reranking: boolean;
184
+ dualSearch: boolean;
185
+ pageSearchWeight: number;
209
186
  };
210
187
  ranking: {
211
188
  enableIncomingLinkBoost: boolean;
@@ -215,11 +192,12 @@ interface ResolvedSearchSocketConfig {
215
192
  aggregationDecay: number;
216
193
  minChunkScoreRatio: number;
217
194
  minScore: number;
195
+ scoreGapThreshold: number;
218
196
  weights: {
219
197
  incomingLinks: number;
220
198
  depth: number;
221
- rerank: number;
222
199
  aggregation: number;
200
+ titleMatch: number;
223
201
  };
224
202
  };
225
203
  api: {
@@ -242,7 +220,6 @@ interface ResolvedSearchSocketConfig {
242
220
  };
243
221
  state: {
244
222
  dir: string;
245
- writeMirror: boolean;
246
223
  };
247
224
  }
248
225
  interface Scope {
@@ -268,9 +245,9 @@ interface Chunk {
268
245
  description?: string;
269
246
  keywords?: string[];
270
247
  }
271
- interface VectorRecord {
248
+ interface VectorHit {
272
249
  id: string;
273
- vector: number[];
250
+ score: number;
274
251
  metadata: {
275
252
  projectId: string;
276
253
  scopeName: string;
@@ -283,7 +260,6 @@ interface VectorRecord {
283
260
  chunkText: string;
284
261
  ordinal: number;
285
262
  contentHash: string;
286
- modelId: string;
287
263
  depth: number;
288
264
  incomingLinks: number;
289
265
  routeFile: string;
@@ -292,16 +268,6 @@ interface VectorRecord {
292
268
  keywords?: string[];
293
269
  };
294
270
  }
295
- interface QueryOpts {
296
- topK: number;
297
- pathPrefix?: string;
298
- tags?: string[];
299
- }
300
- interface VectorHit {
301
- id: string;
302
- score: number;
303
- metadata: VectorRecord["metadata"];
304
- }
305
271
  interface PageRecord {
306
272
  url: string;
307
273
  title: string;
@@ -315,48 +281,26 @@ interface PageRecord {
315
281
  depth: number;
316
282
  tags: string[];
317
283
  indexedAt: string;
284
+ summary?: string;
285
+ description?: string;
286
+ keywords?: string[];
287
+ }
288
+ interface PageHit {
289
+ id: string;
290
+ score: number;
291
+ title: string;
292
+ url: string;
293
+ description: string;
294
+ tags: string[];
295
+ depth: number;
296
+ incomingLinks: number;
297
+ routeFile: string;
318
298
  }
319
299
  interface ScopeInfo {
320
300
  projectId: string;
321
301
  scopeName: string;
322
- modelId: string;
323
302
  lastIndexedAt: string;
324
- vectorCount?: number;
325
- lastEstimateTokens?: number;
326
- lastEstimateCostUSD?: number;
327
- lastEstimateChangedChunks?: number;
328
- }
329
- interface VectorStore {
330
- upsert(records: VectorRecord[], scope: Scope): Promise<void>;
331
- query(queryVector: number[], opts: QueryOpts, scope: Scope): Promise<VectorHit[]>;
332
- deleteByIds(ids: string[], scope: Scope): Promise<void>;
333
- deleteScope(scope: Scope): Promise<void>;
334
- listScopes(scopeProjectId: string): Promise<ScopeInfo[]>;
335
- recordScope(info: ScopeInfo): Promise<void>;
336
- health(): Promise<{
337
- ok: boolean;
338
- details?: string;
339
- }>;
340
- getContentHashes(scope: Scope): Promise<Map<string, string>>;
341
- upsertPages(pages: PageRecord[], scope: Scope): Promise<void>;
342
- getPage(url: string, scope: Scope): Promise<PageRecord | null>;
343
- deletePages(scope: Scope): Promise<void>;
344
- getScopeModelId(scope: Scope): Promise<string | null>;
345
- dropAllTables(): Promise<void>;
346
- }
347
- interface EmbeddingsProvider {
348
- embedTexts(texts: string[], modelId: string, task?: string): Promise<number[][]>;
349
- estimateTokens(text: string): number;
350
- }
351
- interface RerankCandidate {
352
- id: string;
353
- text: string;
354
- }
355
- interface Reranker {
356
- rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<Array<{
357
- id: string;
358
- score: number;
359
- }>>;
303
+ documentCount?: number;
360
304
  }
361
305
  interface SearchRequest {
362
306
  q: string;
@@ -364,9 +308,7 @@ interface SearchRequest {
364
308
  scope?: string;
365
309
  pathPrefix?: string;
366
310
  tags?: string[];
367
- rerank?: boolean;
368
311
  groupBy?: "page" | "chunk";
369
- stream?: boolean;
370
312
  }
371
313
  interface SearchResultChunk {
372
314
  sectionTitle?: string;
@@ -389,23 +331,17 @@ interface SearchResponse {
389
331
  results: SearchResult[];
390
332
  meta: {
391
333
  timingsMs: {
392
- embed: number;
393
- vector: number;
394
- rerank: number;
334
+ search: number;
395
335
  total: number;
396
336
  };
397
- usedRerank: boolean;
398
- modelId: string;
399
337
  };
400
338
  }
401
339
  interface IndexStats {
402
340
  pagesProcessed: number;
403
341
  chunksTotal: number;
404
342
  chunksChanged: number;
405
- newEmbeddings: number;
343
+ documentsUpserted: number;
406
344
  deletes: number;
407
- estimatedTokens: number;
408
- estimatedCostUSD: number;
409
345
  routeExact: number;
410
346
  routeBestEffort: number;
411
347
  stageTimingsMs: Record<string, number>;
@@ -420,36 +356,5 @@ interface IndexOptions {
420
356
  maxChunks?: number;
421
357
  verbose?: boolean;
422
358
  }
423
- interface StreamSearchEvent {
424
- phase: "initial" | "reranked";
425
- data: SearchResponse;
426
- }
427
- interface StreamSearchErrorEvent {
428
- phase: "error";
429
- data: {
430
- error: {
431
- code: string;
432
- message: string;
433
- };
434
- };
435
- }
436
- type StreamEvent = StreamSearchEvent | StreamSearchErrorEvent;
437
- interface MergeSearchOptions {
438
- /**
439
- * If any single result moved more than this many positions, adopt
440
- * the reranked order. The reranker is semantic — if it strongly
441
- * disagrees with vector similarity on even one result, trust it.
442
- * @default 3
443
- */
444
- maxDisplacement?: number;
445
- }
446
- interface MergeSearchResult {
447
- response: SearchResponse;
448
- usedRerankedOrder: boolean;
449
- displacements: Array<{
450
- url: string;
451
- displacement: number;
452
- }>;
453
- }
454
359
 
455
- export type { Chunk as C, EmbeddingsProvider as E, IndexOptions as I, MergeSearchOptions as M, QueryOpts as Q, ResolvedSearchSocketConfig as R, SearchResponse as S, VectorStore as V, MergeSearchResult as a, SearchRequest as b, StreamSearchEvent as c, SearchSocketConfig as d, Scope as e, Reranker as f, RerankCandidate as g, IndexStats as h, StreamEvent as i, StreamSearchErrorEvent as j, VectorHit as k, VectorRecord as l };
360
+ export type { Chunk as C, IndexOptions as I, PageHit as P, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorHit as V, SearchResponse as a, SearchResult as b, SearchSocketConfig as c, Scope as d, ScopeInfo as e, PageRecord as f, IndexStats as g };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "searchsocket",
3
- "version": "0.4.0",
3
+ "version": "0.5.0",
4
4
  "description": "Semantic site search and MCP retrieval for SvelteKit static sites",
5
5
  "license": "MIT",
6
6
  "author": "Greg Priday <greg@siteorigin.com>",
@@ -46,6 +46,11 @@
46
46
  "types": "./dist/client.d.ts",
47
47
  "import": "./dist/client.js",
48
48
  "require": "./dist/client.cjs"
49
+ },
50
+ "./scroll": {
51
+ "types": "./dist/scroll.d.ts",
52
+ "import": "./dist/scroll.js",
53
+ "require": "./dist/scroll.cjs"
49
54
  }
50
55
  },
51
56
  "scripts": {
@@ -53,15 +58,16 @@
53
58
  "clean": "rm -rf dist",
54
59
  "typecheck": "tsc --noEmit",
55
60
  "test": "vitest run",
56
- "test:watch": "vitest"
61
+ "test:watch": "vitest",
62
+ "test:quality": "SEARCHSOCKET_QUALITY_TESTS=1 vitest run tests/quality.test.ts"
57
63
  },
58
64
  "engines": {
59
65
  "node": ">=20"
60
66
  },
61
67
  "packageManager": "pnpm@10.29.2",
62
68
  "dependencies": {
63
- "@libsql/client": "^0.17.0",
64
69
  "@modelcontextprotocol/sdk": "^1.26.0",
70
+ "@upstash/search": "^0.1.7",
65
71
  "cheerio": "^1.2.0",
66
72
  "chokidar": "^5.0.0",
67
73
  "commander": "^14.0.3",
@@ -79,6 +85,7 @@
79
85
  "@types/express": "^5.0.6",
80
86
  "@types/node": "^25.2.2",
81
87
  "@types/turndown": "^5.0.6",
88
+ "jsdom": "^28.1.0",
82
89
  "tsup": "^8.5.1",
83
90
  "typescript": "^5.9.3",
84
91
  "vitest": "^4.0.18"