searchsocket 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,5 @@
1
1
  type ScopeMode = "fixed" | "git" | "env";
2
2
  type SourceMode = "static-output" | "crawl" | "content-files" | "build";
3
- type EmbeddingProvider = "jina";
4
3
  interface SearchSocketConfig {
5
4
  project?: {
6
5
  id?: string;
@@ -12,6 +11,8 @@ interface SearchSocketConfig {
12
11
  envVar?: string;
13
12
  sanitize?: boolean;
14
13
  };
14
+ exclude?: string[];
15
+ respectRobotsTxt?: boolean;
15
16
  source?: {
16
17
  mode?: SourceMode;
17
18
  staticOutputDir?: string;
@@ -59,29 +60,18 @@ interface SearchSocketConfig {
59
60
  prependTitle?: boolean;
60
61
  pageSummaryChunk?: boolean;
61
62
  };
62
- embeddings?: {
63
- provider?: EmbeddingProvider;
64
- model?: string;
65
- apiKey?: string;
66
- apiKeyEnv?: string;
67
- batchSize?: number;
68
- concurrency?: number;
69
- pricePer1kTokens?: number;
63
+ upstash?: {
64
+ url?: string;
65
+ token?: string;
66
+ urlEnv?: string;
67
+ tokenEnv?: string;
70
68
  };
71
- vector?: {
72
- dimension?: number;
73
- turso?: {
74
- url?: string;
75
- authToken?: string;
76
- urlEnv?: string;
77
- authTokenEnv?: string;
78
- localPath?: string;
79
- };
80
- };
81
- rerank?: {
82
- enabled?: boolean;
83
- topN?: number;
84
- model?: string;
69
+ search?: {
70
+ semanticWeight?: number;
71
+ inputEnrichment?: boolean;
72
+ reranking?: boolean;
73
+ dualSearch?: boolean;
74
+ pageSearchWeight?: number;
85
75
  };
86
76
  ranking?: {
87
77
  enableIncomingLinkBoost?: boolean;
@@ -91,11 +81,12 @@ interface SearchSocketConfig {
91
81
  aggregationDecay?: number;
92
82
  minChunkScoreRatio?: number;
93
83
  minScore?: number;
84
+ scoreGapThreshold?: number;
94
85
  weights?: {
95
86
  incomingLinks?: number;
96
87
  depth?: number;
97
- rerank?: number;
98
88
  aggregation?: number;
89
+ titleMatch?: number;
99
90
  };
100
91
  };
101
92
  api?: {
@@ -118,7 +109,6 @@ interface SearchSocketConfig {
118
109
  };
119
110
  state?: {
120
111
  dir?: string;
121
- writeMirror?: boolean;
122
112
  };
123
113
  }
124
114
  interface ResolvedSearchSocketConfig {
@@ -132,6 +122,8 @@ interface ResolvedSearchSocketConfig {
132
122
  envVar: string;
133
123
  sanitize: boolean;
134
124
  };
125
+ exclude: string[];
126
+ respectRobotsTxt: boolean;
135
127
  source: {
136
128
  mode: SourceMode;
137
129
  staticOutputDir: string;
@@ -179,29 +171,18 @@ interface ResolvedSearchSocketConfig {
179
171
  prependTitle: boolean;
180
172
  pageSummaryChunk: boolean;
181
173
  };
182
- embeddings: {
183
- provider: EmbeddingProvider;
184
- model: string;
185
- apiKey?: string;
186
- apiKeyEnv: string;
187
- batchSize: number;
188
- concurrency: number;
189
- pricePer1kTokens?: number;
190
- };
191
- vector: {
192
- dimension?: number;
193
- turso: {
194
- url?: string;
195
- authToken?: string;
196
- urlEnv: string;
197
- authTokenEnv: string;
198
- localPath: string;
199
- };
174
+ upstash: {
175
+ url?: string;
176
+ token?: string;
177
+ urlEnv: string;
178
+ tokenEnv: string;
200
179
  };
201
- rerank: {
202
- enabled: boolean;
203
- topN: number;
204
- model: string;
180
+ search: {
181
+ semanticWeight: number;
182
+ inputEnrichment: boolean;
183
+ reranking: boolean;
184
+ dualSearch: boolean;
185
+ pageSearchWeight: number;
205
186
  };
206
187
  ranking: {
207
188
  enableIncomingLinkBoost: boolean;
@@ -211,11 +192,12 @@ interface ResolvedSearchSocketConfig {
211
192
  aggregationDecay: number;
212
193
  minChunkScoreRatio: number;
213
194
  minScore: number;
195
+ scoreGapThreshold: number;
214
196
  weights: {
215
197
  incomingLinks: number;
216
198
  depth: number;
217
- rerank: number;
218
199
  aggregation: number;
200
+ titleMatch: number;
219
201
  };
220
202
  };
221
203
  api: {
@@ -238,7 +220,6 @@ interface ResolvedSearchSocketConfig {
238
220
  };
239
221
  state: {
240
222
  dir: string;
241
- writeMirror: boolean;
242
223
  };
243
224
  }
244
225
  interface Scope {
@@ -264,9 +245,9 @@ interface Chunk {
264
245
  description?: string;
265
246
  keywords?: string[];
266
247
  }
267
- interface VectorRecord {
248
+ interface VectorHit {
268
249
  id: string;
269
- vector: number[];
250
+ score: number;
270
251
  metadata: {
271
252
  projectId: string;
272
253
  scopeName: string;
@@ -279,7 +260,6 @@ interface VectorRecord {
279
260
  chunkText: string;
280
261
  ordinal: number;
281
262
  contentHash: string;
282
- modelId: string;
283
263
  depth: number;
284
264
  incomingLinks: number;
285
265
  routeFile: string;
@@ -288,16 +268,6 @@ interface VectorRecord {
288
268
  keywords?: string[];
289
269
  };
290
270
  }
291
- interface QueryOpts {
292
- topK: number;
293
- pathPrefix?: string;
294
- tags?: string[];
295
- }
296
- interface VectorHit {
297
- id: string;
298
- score: number;
299
- metadata: VectorRecord["metadata"];
300
- }
301
271
  interface PageRecord {
302
272
  url: string;
303
273
  title: string;
@@ -311,48 +281,26 @@ interface PageRecord {
311
281
  depth: number;
312
282
  tags: string[];
313
283
  indexedAt: string;
284
+ summary?: string;
285
+ description?: string;
286
+ keywords?: string[];
287
+ }
288
+ interface PageHit {
289
+ id: string;
290
+ score: number;
291
+ title: string;
292
+ url: string;
293
+ description: string;
294
+ tags: string[];
295
+ depth: number;
296
+ incomingLinks: number;
297
+ routeFile: string;
314
298
  }
315
299
  interface ScopeInfo {
316
300
  projectId: string;
317
301
  scopeName: string;
318
- modelId: string;
319
302
  lastIndexedAt: string;
320
- vectorCount?: number;
321
- lastEstimateTokens?: number;
322
- lastEstimateCostUSD?: number;
323
- lastEstimateChangedChunks?: number;
324
- }
325
- interface VectorStore {
326
- upsert(records: VectorRecord[], scope: Scope): Promise<void>;
327
- query(queryVector: number[], opts: QueryOpts, scope: Scope): Promise<VectorHit[]>;
328
- deleteByIds(ids: string[], scope: Scope): Promise<void>;
329
- deleteScope(scope: Scope): Promise<void>;
330
- listScopes(scopeProjectId: string): Promise<ScopeInfo[]>;
331
- recordScope(info: ScopeInfo): Promise<void>;
332
- health(): Promise<{
333
- ok: boolean;
334
- details?: string;
335
- }>;
336
- getContentHashes(scope: Scope): Promise<Map<string, string>>;
337
- upsertPages(pages: PageRecord[], scope: Scope): Promise<void>;
338
- getPage(url: string, scope: Scope): Promise<PageRecord | null>;
339
- deletePages(scope: Scope): Promise<void>;
340
- getScopeModelId(scope: Scope): Promise<string | null>;
341
- dropAllTables(): Promise<void>;
342
- }
343
- interface EmbeddingsProvider {
344
- embedTexts(texts: string[], modelId: string, task?: string): Promise<number[][]>;
345
- estimateTokens(text: string): number;
346
- }
347
- interface RerankCandidate {
348
- id: string;
349
- text: string;
350
- }
351
- interface Reranker {
352
- rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<Array<{
353
- id: string;
354
- score: number;
355
- }>>;
303
+ documentCount?: number;
356
304
  }
357
305
  interface SearchRequest {
358
306
  q: string;
@@ -360,7 +308,6 @@ interface SearchRequest {
360
308
  scope?: string;
361
309
  pathPrefix?: string;
362
310
  tags?: string[];
363
- rerank?: boolean;
364
311
  groupBy?: "page" | "chunk";
365
312
  }
366
313
  interface SearchResultChunk {
@@ -384,23 +331,17 @@ interface SearchResponse {
384
331
  results: SearchResult[];
385
332
  meta: {
386
333
  timingsMs: {
387
- embed: number;
388
- vector: number;
389
- rerank: number;
334
+ search: number;
390
335
  total: number;
391
336
  };
392
- usedRerank: boolean;
393
- modelId: string;
394
337
  };
395
338
  }
396
339
  interface IndexStats {
397
340
  pagesProcessed: number;
398
341
  chunksTotal: number;
399
342
  chunksChanged: number;
400
- newEmbeddings: number;
343
+ documentsUpserted: number;
401
344
  deletes: number;
402
- estimatedTokens: number;
403
- estimatedCostUSD: number;
404
345
  routeExact: number;
405
346
  routeBestEffort: number;
406
347
  stageTimingsMs: Record<string, number>;
@@ -416,4 +357,4 @@ interface IndexOptions {
416
357
  verbose?: boolean;
417
358
  }
418
359
 
419
- export type { Chunk as C, EmbeddingsProvider as E, IndexOptions as I, QueryOpts as Q, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorStore as V, SearchResponse as a, SearchSocketConfig as b, Scope as c, Reranker as d, RerankCandidate as e, IndexStats as f, VectorHit as g, VectorRecord as h };
360
+ export type { Chunk as C, IndexOptions as I, PageHit as P, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorHit as V, SearchResponse as a, SearchResult as b, SearchSocketConfig as c, Scope as d, ScopeInfo as e, PageRecord as f, IndexStats as g };
@@ -1,6 +1,5 @@
1
1
  type ScopeMode = "fixed" | "git" | "env";
2
2
  type SourceMode = "static-output" | "crawl" | "content-files" | "build";
3
- type EmbeddingProvider = "jina";
4
3
  interface SearchSocketConfig {
5
4
  project?: {
6
5
  id?: string;
@@ -12,6 +11,8 @@ interface SearchSocketConfig {
12
11
  envVar?: string;
13
12
  sanitize?: boolean;
14
13
  };
14
+ exclude?: string[];
15
+ respectRobotsTxt?: boolean;
15
16
  source?: {
16
17
  mode?: SourceMode;
17
18
  staticOutputDir?: string;
@@ -59,29 +60,18 @@ interface SearchSocketConfig {
59
60
  prependTitle?: boolean;
60
61
  pageSummaryChunk?: boolean;
61
62
  };
62
- embeddings?: {
63
- provider?: EmbeddingProvider;
64
- model?: string;
65
- apiKey?: string;
66
- apiKeyEnv?: string;
67
- batchSize?: number;
68
- concurrency?: number;
69
- pricePer1kTokens?: number;
63
+ upstash?: {
64
+ url?: string;
65
+ token?: string;
66
+ urlEnv?: string;
67
+ tokenEnv?: string;
70
68
  };
71
- vector?: {
72
- dimension?: number;
73
- turso?: {
74
- url?: string;
75
- authToken?: string;
76
- urlEnv?: string;
77
- authTokenEnv?: string;
78
- localPath?: string;
79
- };
80
- };
81
- rerank?: {
82
- enabled?: boolean;
83
- topN?: number;
84
- model?: string;
69
+ search?: {
70
+ semanticWeight?: number;
71
+ inputEnrichment?: boolean;
72
+ reranking?: boolean;
73
+ dualSearch?: boolean;
74
+ pageSearchWeight?: number;
85
75
  };
86
76
  ranking?: {
87
77
  enableIncomingLinkBoost?: boolean;
@@ -91,11 +81,12 @@ interface SearchSocketConfig {
91
81
  aggregationDecay?: number;
92
82
  minChunkScoreRatio?: number;
93
83
  minScore?: number;
84
+ scoreGapThreshold?: number;
94
85
  weights?: {
95
86
  incomingLinks?: number;
96
87
  depth?: number;
97
- rerank?: number;
98
88
  aggregation?: number;
89
+ titleMatch?: number;
99
90
  };
100
91
  };
101
92
  api?: {
@@ -118,7 +109,6 @@ interface SearchSocketConfig {
118
109
  };
119
110
  state?: {
120
111
  dir?: string;
121
- writeMirror?: boolean;
122
112
  };
123
113
  }
124
114
  interface ResolvedSearchSocketConfig {
@@ -132,6 +122,8 @@ interface ResolvedSearchSocketConfig {
132
122
  envVar: string;
133
123
  sanitize: boolean;
134
124
  };
125
+ exclude: string[];
126
+ respectRobotsTxt: boolean;
135
127
  source: {
136
128
  mode: SourceMode;
137
129
  staticOutputDir: string;
@@ -179,29 +171,18 @@ interface ResolvedSearchSocketConfig {
179
171
  prependTitle: boolean;
180
172
  pageSummaryChunk: boolean;
181
173
  };
182
- embeddings: {
183
- provider: EmbeddingProvider;
184
- model: string;
185
- apiKey?: string;
186
- apiKeyEnv: string;
187
- batchSize: number;
188
- concurrency: number;
189
- pricePer1kTokens?: number;
190
- };
191
- vector: {
192
- dimension?: number;
193
- turso: {
194
- url?: string;
195
- authToken?: string;
196
- urlEnv: string;
197
- authTokenEnv: string;
198
- localPath: string;
199
- };
174
+ upstash: {
175
+ url?: string;
176
+ token?: string;
177
+ urlEnv: string;
178
+ tokenEnv: string;
200
179
  };
201
- rerank: {
202
- enabled: boolean;
203
- topN: number;
204
- model: string;
180
+ search: {
181
+ semanticWeight: number;
182
+ inputEnrichment: boolean;
183
+ reranking: boolean;
184
+ dualSearch: boolean;
185
+ pageSearchWeight: number;
205
186
  };
206
187
  ranking: {
207
188
  enableIncomingLinkBoost: boolean;
@@ -211,11 +192,12 @@ interface ResolvedSearchSocketConfig {
211
192
  aggregationDecay: number;
212
193
  minChunkScoreRatio: number;
213
194
  minScore: number;
195
+ scoreGapThreshold: number;
214
196
  weights: {
215
197
  incomingLinks: number;
216
198
  depth: number;
217
- rerank: number;
218
199
  aggregation: number;
200
+ titleMatch: number;
219
201
  };
220
202
  };
221
203
  api: {
@@ -238,7 +220,6 @@ interface ResolvedSearchSocketConfig {
238
220
  };
239
221
  state: {
240
222
  dir: string;
241
- writeMirror: boolean;
242
223
  };
243
224
  }
244
225
  interface Scope {
@@ -264,9 +245,9 @@ interface Chunk {
264
245
  description?: string;
265
246
  keywords?: string[];
266
247
  }
267
- interface VectorRecord {
248
+ interface VectorHit {
268
249
  id: string;
269
- vector: number[];
250
+ score: number;
270
251
  metadata: {
271
252
  projectId: string;
272
253
  scopeName: string;
@@ -279,7 +260,6 @@ interface VectorRecord {
279
260
  chunkText: string;
280
261
  ordinal: number;
281
262
  contentHash: string;
282
- modelId: string;
283
263
  depth: number;
284
264
  incomingLinks: number;
285
265
  routeFile: string;
@@ -288,16 +268,6 @@ interface VectorRecord {
288
268
  keywords?: string[];
289
269
  };
290
270
  }
291
- interface QueryOpts {
292
- topK: number;
293
- pathPrefix?: string;
294
- tags?: string[];
295
- }
296
- interface VectorHit {
297
- id: string;
298
- score: number;
299
- metadata: VectorRecord["metadata"];
300
- }
301
271
  interface PageRecord {
302
272
  url: string;
303
273
  title: string;
@@ -311,48 +281,26 @@ interface PageRecord {
311
281
  depth: number;
312
282
  tags: string[];
313
283
  indexedAt: string;
284
+ summary?: string;
285
+ description?: string;
286
+ keywords?: string[];
287
+ }
288
+ interface PageHit {
289
+ id: string;
290
+ score: number;
291
+ title: string;
292
+ url: string;
293
+ description: string;
294
+ tags: string[];
295
+ depth: number;
296
+ incomingLinks: number;
297
+ routeFile: string;
314
298
  }
315
299
  interface ScopeInfo {
316
300
  projectId: string;
317
301
  scopeName: string;
318
- modelId: string;
319
302
  lastIndexedAt: string;
320
- vectorCount?: number;
321
- lastEstimateTokens?: number;
322
- lastEstimateCostUSD?: number;
323
- lastEstimateChangedChunks?: number;
324
- }
325
- interface VectorStore {
326
- upsert(records: VectorRecord[], scope: Scope): Promise<void>;
327
- query(queryVector: number[], opts: QueryOpts, scope: Scope): Promise<VectorHit[]>;
328
- deleteByIds(ids: string[], scope: Scope): Promise<void>;
329
- deleteScope(scope: Scope): Promise<void>;
330
- listScopes(scopeProjectId: string): Promise<ScopeInfo[]>;
331
- recordScope(info: ScopeInfo): Promise<void>;
332
- health(): Promise<{
333
- ok: boolean;
334
- details?: string;
335
- }>;
336
- getContentHashes(scope: Scope): Promise<Map<string, string>>;
337
- upsertPages(pages: PageRecord[], scope: Scope): Promise<void>;
338
- getPage(url: string, scope: Scope): Promise<PageRecord | null>;
339
- deletePages(scope: Scope): Promise<void>;
340
- getScopeModelId(scope: Scope): Promise<string | null>;
341
- dropAllTables(): Promise<void>;
342
- }
343
- interface EmbeddingsProvider {
344
- embedTexts(texts: string[], modelId: string, task?: string): Promise<number[][]>;
345
- estimateTokens(text: string): number;
346
- }
347
- interface RerankCandidate {
348
- id: string;
349
- text: string;
350
- }
351
- interface Reranker {
352
- rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<Array<{
353
- id: string;
354
- score: number;
355
- }>>;
303
+ documentCount?: number;
356
304
  }
357
305
  interface SearchRequest {
358
306
  q: string;
@@ -360,7 +308,6 @@ interface SearchRequest {
360
308
  scope?: string;
361
309
  pathPrefix?: string;
362
310
  tags?: string[];
363
- rerank?: boolean;
364
311
  groupBy?: "page" | "chunk";
365
312
  }
366
313
  interface SearchResultChunk {
@@ -384,23 +331,17 @@ interface SearchResponse {
384
331
  results: SearchResult[];
385
332
  meta: {
386
333
  timingsMs: {
387
- embed: number;
388
- vector: number;
389
- rerank: number;
334
+ search: number;
390
335
  total: number;
391
336
  };
392
- usedRerank: boolean;
393
- modelId: string;
394
337
  };
395
338
  }
396
339
  interface IndexStats {
397
340
  pagesProcessed: number;
398
341
  chunksTotal: number;
399
342
  chunksChanged: number;
400
- newEmbeddings: number;
343
+ documentsUpserted: number;
401
344
  deletes: number;
402
- estimatedTokens: number;
403
- estimatedCostUSD: number;
404
345
  routeExact: number;
405
346
  routeBestEffort: number;
406
347
  stageTimingsMs: Record<string, number>;
@@ -416,4 +357,4 @@ interface IndexOptions {
416
357
  verbose?: boolean;
417
358
  }
418
359
 
419
- export type { Chunk as C, EmbeddingsProvider as E, IndexOptions as I, QueryOpts as Q, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorStore as V, SearchResponse as a, SearchSocketConfig as b, Scope as c, Reranker as d, RerankCandidate as e, IndexStats as f, VectorHit as g, VectorRecord as h };
360
+ export type { Chunk as C, IndexOptions as I, PageHit as P, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorHit as V, SearchResponse as a, SearchResult as b, SearchSocketConfig as c, Scope as d, ScopeInfo as e, PageRecord as f, IndexStats as g };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "searchsocket",
3
- "version": "0.3.3",
3
+ "version": "0.5.0",
4
4
  "description": "Semantic site search and MCP retrieval for SvelteKit static sites",
5
5
  "license": "MIT",
6
6
  "author": "Greg Priday <greg@siteorigin.com>",
@@ -46,6 +46,11 @@
46
46
  "types": "./dist/client.d.ts",
47
47
  "import": "./dist/client.js",
48
48
  "require": "./dist/client.cjs"
49
+ },
50
+ "./scroll": {
51
+ "types": "./dist/scroll.d.ts",
52
+ "import": "./dist/scroll.js",
53
+ "require": "./dist/scroll.cjs"
49
54
  }
50
55
  },
51
56
  "scripts": {
@@ -53,15 +58,16 @@
53
58
  "clean": "rm -rf dist",
54
59
  "typecheck": "tsc --noEmit",
55
60
  "test": "vitest run",
56
- "test:watch": "vitest"
61
+ "test:watch": "vitest",
62
+ "test:quality": "SEARCHSOCKET_QUALITY_TESTS=1 vitest run tests/quality.test.ts"
57
63
  },
58
64
  "engines": {
59
65
  "node": ">=20"
60
66
  },
61
67
  "packageManager": "pnpm@10.29.2",
62
68
  "dependencies": {
63
- "@libsql/client": "^0.17.0",
64
69
  "@modelcontextprotocol/sdk": "^1.26.0",
70
+ "@upstash/search": "^0.1.7",
65
71
  "cheerio": "^1.2.0",
66
72
  "chokidar": "^5.0.0",
67
73
  "commander": "^14.0.3",
@@ -79,6 +85,7 @@
79
85
  "@types/express": "^5.0.6",
80
86
  "@types/node": "^25.2.2",
81
87
  "@types/turndown": "^5.0.6",
88
+ "jsdom": "^28.1.0",
82
89
  "tsup": "^8.5.1",
83
90
  "typescript": "^5.9.3",
84
91
  "vitest": "^4.0.18"