searchsocket 0.5.0 → 0.6.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,5 +1,10 @@
1
+ type Awaitable<T> = T | Promise<T>;
1
2
  type ScopeMode = "fixed" | "git" | "env";
2
3
  type SourceMode = "static-output" | "crawl" | "content-files" | "build";
4
+ interface OutgoingLink {
5
+ url: string;
6
+ anchorText: string;
7
+ }
3
8
  interface SearchSocketConfig {
4
9
  project?: {
5
10
  id?: string;
@@ -43,6 +48,7 @@ interface SearchSocketConfig {
43
48
  dropSelectors?: string[];
44
49
  ignoreAttr?: string;
45
50
  noindexAttr?: string;
51
+ imageDescAttr?: string;
46
52
  respectRobotsNoindex?: boolean;
47
53
  };
48
54
  transform?: {
@@ -59,34 +65,47 @@ interface SearchSocketConfig {
59
65
  dontSplitInside?: Array<"code" | "table" | "blockquote">;
60
66
  prependTitle?: boolean;
61
67
  pageSummaryChunk?: boolean;
68
+ weightHeadings?: boolean;
62
69
  };
63
70
  upstash?: {
64
71
  url?: string;
65
72
  token?: string;
66
73
  urlEnv?: string;
67
74
  tokenEnv?: string;
75
+ namespaces?: {
76
+ pages?: string;
77
+ chunks?: string;
78
+ };
79
+ };
80
+ embedding?: {
81
+ model?: string;
82
+ dimensions?: number;
83
+ taskType?: string;
84
+ batchSize?: number;
68
85
  };
69
86
  search?: {
70
- semanticWeight?: number;
71
- inputEnrichment?: boolean;
72
- reranking?: boolean;
73
87
  dualSearch?: boolean;
74
88
  pageSearchWeight?: number;
75
89
  };
76
90
  ranking?: {
77
91
  enableIncomingLinkBoost?: boolean;
78
92
  enableDepthBoost?: boolean;
93
+ enableFreshnessBoost?: boolean;
94
+ freshnessDecayRate?: number;
95
+ enableAnchorTextBoost?: boolean;
79
96
  pageWeights?: Record<string, number>;
80
97
  aggregationCap?: number;
81
98
  aggregationDecay?: number;
82
99
  minChunkScoreRatio?: number;
83
- minScore?: number;
100
+ minScoreRatio?: number;
84
101
  scoreGapThreshold?: number;
85
102
  weights?: {
86
103
  incomingLinks?: number;
87
104
  depth?: number;
88
105
  aggregation?: number;
89
106
  titleMatch?: number;
107
+ freshness?: number;
108
+ anchorText?: number;
90
109
  };
91
110
  };
92
111
  api?: {
@@ -101,12 +120,28 @@ interface SearchSocketConfig {
101
120
  };
102
121
  mcp?: {
103
122
  enable?: boolean;
123
+ access?: "public" | "private";
104
124
  transport?: "stdio" | "http";
105
125
  http?: {
106
126
  port?: number;
107
127
  path?: string;
128
+ apiKey?: string;
129
+ apiKeyEnv?: string;
130
+ };
131
+ handle?: {
132
+ path?: string;
133
+ apiKey?: string;
134
+ enableJsonResponse?: boolean;
108
135
  };
109
136
  };
137
+ llmsTxt?: {
138
+ enable?: boolean;
139
+ outputPath?: string;
140
+ title?: string;
141
+ description?: string;
142
+ generateFull?: boolean;
143
+ serveMarkdownVariants?: boolean;
144
+ };
110
145
  state?: {
111
146
  dir?: string;
112
147
  };
@@ -154,6 +189,7 @@ interface ResolvedSearchSocketConfig {
154
189
  dropSelectors: string[];
155
190
  ignoreAttr: string;
156
191
  noindexAttr: string;
192
+ imageDescAttr: string;
157
193
  respectRobotsNoindex: boolean;
158
194
  };
159
195
  transform: {
@@ -170,34 +206,47 @@ interface ResolvedSearchSocketConfig {
170
206
  dontSplitInside: Array<"code" | "table" | "blockquote">;
171
207
  prependTitle: boolean;
172
208
  pageSummaryChunk: boolean;
209
+ weightHeadings: boolean;
173
210
  };
174
211
  upstash: {
175
212
  url?: string;
176
213
  token?: string;
177
214
  urlEnv: string;
178
215
  tokenEnv: string;
216
+ namespaces: {
217
+ pages: string;
218
+ chunks: string;
219
+ };
220
+ };
221
+ embedding: {
222
+ model: string;
223
+ dimensions: number;
224
+ taskType: string;
225
+ batchSize: number;
179
226
  };
180
227
  search: {
181
- semanticWeight: number;
182
- inputEnrichment: boolean;
183
- reranking: boolean;
184
228
  dualSearch: boolean;
185
229
  pageSearchWeight: number;
186
230
  };
187
231
  ranking: {
188
232
  enableIncomingLinkBoost: boolean;
189
233
  enableDepthBoost: boolean;
234
+ enableFreshnessBoost: boolean;
235
+ freshnessDecayRate: number;
236
+ enableAnchorTextBoost: boolean;
190
237
  pageWeights: Record<string, number>;
191
238
  aggregationCap: number;
192
239
  aggregationDecay: number;
193
240
  minChunkScoreRatio: number;
194
- minScore: number;
241
+ minScoreRatio: number;
195
242
  scoreGapThreshold: number;
196
243
  weights: {
197
244
  incomingLinks: number;
198
245
  depth: number;
199
246
  aggregation: number;
200
247
  titleMatch: number;
248
+ freshness: number;
249
+ anchorText: number;
201
250
  };
202
251
  };
203
252
  api: {
@@ -212,11 +261,27 @@ interface ResolvedSearchSocketConfig {
212
261
  };
213
262
  mcp: {
214
263
  enable: boolean;
264
+ access: "public" | "private";
215
265
  transport: "stdio" | "http";
216
266
  http: {
217
267
  port: number;
218
268
  path: string;
269
+ apiKey?: string;
270
+ apiKeyEnv?: string;
219
271
  };
272
+ handle: {
273
+ path: string;
274
+ apiKey?: string;
275
+ enableJsonResponse: boolean;
276
+ };
277
+ };
278
+ llmsTxt: {
279
+ enable: boolean;
280
+ outputPath: string;
281
+ title?: string;
282
+ description?: string;
283
+ generateFull: boolean;
284
+ serveMarkdownVariants: boolean;
220
285
  };
221
286
  state: {
222
287
  dir: string;
@@ -227,6 +292,19 @@ interface Scope {
227
292
  scopeName: string;
228
293
  scopeId: string;
229
294
  }
295
+ interface ExtractedPage {
296
+ url: string;
297
+ title: string;
298
+ markdown: string;
299
+ outgoingLinks: OutgoingLink[];
300
+ noindex: boolean;
301
+ tags: string[];
302
+ description?: string;
303
+ keywords?: string[];
304
+ weight?: number;
305
+ publishedAt?: number;
306
+ meta?: Record<string, string | number | boolean | string[]>;
307
+ }
230
308
  interface Chunk {
231
309
  chunkKey: string;
232
310
  ordinal: number;
@@ -234,6 +312,7 @@ interface Chunk {
234
312
  path: string;
235
313
  title: string;
236
314
  sectionTitle?: string;
315
+ headingLevel?: number;
237
316
  headingPath: string[];
238
317
  chunkText: string;
239
318
  snippet: string;
@@ -244,6 +323,9 @@ interface Chunk {
244
323
  contentHash: string;
245
324
  description?: string;
246
325
  keywords?: string[];
326
+ publishedAt?: number;
327
+ incomingAnchorText?: string;
328
+ meta?: Record<string, string | number | boolean | string[]>;
247
329
  }
248
330
  interface VectorHit {
249
331
  id: string;
@@ -264,8 +346,13 @@ interface VectorHit {
264
346
  incomingLinks: number;
265
347
  routeFile: string;
266
348
  tags: string[];
349
+ type?: "chunk" | "page" | "image";
267
350
  description?: string;
268
351
  keywords?: string[];
352
+ incomingAnchorText?: string;
353
+ imageSrc?: string;
354
+ imageAlt?: string;
355
+ publishedAt?: number;
269
356
  };
270
357
  }
271
358
  interface PageRecord {
@@ -278,12 +365,16 @@ interface PageRecord {
278
365
  routeResolution: "exact" | "best-effort";
279
366
  incomingLinks: number;
280
367
  outgoingLinks: number;
368
+ outgoingLinkUrls?: string[];
281
369
  depth: number;
282
370
  tags: string[];
283
371
  indexedAt: string;
284
372
  summary?: string;
285
373
  description?: string;
286
374
  keywords?: string[];
375
+ contentHash?: string;
376
+ publishedAt?: number;
377
+ meta?: Record<string, string | number | boolean | string[]>;
287
378
  }
288
379
  interface PageHit {
289
380
  id: string;
@@ -295,6 +386,7 @@ interface PageHit {
295
386
  depth: number;
296
387
  incomingLinks: number;
297
388
  routeFile: string;
389
+ publishedAt?: number;
298
390
  }
299
391
  interface ScopeInfo {
300
392
  projectId: string;
@@ -302,17 +394,50 @@ interface ScopeInfo {
302
394
  lastIndexedAt: string;
303
395
  documentCount?: number;
304
396
  }
397
+ interface RankingOverrides {
398
+ ranking?: {
399
+ enableIncomingLinkBoost?: boolean;
400
+ enableDepthBoost?: boolean;
401
+ aggregationCap?: number;
402
+ aggregationDecay?: number;
403
+ minChunkScoreRatio?: number;
404
+ minScoreRatio?: number;
405
+ scoreGapThreshold?: number;
406
+ weights?: {
407
+ incomingLinks?: number;
408
+ depth?: number;
409
+ aggregation?: number;
410
+ titleMatch?: number;
411
+ };
412
+ };
413
+ search?: {
414
+ pageSearchWeight?: number;
415
+ };
416
+ }
305
417
  interface SearchRequest {
306
418
  q: string;
307
419
  topK?: number;
308
420
  scope?: string;
309
421
  pathPrefix?: string;
310
422
  tags?: string[];
423
+ filters?: Record<string, string | number | boolean>;
311
424
  groupBy?: "page" | "chunk";
425
+ maxSubResults?: number;
426
+ debug?: boolean;
427
+ rankingOverrides?: RankingOverrides;
428
+ }
429
+ interface ScoreBreakdown {
430
+ baseScore: number;
431
+ incomingLinkBoost: number;
432
+ depthBoost: number;
433
+ titleMatchBoost: number;
434
+ freshnessBoost: number;
435
+ anchorTextMatchBoost: number;
312
436
  }
313
437
  interface SearchResultChunk {
314
438
  sectionTitle?: string;
315
439
  snippet: string;
440
+ chunkText?: string;
316
441
  headingPath: string[];
317
442
  score: number;
318
443
  }
@@ -321,9 +446,11 @@ interface SearchResult {
321
446
  title: string;
322
447
  sectionTitle?: string;
323
448
  snippet: string;
449
+ chunkText?: string;
324
450
  score: number;
325
451
  routeFile: string;
326
452
  chunks?: SearchResultChunk[];
453
+ breakdown?: ScoreBreakdown;
327
454
  }
328
455
  interface SearchResponse {
329
456
  q: string;
@@ -338,6 +465,8 @@ interface SearchResponse {
338
465
  }
339
466
  interface IndexStats {
340
467
  pagesProcessed: number;
468
+ pagesChanged: number;
469
+ pagesDeleted: number;
341
470
  chunksTotal: number;
342
471
  chunksChanged: number;
343
472
  documentsUpserted: number;
@@ -346,6 +475,20 @@ interface IndexStats {
346
475
  routeBestEffort: number;
347
476
  stageTimingsMs: Record<string, number>;
348
477
  }
478
+ interface IndexingHooks {
479
+ transformPage?: (page: ExtractedPage) => Awaitable<ExtractedPage | null>;
480
+ transformChunk?: (chunk: Chunk) => Awaitable<Chunk | null>;
481
+ beforeIndex?: (chunks: Chunk[]) => Awaitable<Chunk[]>;
482
+ afterIndex?: (stats: IndexStats) => Awaitable<void>;
483
+ }
484
+ interface CustomRecord {
485
+ url: string;
486
+ title: string;
487
+ content: string;
488
+ metadata?: Record<string, string>;
489
+ tags?: string[];
490
+ weight?: number;
491
+ }
349
492
  interface IndexOptions {
350
493
  scopeOverride?: string;
351
494
  changedOnly?: boolean;
@@ -355,6 +498,34 @@ interface IndexOptions {
355
498
  maxPages?: number;
356
499
  maxChunks?: number;
357
500
  verbose?: boolean;
501
+ customRecords?: CustomRecord[];
502
+ }
503
+ interface SiteTreeNode {
504
+ url: string;
505
+ title: string;
506
+ depth: number;
507
+ routeFile: string;
508
+ isIndexed: boolean;
509
+ childCount: number;
510
+ children: SiteTreeNode[];
511
+ }
512
+ interface SiteStructureResult {
513
+ root: SiteTreeNode;
514
+ totalPages: number;
515
+ truncated: boolean;
516
+ }
517
+ type RelationshipType = "outgoing_link" | "incoming_link" | "sibling" | "semantic";
518
+ interface RelatedPage {
519
+ url: string;
520
+ title: string;
521
+ score: number;
522
+ relationshipType: RelationshipType;
523
+ routeFile: string;
524
+ }
525
+ interface RelatedPagesResult {
526
+ sourceUrl: string;
527
+ scope: string;
528
+ relatedPages: RelatedPage[];
358
529
  }
359
530
 
360
- export type { Chunk as C, IndexOptions as I, PageHit as P, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorHit as V, SearchResponse as a, SearchResult as b, SearchSocketConfig as c, Scope as d, ScopeInfo as e, PageRecord as f, IndexStats as g };
531
+ export type { Awaitable as A, Chunk as C, IndexingHooks as I, PageHit as P, ResolvedSearchSocketConfig as R, SearchRequest as S, VectorHit as V, SearchResponse as a, SearchResult as b, SearchSocketConfig as c, Scope as d, ScopeInfo as e, PageRecord as f, IndexOptions as g, IndexStats as h, SiteStructureResult as i, RelatedPagesResult as j, CustomRecord as k };