searchsocket 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -12,7 +12,7 @@ import { Command } from "commander";
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.3.0",
15
+ version: "0.3.1",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -913,23 +913,12 @@ var TursoVectorStore = class {
913
913
  incoming_links INTEGER NOT NULL DEFAULT 0,
914
914
  route_file TEXT NOT NULL DEFAULT '',
915
915
  tags TEXT NOT NULL DEFAULT '[]',
916
+ description TEXT NOT NULL DEFAULT '',
917
+ keywords TEXT NOT NULL DEFAULT '[]',
916
918
  embedding F32_BLOB(${dim})
917
919
  )`,
918
920
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
919
921
  ]);
920
- const chunkMigrationCols = [
921
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
922
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
923
- ];
924
- for (const col of chunkMigrationCols) {
925
- try {
926
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
927
- } catch (error) {
928
- if (error instanceof Error && !error.message.includes("duplicate column")) {
929
- throw error;
930
- }
931
- }
932
- }
933
922
  this.chunksReady = true;
934
923
  }
935
924
  async ensurePages() {
@@ -1007,8 +996,8 @@ var TursoVectorStore = class {
1007
996
  sql: `INSERT OR REPLACE INTO chunks
1008
997
  (id, project_id, scope_name, url, path, title, section_title,
1009
998
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
1010
- incoming_links, route_file, tags, embedding)
1011
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
999
+ incoming_links, route_file, tags, description, keywords, embedding)
1000
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
1012
1001
  args: [
1013
1002
  r.id,
1014
1003
  r.metadata.projectId,
@@ -1027,6 +1016,8 @@ var TursoVectorStore = class {
1027
1016
  r.metadata.incomingLinks,
1028
1017
  r.metadata.routeFile,
1029
1018
  JSON.stringify(r.metadata.tags),
1019
+ r.metadata.description ?? "",
1020
+ JSON.stringify(r.metadata.keywords ?? []),
1030
1021
  JSON.stringify(r.vector)
1031
1022
  ]
1032
1023
  }));
@@ -1042,6 +1033,7 @@ var TursoVectorStore = class {
1042
1033
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
1043
1034
  c.ordinal, c.content_hash,
1044
1035
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
1036
+ c.description, c.keywords,
1045
1037
  vector_distance_cos(c.embedding, vector(?)) AS distance
1046
1038
  FROM vector_top_k('idx', vector(?), ?) AS v
1047
1039
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -1072,6 +1064,12 @@ var TursoVectorStore = class {
1072
1064
  }
1073
1065
  const distance = row.distance;
1074
1066
  const score = 1 - distance;
1067
+ const description = row.description || void 0;
1068
+ const keywords = (() => {
1069
+ const raw = row.keywords || "[]";
1070
+ const parsed = JSON.parse(raw);
1071
+ return parsed.length > 0 ? parsed : void 0;
1072
+ })();
1075
1073
  hits.push({
1076
1074
  id: row.id,
1077
1075
  score,
@@ -1091,7 +1089,9 @@ var TursoVectorStore = class {
1091
1089
  depth: row.depth,
1092
1090
  incomingLinks: row.incoming_links,
1093
1091
  routeFile: row.route_file,
1094
- tags
1092
+ tags,
1093
+ description,
1094
+ keywords
1095
1095
  }
1096
1096
  });
1097
1097
  }
@@ -1636,7 +1636,9 @@ function chunkMirrorPage(page, config, scope) {
1636
1636
  incomingLinks: page.incomingLinks,
1637
1637
  routeFile: page.routeFile,
1638
1638
  tags: page.tags,
1639
- contentHash: ""
1639
+ contentHash: "",
1640
+ description: page.description,
1641
+ keywords: page.keywords
1640
1642
  };
1641
1643
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
1642
1644
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -1663,7 +1665,9 @@ function chunkMirrorPage(page, config, scope) {
1663
1665
  incomingLinks: page.incomingLinks,
1664
1666
  routeFile: page.routeFile,
1665
1667
  tags: page.tags,
1666
- contentHash: ""
1668
+ contentHash: "",
1669
+ description: page.description,
1670
+ keywords: page.keywords
1667
1671
  };
1668
1672
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
1669
1673
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -2774,7 +2778,9 @@ var IndexPipeline = class _IndexPipeline {
2774
2778
  depth: chunk.depth,
2775
2779
  incomingLinks: chunk.incomingLinks,
2776
2780
  routeFile: chunk.routeFile,
2777
- tags: chunk.tags
2781
+ tags: chunk.tags,
2782
+ description: chunk.description,
2783
+ keywords: chunk.keywords
2778
2784
  }
2779
2785
  });
2780
2786
  }
@@ -3226,16 +3232,29 @@ var SearchEngine = class _SearchEngine {
3226
3232
  if (group) group.push(entry);
3227
3233
  else pageGroups.set(url, [entry]);
3228
3234
  }
3235
+ const MAX_CHUNKS_PER_PAGE = 5;
3236
+ const MIN_CHUNKS_PER_PAGE = 1;
3237
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
3229
3238
  const pageCandidates = [];
3230
3239
  for (const [url, chunks] of pageGroups) {
3231
- const sorted = [...chunks].sort(
3232
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
3233
- );
3234
- const title = sorted[0].hit.metadata.title;
3235
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
3236
- pageCandidates.push({ id: url, text: `${title}
3237
-
3238
- ${body}` });
3240
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
3241
+ const bestScore = byScore[0].finalScore;
3242
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
3243
+ const selected = byScore.filter(
3244
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
3245
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
3246
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
3247
+ const first = selected[0].hit.metadata;
3248
+ const parts = [first.title];
3249
+ if (first.description) {
3250
+ parts.push(first.description);
3251
+ }
3252
+ if (first.keywords && first.keywords.length > 0) {
3253
+ parts.push(first.keywords.join(", "));
3254
+ }
3255
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
3256
+ parts.push(body);
3257
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
3239
3258
  }
3240
3259
  const reranked = await this.reranker.rerank(
3241
3260
  query,
package/dist/client.d.cts CHANGED
@@ -1,4 +1,4 @@
1
- import { S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.cjs';
1
+ import { S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.cjs';
2
2
 
3
3
  interface SearchClientOptions {
4
4
  endpoint?: string;
package/dist/client.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.js';
1
+ import { S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.js';
2
2
 
3
3
  interface SearchClientOptions {
4
4
  endpoint?: string;
package/dist/index.cjs CHANGED
@@ -17435,23 +17435,12 @@ var TursoVectorStore = class {
17435
17435
  incoming_links INTEGER NOT NULL DEFAULT 0,
17436
17436
  route_file TEXT NOT NULL DEFAULT '',
17437
17437
  tags TEXT NOT NULL DEFAULT '[]',
17438
+ description TEXT NOT NULL DEFAULT '',
17439
+ keywords TEXT NOT NULL DEFAULT '[]',
17438
17440
  embedding F32_BLOB(${dim})
17439
17441
  )`,
17440
17442
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17441
17443
  ]);
17442
- const chunkMigrationCols = [
17443
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17444
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17445
- ];
17446
- for (const col of chunkMigrationCols) {
17447
- try {
17448
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17449
- } catch (error) {
17450
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17451
- throw error;
17452
- }
17453
- }
17454
- }
17455
17444
  this.chunksReady = true;
17456
17445
  }
17457
17446
  async ensurePages() {
@@ -17529,8 +17518,8 @@ var TursoVectorStore = class {
17529
17518
  sql: `INSERT OR REPLACE INTO chunks
17530
17519
  (id, project_id, scope_name, url, path, title, section_title,
17531
17520
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17532
- incoming_links, route_file, tags, embedding)
17533
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17521
+ incoming_links, route_file, tags, description, keywords, embedding)
17522
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17534
17523
  args: [
17535
17524
  r.id,
17536
17525
  r.metadata.projectId,
@@ -17549,6 +17538,8 @@ var TursoVectorStore = class {
17549
17538
  r.metadata.incomingLinks,
17550
17539
  r.metadata.routeFile,
17551
17540
  JSON.stringify(r.metadata.tags),
17541
+ r.metadata.description ?? "",
17542
+ JSON.stringify(r.metadata.keywords ?? []),
17552
17543
  JSON.stringify(r.vector)
17553
17544
  ]
17554
17545
  }));
@@ -17564,6 +17555,7 @@ var TursoVectorStore = class {
17564
17555
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
17565
17556
  c.ordinal, c.content_hash,
17566
17557
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17558
+ c.description, c.keywords,
17567
17559
  vector_distance_cos(c.embedding, vector(?)) AS distance
17568
17560
  FROM vector_top_k('idx', vector(?), ?) AS v
17569
17561
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -17594,6 +17586,12 @@ var TursoVectorStore = class {
17594
17586
  }
17595
17587
  const distance = row.distance;
17596
17588
  const score = 1 - distance;
17589
+ const description = row.description || void 0;
17590
+ const keywords = (() => {
17591
+ const raw = row.keywords || "[]";
17592
+ const parsed = JSON.parse(raw);
17593
+ return parsed.length > 0 ? parsed : void 0;
17594
+ })();
17597
17595
  hits.push({
17598
17596
  id: row.id,
17599
17597
  score,
@@ -17613,7 +17611,9 @@ var TursoVectorStore = class {
17613
17611
  depth: row.depth,
17614
17612
  incomingLinks: row.incoming_links,
17615
17613
  routeFile: row.route_file,
17616
- tags
17614
+ tags,
17615
+ description,
17616
+ keywords
17617
17617
  }
17618
17618
  });
17619
17619
  }
@@ -18152,7 +18152,9 @@ function chunkMirrorPage(page, config, scope) {
18152
18152
  incomingLinks: page.incomingLinks,
18153
18153
  routeFile: page.routeFile,
18154
18154
  tags: page.tags,
18155
- contentHash: ""
18155
+ contentHash: "",
18156
+ description: page.description,
18157
+ keywords: page.keywords
18156
18158
  };
18157
18159
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18158
18160
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -18179,7 +18181,9 @@ function chunkMirrorPage(page, config, scope) {
18179
18181
  incomingLinks: page.incomingLinks,
18180
18182
  routeFile: page.routeFile,
18181
18183
  tags: page.tags,
18182
- contentHash: ""
18184
+ contentHash: "",
18185
+ description: page.description,
18186
+ keywords: page.keywords
18183
18187
  };
18184
18188
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18185
18189
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -20150,7 +20154,9 @@ var IndexPipeline = class _IndexPipeline {
20150
20154
  depth: chunk.depth,
20151
20155
  incomingLinks: chunk.incomingLinks,
20152
20156
  routeFile: chunk.routeFile,
20153
- tags: chunk.tags
20157
+ tags: chunk.tags,
20158
+ description: chunk.description,
20159
+ keywords: chunk.keywords
20154
20160
  }
20155
20161
  });
20156
20162
  }
@@ -20495,16 +20501,29 @@ var SearchEngine = class _SearchEngine {
20495
20501
  if (group) group.push(entry);
20496
20502
  else pageGroups.set(url, [entry]);
20497
20503
  }
20504
+ const MAX_CHUNKS_PER_PAGE = 5;
20505
+ const MIN_CHUNKS_PER_PAGE = 1;
20506
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
20498
20507
  const pageCandidates = [];
20499
20508
  for (const [url, chunks] of pageGroups) {
20500
- const sorted = [...chunks].sort(
20501
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
20502
- );
20503
- const title = sorted[0].hit.metadata.title;
20504
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20505
- pageCandidates.push({ id: url, text: `${title}
20506
-
20507
- ${body}` });
20509
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
20510
+ const bestScore = byScore[0].finalScore;
20511
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
20512
+ const selected = byScore.filter(
20513
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
20514
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
20515
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
20516
+ const first = selected[0].hit.metadata;
20517
+ const parts = [first.title];
20518
+ if (first.description) {
20519
+ parts.push(first.description);
20520
+ }
20521
+ if (first.keywords && first.keywords.length > 0) {
20522
+ parts.push(first.keywords.join(", "));
20523
+ }
20524
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20525
+ parts.push(body);
20526
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
20508
20527
  }
20509
20528
  const reranked = await this.reranker.rerank(
20510
20529
  query,
package/dist/index.d.cts CHANGED
@@ -1,5 +1,5 @@
1
- import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.cjs';
2
- export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-DAXk6A3Y.cjs';
1
+ import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.cjs';
2
+ export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-BrG6XTUU.cjs';
3
3
  export { searchsocketHandle, searchsocketVitePlugin } from './sveltekit.cjs';
4
4
  export { createSearchClient } from './client.cjs';
5
5
 
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.js';
2
- export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-DAXk6A3Y.js';
1
+ import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.js';
2
+ export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-BrG6XTUU.js';
3
3
  export { searchsocketHandle, searchsocketVitePlugin } from './sveltekit.js';
4
4
  export { createSearchClient } from './client.js';
5
5
 
package/dist/index.js CHANGED
@@ -17423,23 +17423,12 @@ var TursoVectorStore = class {
17423
17423
  incoming_links INTEGER NOT NULL DEFAULT 0,
17424
17424
  route_file TEXT NOT NULL DEFAULT '',
17425
17425
  tags TEXT NOT NULL DEFAULT '[]',
17426
+ description TEXT NOT NULL DEFAULT '',
17427
+ keywords TEXT NOT NULL DEFAULT '[]',
17426
17428
  embedding F32_BLOB(${dim})
17427
17429
  )`,
17428
17430
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17429
17431
  ]);
17430
- const chunkMigrationCols = [
17431
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17432
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17433
- ];
17434
- for (const col of chunkMigrationCols) {
17435
- try {
17436
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17437
- } catch (error) {
17438
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17439
- throw error;
17440
- }
17441
- }
17442
- }
17443
17432
  this.chunksReady = true;
17444
17433
  }
17445
17434
  async ensurePages() {
@@ -17517,8 +17506,8 @@ var TursoVectorStore = class {
17517
17506
  sql: `INSERT OR REPLACE INTO chunks
17518
17507
  (id, project_id, scope_name, url, path, title, section_title,
17519
17508
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17520
- incoming_links, route_file, tags, embedding)
17521
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17509
+ incoming_links, route_file, tags, description, keywords, embedding)
17510
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17522
17511
  args: [
17523
17512
  r.id,
17524
17513
  r.metadata.projectId,
@@ -17537,6 +17526,8 @@ var TursoVectorStore = class {
17537
17526
  r.metadata.incomingLinks,
17538
17527
  r.metadata.routeFile,
17539
17528
  JSON.stringify(r.metadata.tags),
17529
+ r.metadata.description ?? "",
17530
+ JSON.stringify(r.metadata.keywords ?? []),
17540
17531
  JSON.stringify(r.vector)
17541
17532
  ]
17542
17533
  }));
@@ -17552,6 +17543,7 @@ var TursoVectorStore = class {
17552
17543
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
17553
17544
  c.ordinal, c.content_hash,
17554
17545
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17546
+ c.description, c.keywords,
17555
17547
  vector_distance_cos(c.embedding, vector(?)) AS distance
17556
17548
  FROM vector_top_k('idx', vector(?), ?) AS v
17557
17549
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -17582,6 +17574,12 @@ var TursoVectorStore = class {
17582
17574
  }
17583
17575
  const distance = row.distance;
17584
17576
  const score = 1 - distance;
17577
+ const description = row.description || void 0;
17578
+ const keywords = (() => {
17579
+ const raw = row.keywords || "[]";
17580
+ const parsed = JSON.parse(raw);
17581
+ return parsed.length > 0 ? parsed : void 0;
17582
+ })();
17585
17583
  hits.push({
17586
17584
  id: row.id,
17587
17585
  score,
@@ -17601,7 +17599,9 @@ var TursoVectorStore = class {
17601
17599
  depth: row.depth,
17602
17600
  incomingLinks: row.incoming_links,
17603
17601
  routeFile: row.route_file,
17604
- tags
17602
+ tags,
17603
+ description,
17604
+ keywords
17605
17605
  }
17606
17606
  });
17607
17607
  }
@@ -18140,7 +18140,9 @@ function chunkMirrorPage(page, config, scope) {
18140
18140
  incomingLinks: page.incomingLinks,
18141
18141
  routeFile: page.routeFile,
18142
18142
  tags: page.tags,
18143
- contentHash: ""
18143
+ contentHash: "",
18144
+ description: page.description,
18145
+ keywords: page.keywords
18144
18146
  };
18145
18147
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18146
18148
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -18167,7 +18169,9 @@ function chunkMirrorPage(page, config, scope) {
18167
18169
  incomingLinks: page.incomingLinks,
18168
18170
  routeFile: page.routeFile,
18169
18171
  tags: page.tags,
18170
- contentHash: ""
18172
+ contentHash: "",
18173
+ description: page.description,
18174
+ keywords: page.keywords
18171
18175
  };
18172
18176
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18173
18177
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -20138,7 +20142,9 @@ var IndexPipeline = class _IndexPipeline {
20138
20142
  depth: chunk.depth,
20139
20143
  incomingLinks: chunk.incomingLinks,
20140
20144
  routeFile: chunk.routeFile,
20141
- tags: chunk.tags
20145
+ tags: chunk.tags,
20146
+ description: chunk.description,
20147
+ keywords: chunk.keywords
20142
20148
  }
20143
20149
  });
20144
20150
  }
@@ -20483,16 +20489,29 @@ var SearchEngine = class _SearchEngine {
20483
20489
  if (group) group.push(entry);
20484
20490
  else pageGroups.set(url, [entry]);
20485
20491
  }
20492
+ const MAX_CHUNKS_PER_PAGE = 5;
20493
+ const MIN_CHUNKS_PER_PAGE = 1;
20494
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
20486
20495
  const pageCandidates = [];
20487
20496
  for (const [url, chunks] of pageGroups) {
20488
- const sorted = [...chunks].sort(
20489
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
20490
- );
20491
- const title = sorted[0].hit.metadata.title;
20492
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20493
- pageCandidates.push({ id: url, text: `${title}
20494
-
20495
- ${body}` });
20497
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
20498
+ const bestScore = byScore[0].finalScore;
20499
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
20500
+ const selected = byScore.filter(
20501
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
20502
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
20503
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
20504
+ const first = selected[0].hit.metadata;
20505
+ const parts = [first.title];
20506
+ if (first.description) {
20507
+ parts.push(first.description);
20508
+ }
20509
+ if (first.keywords && first.keywords.length > 0) {
20510
+ parts.push(first.keywords.join(", "));
20511
+ }
20512
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20513
+ parts.push(body);
20514
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
20496
20515
  }
20497
20516
  const reranked = await this.reranker.rerank(
20498
20517
  query,
package/dist/sveltekit.cjs CHANGED
@@ -17468,23 +17468,12 @@ var TursoVectorStore = class {
17468
17468
  incoming_links INTEGER NOT NULL DEFAULT 0,
17469
17469
  route_file TEXT NOT NULL DEFAULT '',
17470
17470
  tags TEXT NOT NULL DEFAULT '[]',
17471
+ description TEXT NOT NULL DEFAULT '',
17472
+ keywords TEXT NOT NULL DEFAULT '[]',
17471
17473
  embedding F32_BLOB(${dim})
17472
17474
  )`,
17473
17475
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17474
17476
  ]);
17475
- const chunkMigrationCols = [
17476
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17477
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17478
- ];
17479
- for (const col of chunkMigrationCols) {
17480
- try {
17481
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17482
- } catch (error) {
17483
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17484
- throw error;
17485
- }
17486
- }
17487
- }
17488
17477
  this.chunksReady = true;
17489
17478
  }
17490
17479
  async ensurePages() {
@@ -17562,8 +17551,8 @@ var TursoVectorStore = class {
17562
17551
  sql: `INSERT OR REPLACE INTO chunks
17563
17552
  (id, project_id, scope_name, url, path, title, section_title,
17564
17553
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17565
- incoming_links, route_file, tags, embedding)
17566
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17554
+ incoming_links, route_file, tags, description, keywords, embedding)
17555
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17567
17556
  args: [
17568
17557
  r.id,
17569
17558
  r.metadata.projectId,
@@ -17582,6 +17571,8 @@ var TursoVectorStore = class {
17582
17571
  r.metadata.incomingLinks,
17583
17572
  r.metadata.routeFile,
17584
17573
  JSON.stringify(r.metadata.tags),
17574
+ r.metadata.description ?? "",
17575
+ JSON.stringify(r.metadata.keywords ?? []),
17585
17576
  JSON.stringify(r.vector)
17586
17577
  ]
17587
17578
  }));
@@ -17597,6 +17588,7 @@ var TursoVectorStore = class {
17597
17588
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
17598
17589
  c.ordinal, c.content_hash,
17599
17590
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17591
+ c.description, c.keywords,
17600
17592
  vector_distance_cos(c.embedding, vector(?)) AS distance
17601
17593
  FROM vector_top_k('idx', vector(?), ?) AS v
17602
17594
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -17627,6 +17619,12 @@ var TursoVectorStore = class {
17627
17619
  }
17628
17620
  const distance = row.distance;
17629
17621
  const score = 1 - distance;
17622
+ const description = row.description || void 0;
17623
+ const keywords = (() => {
17624
+ const raw = row.keywords || "[]";
17625
+ const parsed = JSON.parse(raw);
17626
+ return parsed.length > 0 ? parsed : void 0;
17627
+ })();
17630
17628
  hits.push({
17631
17629
  id: row.id,
17632
17630
  score,
@@ -17646,7 +17644,9 @@ var TursoVectorStore = class {
17646
17644
  depth: row.depth,
17647
17645
  incomingLinks: row.incoming_links,
17648
17646
  routeFile: row.route_file,
17649
- tags
17647
+ tags,
17648
+ description,
17649
+ keywords
17650
17650
  }
17651
17651
  });
17652
17652
  }
@@ -18162,16 +18162,29 @@ var SearchEngine = class _SearchEngine {
18162
18162
  if (group) group.push(entry);
18163
18163
  else pageGroups.set(url, [entry]);
18164
18164
  }
18165
+ const MAX_CHUNKS_PER_PAGE = 5;
18166
+ const MIN_CHUNKS_PER_PAGE = 1;
18167
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
18165
18168
  const pageCandidates = [];
18166
18169
  for (const [url, chunks] of pageGroups) {
18167
- const sorted = [...chunks].sort(
18168
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
18169
- );
18170
- const title = sorted[0].hit.metadata.title;
18171
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18172
- pageCandidates.push({ id: url, text: `${title}
18173
-
18174
- ${body}` });
18170
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
18171
+ const bestScore = byScore[0].finalScore;
18172
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
18173
+ const selected = byScore.filter(
18174
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
18175
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
18176
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
18177
+ const first = selected[0].hit.metadata;
18178
+ const parts = [first.title];
18179
+ if (first.description) {
18180
+ parts.push(first.description);
18181
+ }
18182
+ if (first.keywords && first.keywords.length > 0) {
18183
+ parts.push(first.keywords.join(", "));
18184
+ }
18185
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18186
+ parts.push(body);
18187
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
18175
18188
  }
18176
18189
  const reranked = await this.reranker.rerank(
18177
18190
  query,
@@ -18681,7 +18694,9 @@ function chunkMirrorPage(page, config, scope) {
18681
18694
  incomingLinks: page.incomingLinks,
18682
18695
  routeFile: page.routeFile,
18683
18696
  tags: page.tags,
18684
- contentHash: ""
18697
+ contentHash: "",
18698
+ description: page.description,
18699
+ keywords: page.keywords
18685
18700
  };
18686
18701
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18687
18702
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -18708,7 +18723,9 @@ function chunkMirrorPage(page, config, scope) {
18708
18723
  incomingLinks: page.incomingLinks,
18709
18724
  routeFile: page.routeFile,
18710
18725
  tags: page.tags,
18711
- contentHash: ""
18726
+ contentHash: "",
18727
+ description: page.description,
18728
+ keywords: page.keywords
18712
18729
  };
18713
18730
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18714
18731
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -20671,7 +20688,9 @@ var IndexPipeline = class _IndexPipeline {
20671
20688
  depth: chunk.depth,
20672
20689
  incomingLinks: chunk.incomingLinks,
20673
20690
  routeFile: chunk.routeFile,
20674
- tags: chunk.tags
20691
+ tags: chunk.tags,
20692
+ description: chunk.description,
20693
+ keywords: chunk.keywords
20675
20694
  }
20676
20695
  });
20677
20696
  }
package/dist/sveltekit.d.cts CHANGED
@@ -1,4 +1,4 @@
1
- import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig } from './types-DAXk6A3Y.cjs';
1
+ import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig } from './types-BrG6XTUU.cjs';
2
2
 
3
3
  interface SearchSocketHandleOptions {
4
4
  configPath?: string;
package/dist/sveltekit.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig } from './types-DAXk6A3Y.js';
1
+ import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig } from './types-BrG6XTUU.js';
2
2
 
3
3
  interface SearchSocketHandleOptions {
4
4
  configPath?: string;
package/dist/sveltekit.js CHANGED
@@ -17456,23 +17456,12 @@ var TursoVectorStore = class {
17456
17456
  incoming_links INTEGER NOT NULL DEFAULT 0,
17457
17457
  route_file TEXT NOT NULL DEFAULT '',
17458
17458
  tags TEXT NOT NULL DEFAULT '[]',
17459
+ description TEXT NOT NULL DEFAULT '',
17460
+ keywords TEXT NOT NULL DEFAULT '[]',
17459
17461
  embedding F32_BLOB(${dim})
17460
17462
  )`,
17461
17463
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17462
17464
  ]);
17463
- const chunkMigrationCols = [
17464
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17465
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17466
- ];
17467
- for (const col of chunkMigrationCols) {
17468
- try {
17469
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17470
- } catch (error) {
17471
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17472
- throw error;
17473
- }
17474
- }
17475
- }
17476
17465
  this.chunksReady = true;
17477
17466
  }
17478
17467
  async ensurePages() {
@@ -17550,8 +17539,8 @@ var TursoVectorStore = class {
17550
17539
  sql: `INSERT OR REPLACE INTO chunks
17551
17540
  (id, project_id, scope_name, url, path, title, section_title,
17552
17541
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17553
- incoming_links, route_file, tags, embedding)
17554
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17542
+ incoming_links, route_file, tags, description, keywords, embedding)
17543
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17555
17544
  args: [
17556
17545
  r.id,
17557
17546
  r.metadata.projectId,
@@ -17570,6 +17559,8 @@ var TursoVectorStore = class {
17570
17559
  r.metadata.incomingLinks,
17571
17560
  r.metadata.routeFile,
17572
17561
  JSON.stringify(r.metadata.tags),
17562
+ r.metadata.description ?? "",
17563
+ JSON.stringify(r.metadata.keywords ?? []),
17573
17564
  JSON.stringify(r.vector)
17574
17565
  ]
17575
17566
  }));
@@ -17585,6 +17576,7 @@ var TursoVectorStore = class {
17585
17576
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
17586
17577
  c.ordinal, c.content_hash,
17587
17578
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17579
+ c.description, c.keywords,
17588
17580
  vector_distance_cos(c.embedding, vector(?)) AS distance
17589
17581
  FROM vector_top_k('idx', vector(?), ?) AS v
17590
17582
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -17615,6 +17607,12 @@ var TursoVectorStore = class {
17615
17607
  }
17616
17608
  const distance = row.distance;
17617
17609
  const score = 1 - distance;
17610
+ const description = row.description || void 0;
17611
+ const keywords = (() => {
17612
+ const raw = row.keywords || "[]";
17613
+ const parsed = JSON.parse(raw);
17614
+ return parsed.length > 0 ? parsed : void 0;
17615
+ })();
17618
17616
  hits.push({
17619
17617
  id: row.id,
17620
17618
  score,
@@ -17634,7 +17632,9 @@ var TursoVectorStore = class {
17634
17632
  depth: row.depth,
17635
17633
  incomingLinks: row.incoming_links,
17636
17634
  routeFile: row.route_file,
17637
- tags
17635
+ tags,
17636
+ description,
17637
+ keywords
17638
17638
  }
17639
17639
  });
17640
17640
  }
@@ -18150,16 +18150,29 @@ var SearchEngine = class _SearchEngine {
18150
18150
  if (group) group.push(entry);
18151
18151
  else pageGroups.set(url, [entry]);
18152
18152
  }
18153
+ const MAX_CHUNKS_PER_PAGE = 5;
18154
+ const MIN_CHUNKS_PER_PAGE = 1;
18155
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
18153
18156
  const pageCandidates = [];
18154
18157
  for (const [url, chunks] of pageGroups) {
18155
- const sorted = [...chunks].sort(
18156
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
18157
- );
18158
- const title = sorted[0].hit.metadata.title;
18159
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18160
- pageCandidates.push({ id: url, text: `${title}
18161
-
18162
- ${body}` });
18158
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
18159
+ const bestScore = byScore[0].finalScore;
18160
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
18161
+ const selected = byScore.filter(
18162
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
18163
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
18164
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
18165
+ const first = selected[0].hit.metadata;
18166
+ const parts = [first.title];
18167
+ if (first.description) {
18168
+ parts.push(first.description);
18169
+ }
18170
+ if (first.keywords && first.keywords.length > 0) {
18171
+ parts.push(first.keywords.join(", "));
18172
+ }
18173
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18174
+ parts.push(body);
18175
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
18163
18176
  }
18164
18177
  const reranked = await this.reranker.rerank(
18165
18178
  query,
@@ -18669,7 +18682,9 @@ function chunkMirrorPage(page, config, scope) {
18669
18682
  incomingLinks: page.incomingLinks,
18670
18683
  routeFile: page.routeFile,
18671
18684
  tags: page.tags,
18672
- contentHash: ""
18685
+ contentHash: "",
18686
+ description: page.description,
18687
+ keywords: page.keywords
18673
18688
  };
18674
18689
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18675
18690
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -18696,7 +18711,9 @@ function chunkMirrorPage(page, config, scope) {
18696
18711
  incomingLinks: page.incomingLinks,
18697
18712
  routeFile: page.routeFile,
18698
18713
  tags: page.tags,
18699
- contentHash: ""
18714
+ contentHash: "",
18715
+ description: page.description,
18716
+ keywords: page.keywords
18700
18717
  };
18701
18718
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18702
18719
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -20659,7 +20676,9 @@ var IndexPipeline = class _IndexPipeline {
20659
20676
  depth: chunk.depth,
20660
20677
  incomingLinks: chunk.incomingLinks,
20661
20678
  routeFile: chunk.routeFile,
20662
- tags: chunk.tags
20679
+ tags: chunk.tags,
20680
+ description: chunk.description,
20681
+ keywords: chunk.keywords
20663
20682
  }
20664
20683
  });
20665
20684
  }
@@ -261,6 +261,8 @@ interface Chunk {
261
261
  routeFile: string;
262
262
  tags: string[];
263
263
  contentHash: string;
264
+ description?: string;
265
+ keywords?: string[];
264
266
  }
265
267
  interface VectorRecord {
266
268
  id: string;
@@ -282,6 +284,8 @@ interface VectorRecord {
282
284
  incomingLinks: number;
283
285
  routeFile: string;
284
286
  tags: string[];
287
+ description?: string;
288
+ keywords?: string[];
285
289
  };
286
290
  }
287
291
  interface QueryOpts {
@@ -261,6 +261,8 @@ interface Chunk {
261
261
  routeFile: string;
262
262
  tags: string[];
263
263
  contentHash: string;
264
+ description?: string;
265
+ keywords?: string[];
264
266
  }
265
267
  interface VectorRecord {
266
268
  id: string;
@@ -282,6 +284,8 @@ interface VectorRecord {
282
284
  incomingLinks: number;
283
285
  routeFile: string;
284
286
  tags: string[];
287
+ description?: string;
288
+ keywords?: string[];
285
289
  };
286
290
  }
287
291
  interface QueryOpts {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "searchsocket",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "description": "Semantic site search and MCP retrieval for SvelteKit static sites",
5
5
  "license": "MIT",
6
6
  "author": "Greg Priday <greg@siteorigin.com>",