searchsocket 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -12,7 +12,7 @@ import { Command } from "commander";
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.3.0",
15
+ version: "0.3.2",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -189,7 +189,7 @@ var searchSocketConfigSchema = z.object({
189
189
  ranking: z.object({
190
190
  enableIncomingLinkBoost: z.boolean().optional(),
191
191
  enableDepthBoost: z.boolean().optional(),
192
- pageWeights: z.record(z.string(), z.number().positive()).optional(),
192
+ pageWeights: z.record(z.string(), z.number().min(0)).optional(),
193
193
  aggregationCap: z.number().int().positive().optional(),
194
194
  aggregationDecay: z.number().min(0).max(1).optional(),
195
195
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
@@ -913,23 +913,12 @@ var TursoVectorStore = class {
913
913
  incoming_links INTEGER NOT NULL DEFAULT 0,
914
914
  route_file TEXT NOT NULL DEFAULT '',
915
915
  tags TEXT NOT NULL DEFAULT '[]',
916
+ description TEXT NOT NULL DEFAULT '',
917
+ keywords TEXT NOT NULL DEFAULT '[]',
916
918
  embedding F32_BLOB(${dim})
917
919
  )`,
918
920
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
919
921
  ]);
920
- const chunkMigrationCols = [
921
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
922
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
923
- ];
924
- for (const col of chunkMigrationCols) {
925
- try {
926
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
927
- } catch (error) {
928
- if (error instanceof Error && !error.message.includes("duplicate column")) {
929
- throw error;
930
- }
931
- }
932
- }
933
922
  this.chunksReady = true;
934
923
  }
935
924
  async ensurePages() {
@@ -1007,8 +996,8 @@ var TursoVectorStore = class {
1007
996
  sql: `INSERT OR REPLACE INTO chunks
1008
997
  (id, project_id, scope_name, url, path, title, section_title,
1009
998
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
1010
- incoming_links, route_file, tags, embedding)
1011
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
999
+ incoming_links, route_file, tags, description, keywords, embedding)
1000
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
1012
1001
  args: [
1013
1002
  r.id,
1014
1003
  r.metadata.projectId,
@@ -1027,6 +1016,8 @@ var TursoVectorStore = class {
1027
1016
  r.metadata.incomingLinks,
1028
1017
  r.metadata.routeFile,
1029
1018
  JSON.stringify(r.metadata.tags),
1019
+ r.metadata.description ?? "",
1020
+ JSON.stringify(r.metadata.keywords ?? []),
1030
1021
  JSON.stringify(r.vector)
1031
1022
  ]
1032
1023
  }));
@@ -1042,6 +1033,7 @@ var TursoVectorStore = class {
1042
1033
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
1043
1034
  c.ordinal, c.content_hash,
1044
1035
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
1036
+ c.description, c.keywords,
1045
1037
  vector_distance_cos(c.embedding, vector(?)) AS distance
1046
1038
  FROM vector_top_k('idx', vector(?), ?) AS v
1047
1039
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -1072,6 +1064,12 @@ var TursoVectorStore = class {
1072
1064
  }
1073
1065
  const distance = row.distance;
1074
1066
  const score = 1 - distance;
1067
+ const description = row.description || void 0;
1068
+ const keywords = (() => {
1069
+ const raw = row.keywords || "[]";
1070
+ const parsed = JSON.parse(raw);
1071
+ return parsed.length > 0 ? parsed : void 0;
1072
+ })();
1075
1073
  hits.push({
1076
1074
  id: row.id,
1077
1075
  score,
@@ -1091,7 +1089,9 @@ var TursoVectorStore = class {
1091
1089
  depth: row.depth,
1092
1090
  incomingLinks: row.incoming_links,
1093
1091
  routeFile: row.route_file,
1094
- tags
1092
+ tags,
1093
+ description,
1094
+ keywords
1095
1095
  }
1096
1096
  });
1097
1097
  }
@@ -1636,7 +1636,9 @@ function chunkMirrorPage(page, config, scope) {
1636
1636
  incomingLinks: page.incomingLinks,
1637
1637
  routeFile: page.routeFile,
1638
1638
  tags: page.tags,
1639
- contentHash: ""
1639
+ contentHash: "",
1640
+ description: page.description,
1641
+ keywords: page.keywords
1640
1642
  };
1641
1643
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
1642
1644
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -1663,7 +1665,9 @@ function chunkMirrorPage(page, config, scope) {
1663
1665
  incomingLinks: page.incomingLinks,
1664
1666
  routeFile: page.routeFile,
1665
1667
  tags: page.tags,
1666
- contentHash: ""
1668
+ contentHash: "",
1669
+ description: page.description,
1670
+ keywords: page.keywords
1667
1671
  };
1668
1672
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
1669
1673
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -1696,7 +1700,7 @@ function extractFromHtml(url, html, config) {
1696
1700
  const $ = load(html);
1697
1701
  const normalizedUrl = normalizeUrlPath(url);
1698
1702
  const pageBaseUrl = new URL(`https://searchsocket.local${normalizedUrl}`);
1699
- const title = normalizeText($("title").first().text() || "") || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || normalizedUrl;
1703
+ const title = $("meta[property='og:title']").attr("content")?.trim() || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || $("meta[name='twitter:title']").attr("content")?.trim() || normalizeText($("title").first().text() || "") || normalizedUrl;
1700
1704
  if (config.extract.respectRobotsNoindex) {
1701
1705
  const robots = $("meta[name='robots']").attr("content") ?? "";
1702
1706
  if (/\bnoindex\b/i.test(robots)) {
@@ -2774,7 +2778,9 @@ var IndexPipeline = class _IndexPipeline {
2774
2778
  depth: chunk.depth,
2775
2779
  incomingLinks: chunk.incomingLinks,
2776
2780
  routeFile: chunk.routeFile,
2777
- tags: chunk.tags
2781
+ tags: chunk.tags,
2782
+ description: chunk.description,
2783
+ keywords: chunk.keywords
2778
2784
  }
2779
2785
  });
2780
2786
  }
@@ -3004,6 +3010,7 @@ function aggregateByPage(ranked, config) {
3004
3010
  }
3005
3011
  let pageScore = maxScore + aggregationBonus * config.ranking.weights.aggregation;
3006
3012
  const pageWeight = findPageWeight(url, config.ranking.pageWeights);
3013
+ if (pageWeight === 0) continue;
3007
3014
  if (pageWeight !== 1) {
3008
3015
  pageScore *= pageWeight;
3009
3016
  }
@@ -3226,16 +3233,29 @@ var SearchEngine = class _SearchEngine {
3226
3233
  if (group) group.push(entry);
3227
3234
  else pageGroups.set(url, [entry]);
3228
3235
  }
3236
+ const MAX_CHUNKS_PER_PAGE = 5;
3237
+ const MIN_CHUNKS_PER_PAGE = 1;
3238
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
3229
3239
  const pageCandidates = [];
3230
3240
  for (const [url, chunks] of pageGroups) {
3231
- const sorted = [...chunks].sort(
3232
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
3233
- );
3234
- const title = sorted[0].hit.metadata.title;
3235
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
3236
- pageCandidates.push({ id: url, text: `${title}
3237
-
3238
- ${body}` });
3241
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
3242
+ const bestScore = byScore[0].finalScore;
3243
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
3244
+ const selected = byScore.filter(
3245
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
3246
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
3247
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
3248
+ const first = selected[0].hit.metadata;
3249
+ const parts = [first.title];
3250
+ if (first.description) {
3251
+ parts.push(first.description);
3252
+ }
3253
+ if (first.keywords && first.keywords.length > 0) {
3254
+ parts.push(first.keywords.join(", "));
3255
+ }
3256
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
3257
+ parts.push(body);
3258
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
3239
3259
  }
3240
3260
  const reranked = await this.reranker.rerank(
3241
3261
  query,
package/dist/client.d.cts CHANGED
@@ -1,4 +1,4 @@
1
- import { S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.cjs';
1
+ import { S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.cjs';
2
2
 
3
3
  interface SearchClientOptions {
4
4
  endpoint?: string;
package/dist/client.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.js';
1
+ import { S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.js';
2
2
 
3
3
  interface SearchClientOptions {
4
4
  endpoint?: string;
package/dist/index.cjs CHANGED
@@ -16688,7 +16688,7 @@ var searchSocketConfigSchema = zod.z.object({
16688
16688
  ranking: zod.z.object({
16689
16689
  enableIncomingLinkBoost: zod.z.boolean().optional(),
16690
16690
  enableDepthBoost: zod.z.boolean().optional(),
16691
- pageWeights: zod.z.record(zod.z.string(), zod.z.number().positive()).optional(),
16691
+ pageWeights: zod.z.record(zod.z.string(), zod.z.number().min(0)).optional(),
16692
16692
  aggregationCap: zod.z.number().int().positive().optional(),
16693
16693
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16694
16694
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
@@ -17435,23 +17435,12 @@ var TursoVectorStore = class {
17435
17435
  incoming_links INTEGER NOT NULL DEFAULT 0,
17436
17436
  route_file TEXT NOT NULL DEFAULT '',
17437
17437
  tags TEXT NOT NULL DEFAULT '[]',
17438
+ description TEXT NOT NULL DEFAULT '',
17439
+ keywords TEXT NOT NULL DEFAULT '[]',
17438
17440
  embedding F32_BLOB(${dim})
17439
17441
  )`,
17440
17442
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17441
17443
  ]);
17442
- const chunkMigrationCols = [
17443
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17444
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17445
- ];
17446
- for (const col of chunkMigrationCols) {
17447
- try {
17448
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17449
- } catch (error) {
17450
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17451
- throw error;
17452
- }
17453
- }
17454
- }
17455
17444
  this.chunksReady = true;
17456
17445
  }
17457
17446
  async ensurePages() {
@@ -17529,8 +17518,8 @@ var TursoVectorStore = class {
17529
17518
  sql: `INSERT OR REPLACE INTO chunks
17530
17519
  (id, project_id, scope_name, url, path, title, section_title,
17531
17520
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17532
- incoming_links, route_file, tags, embedding)
17533
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17521
+ incoming_links, route_file, tags, description, keywords, embedding)
17522
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17534
17523
  args: [
17535
17524
  r.id,
17536
17525
  r.metadata.projectId,
@@ -17549,6 +17538,8 @@ var TursoVectorStore = class {
17549
17538
  r.metadata.incomingLinks,
17550
17539
  r.metadata.routeFile,
17551
17540
  JSON.stringify(r.metadata.tags),
17541
+ r.metadata.description ?? "",
17542
+ JSON.stringify(r.metadata.keywords ?? []),
17552
17543
  JSON.stringify(r.vector)
17553
17544
  ]
17554
17545
  }));
@@ -17564,6 +17555,7 @@ var TursoVectorStore = class {
17564
17555
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
17565
17556
  c.ordinal, c.content_hash,
17566
17557
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17558
+ c.description, c.keywords,
17567
17559
  vector_distance_cos(c.embedding, vector(?)) AS distance
17568
17560
  FROM vector_top_k('idx', vector(?), ?) AS v
17569
17561
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -17594,6 +17586,12 @@ var TursoVectorStore = class {
17594
17586
  }
17595
17587
  const distance = row.distance;
17596
17588
  const score = 1 - distance;
17589
+ const description = row.description || void 0;
17590
+ const keywords = (() => {
17591
+ const raw = row.keywords || "[]";
17592
+ const parsed = JSON.parse(raw);
17593
+ return parsed.length > 0 ? parsed : void 0;
17594
+ })();
17597
17595
  hits.push({
17598
17596
  id: row.id,
17599
17597
  score,
@@ -17613,7 +17611,9 @@ var TursoVectorStore = class {
17613
17611
  depth: row.depth,
17614
17612
  incomingLinks: row.incoming_links,
17615
17613
  routeFile: row.route_file,
17616
- tags
17614
+ tags,
17615
+ description,
17616
+ keywords
17617
17617
  }
17618
17618
  });
17619
17619
  }
@@ -18152,7 +18152,9 @@ function chunkMirrorPage(page, config, scope) {
18152
18152
  incomingLinks: page.incomingLinks,
18153
18153
  routeFile: page.routeFile,
18154
18154
  tags: page.tags,
18155
- contentHash: ""
18155
+ contentHash: "",
18156
+ description: page.description,
18157
+ keywords: page.keywords
18156
18158
  };
18157
18159
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18158
18160
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -18179,7 +18181,9 @@ function chunkMirrorPage(page, config, scope) {
18179
18181
  incomingLinks: page.incomingLinks,
18180
18182
  routeFile: page.routeFile,
18181
18183
  tags: page.tags,
18182
- contentHash: ""
18184
+ contentHash: "",
18185
+ description: page.description,
18186
+ keywords: page.keywords
18183
18187
  };
18184
18188
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18185
18189
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -19034,7 +19038,7 @@ function extractFromHtml(url, html, config) {
19034
19038
  const $ = cheerio.load(html);
19035
19039
  const normalizedUrl = normalizeUrlPath(url);
19036
19040
  const pageBaseUrl = new URL(`https://searchsocket.local${normalizedUrl}`);
19037
- const title = normalizeText($("title").first().text() || "") || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || normalizedUrl;
19041
+ const title = $("meta[property='og:title']").attr("content")?.trim() || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || $("meta[name='twitter:title']").attr("content")?.trim() || normalizeText($("title").first().text() || "") || normalizedUrl;
19038
19042
  if (config.extract.respectRobotsNoindex) {
19039
19043
  const robots = $("meta[name='robots']").attr("content") ?? "";
19040
19044
  if (/\bnoindex\b/i.test(robots)) {
@@ -20150,7 +20154,9 @@ var IndexPipeline = class _IndexPipeline {
20150
20154
  depth: chunk.depth,
20151
20155
  incomingLinks: chunk.incomingLinks,
20152
20156
  routeFile: chunk.routeFile,
20153
- tags: chunk.tags
20157
+ tags: chunk.tags,
20158
+ description: chunk.description,
20159
+ keywords: chunk.keywords
20154
20160
  }
20155
20161
  });
20156
20162
  }
@@ -20273,6 +20279,7 @@ function aggregateByPage(ranked, config) {
20273
20279
  }
20274
20280
  let pageScore = maxScore + aggregationBonus * config.ranking.weights.aggregation;
20275
20281
  const pageWeight = findPageWeight(url, config.ranking.pageWeights);
20282
+ if (pageWeight === 0) continue;
20276
20283
  if (pageWeight !== 1) {
20277
20284
  pageScore *= pageWeight;
20278
20285
  }
@@ -20495,16 +20502,29 @@ var SearchEngine = class _SearchEngine {
20495
20502
  if (group) group.push(entry);
20496
20503
  else pageGroups.set(url, [entry]);
20497
20504
  }
20505
+ const MAX_CHUNKS_PER_PAGE = 5;
20506
+ const MIN_CHUNKS_PER_PAGE = 1;
20507
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
20498
20508
  const pageCandidates = [];
20499
20509
  for (const [url, chunks] of pageGroups) {
20500
- const sorted = [...chunks].sort(
20501
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
20502
- );
20503
- const title = sorted[0].hit.metadata.title;
20504
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20505
- pageCandidates.push({ id: url, text: `${title}
20506
-
20507
- ${body}` });
20510
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
20511
+ const bestScore = byScore[0].finalScore;
20512
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
20513
+ const selected = byScore.filter(
20514
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
20515
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
20516
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
20517
+ const first = selected[0].hit.metadata;
20518
+ const parts = [first.title];
20519
+ if (first.description) {
20520
+ parts.push(first.description);
20521
+ }
20522
+ if (first.keywords && first.keywords.length > 0) {
20523
+ parts.push(first.keywords.join(", "));
20524
+ }
20525
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20526
+ parts.push(body);
20527
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
20508
20528
  }
20509
20529
  const reranked = await this.reranker.rerank(
20510
20530
  query,
package/dist/index.d.cts CHANGED
@@ -1,5 +1,5 @@
1
- import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.cjs';
2
- export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-DAXk6A3Y.cjs';
1
+ import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.cjs';
2
+ export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-BrG6XTUU.cjs';
3
3
  export { searchsocketHandle, searchsocketVitePlugin } from './sveltekit.cjs';
4
4
  export { createSearchClient } from './client.cjs';
5
5
 
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.js';
2
- export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-DAXk6A3Y.js';
1
+ import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.js';
2
+ export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-BrG6XTUU.js';
3
3
  export { searchsocketHandle, searchsocketVitePlugin } from './sveltekit.js';
4
4
  export { createSearchClient } from './client.js';
5
5
 
package/dist/index.js CHANGED
@@ -16676,7 +16676,7 @@ var searchSocketConfigSchema = z.object({
16676
16676
  ranking: z.object({
16677
16677
  enableIncomingLinkBoost: z.boolean().optional(),
16678
16678
  enableDepthBoost: z.boolean().optional(),
16679
- pageWeights: z.record(z.string(), z.number().positive()).optional(),
16679
+ pageWeights: z.record(z.string(), z.number().min(0)).optional(),
16680
16680
  aggregationCap: z.number().int().positive().optional(),
16681
16681
  aggregationDecay: z.number().min(0).max(1).optional(),
16682
16682
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
@@ -17423,23 +17423,12 @@ var TursoVectorStore = class {
17423
17423
  incoming_links INTEGER NOT NULL DEFAULT 0,
17424
17424
  route_file TEXT NOT NULL DEFAULT '',
17425
17425
  tags TEXT NOT NULL DEFAULT '[]',
17426
+ description TEXT NOT NULL DEFAULT '',
17427
+ keywords TEXT NOT NULL DEFAULT '[]',
17426
17428
  embedding F32_BLOB(${dim})
17427
17429
  )`,
17428
17430
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17429
17431
  ]);
17430
- const chunkMigrationCols = [
17431
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17432
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17433
- ];
17434
- for (const col of chunkMigrationCols) {
17435
- try {
17436
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17437
- } catch (error) {
17438
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17439
- throw error;
17440
- }
17441
- }
17442
- }
17443
17432
  this.chunksReady = true;
17444
17433
  }
17445
17434
  async ensurePages() {
@@ -17517,8 +17506,8 @@ var TursoVectorStore = class {
17517
17506
  sql: `INSERT OR REPLACE INTO chunks
17518
17507
  (id, project_id, scope_name, url, path, title, section_title,
17519
17508
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17520
- incoming_links, route_file, tags, embedding)
17521
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17509
+ incoming_links, route_file, tags, description, keywords, embedding)
17510
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17522
17511
  args: [
17523
17512
  r.id,
17524
17513
  r.metadata.projectId,
@@ -17537,6 +17526,8 @@ var TursoVectorStore = class {
17537
17526
  r.metadata.incomingLinks,
17538
17527
  r.metadata.routeFile,
17539
17528
  JSON.stringify(r.metadata.tags),
17529
+ r.metadata.description ?? "",
17530
+ JSON.stringify(r.metadata.keywords ?? []),
17540
17531
  JSON.stringify(r.vector)
17541
17532
  ]
17542
17533
  }));
@@ -17552,6 +17543,7 @@ var TursoVectorStore = class {
17552
17543
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
17553
17544
  c.ordinal, c.content_hash,
17554
17545
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17546
+ c.description, c.keywords,
17555
17547
  vector_distance_cos(c.embedding, vector(?)) AS distance
17556
17548
  FROM vector_top_k('idx', vector(?), ?) AS v
17557
17549
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -17582,6 +17574,12 @@ var TursoVectorStore = class {
17582
17574
  }
17583
17575
  const distance = row.distance;
17584
17576
  const score = 1 - distance;
17577
+ const description = row.description || void 0;
17578
+ const keywords = (() => {
17579
+ const raw = row.keywords || "[]";
17580
+ const parsed = JSON.parse(raw);
17581
+ return parsed.length > 0 ? parsed : void 0;
17582
+ })();
17585
17583
  hits.push({
17586
17584
  id: row.id,
17587
17585
  score,
@@ -17601,7 +17599,9 @@ var TursoVectorStore = class {
17601
17599
  depth: row.depth,
17602
17600
  incomingLinks: row.incoming_links,
17603
17601
  routeFile: row.route_file,
17604
- tags
17602
+ tags,
17603
+ description,
17604
+ keywords
17605
17605
  }
17606
17606
  });
17607
17607
  }
@@ -18140,7 +18140,9 @@ function chunkMirrorPage(page, config, scope) {
18140
18140
  incomingLinks: page.incomingLinks,
18141
18141
  routeFile: page.routeFile,
18142
18142
  tags: page.tags,
18143
- contentHash: ""
18143
+ contentHash: "",
18144
+ description: page.description,
18145
+ keywords: page.keywords
18144
18146
  };
18145
18147
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18146
18148
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -18167,7 +18169,9 @@ function chunkMirrorPage(page, config, scope) {
18167
18169
  incomingLinks: page.incomingLinks,
18168
18170
  routeFile: page.routeFile,
18169
18171
  tags: page.tags,
18170
- contentHash: ""
18172
+ contentHash: "",
18173
+ description: page.description,
18174
+ keywords: page.keywords
18171
18175
  };
18172
18176
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18173
18177
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -19022,7 +19026,7 @@ function extractFromHtml(url, html, config) {
19022
19026
  const $ = load(html);
19023
19027
  const normalizedUrl = normalizeUrlPath(url);
19024
19028
  const pageBaseUrl = new URL(`https://searchsocket.local${normalizedUrl}`);
19025
- const title = normalizeText($("title").first().text() || "") || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || normalizedUrl;
19029
+ const title = $("meta[property='og:title']").attr("content")?.trim() || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || $("meta[name='twitter:title']").attr("content")?.trim() || normalizeText($("title").first().text() || "") || normalizedUrl;
19026
19030
  if (config.extract.respectRobotsNoindex) {
19027
19031
  const robots = $("meta[name='robots']").attr("content") ?? "";
19028
19032
  if (/\bnoindex\b/i.test(robots)) {
@@ -20138,7 +20142,9 @@ var IndexPipeline = class _IndexPipeline {
20138
20142
  depth: chunk.depth,
20139
20143
  incomingLinks: chunk.incomingLinks,
20140
20144
  routeFile: chunk.routeFile,
20141
- tags: chunk.tags
20145
+ tags: chunk.tags,
20146
+ description: chunk.description,
20147
+ keywords: chunk.keywords
20142
20148
  }
20143
20149
  });
20144
20150
  }
@@ -20261,6 +20267,7 @@ function aggregateByPage(ranked, config) {
20261
20267
  }
20262
20268
  let pageScore = maxScore + aggregationBonus * config.ranking.weights.aggregation;
20263
20269
  const pageWeight = findPageWeight(url, config.ranking.pageWeights);
20270
+ if (pageWeight === 0) continue;
20264
20271
  if (pageWeight !== 1) {
20265
20272
  pageScore *= pageWeight;
20266
20273
  }
@@ -20483,16 +20490,29 @@ var SearchEngine = class _SearchEngine {
20483
20490
  if (group) group.push(entry);
20484
20491
  else pageGroups.set(url, [entry]);
20485
20492
  }
20493
+ const MAX_CHUNKS_PER_PAGE = 5;
20494
+ const MIN_CHUNKS_PER_PAGE = 1;
20495
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
20486
20496
  const pageCandidates = [];
20487
20497
  for (const [url, chunks] of pageGroups) {
20488
- const sorted = [...chunks].sort(
20489
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
20490
- );
20491
- const title = sorted[0].hit.metadata.title;
20492
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20493
- pageCandidates.push({ id: url, text: `${title}
20494
-
20495
- ${body}` });
20498
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
20499
+ const bestScore = byScore[0].finalScore;
20500
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
20501
+ const selected = byScore.filter(
20502
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
20503
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
20504
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
20505
+ const first = selected[0].hit.metadata;
20506
+ const parts = [first.title];
20507
+ if (first.description) {
20508
+ parts.push(first.description);
20509
+ }
20510
+ if (first.keywords && first.keywords.length > 0) {
20511
+ parts.push(first.keywords.join(", "));
20512
+ }
20513
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20514
+ parts.push(body);
20515
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
20496
20516
  }
20497
20517
  const reranked = await this.reranker.rerank(
20498
20518
  query,
@@ -16684,7 +16684,7 @@ var searchSocketConfigSchema = zod.z.object({
16684
16684
  ranking: zod.z.object({
16685
16685
  enableIncomingLinkBoost: zod.z.boolean().optional(),
16686
16686
  enableDepthBoost: zod.z.boolean().optional(),
16687
- pageWeights: zod.z.record(zod.z.string(), zod.z.number().positive()).optional(),
16687
+ pageWeights: zod.z.record(zod.z.string(), zod.z.number().min(0)).optional(),
16688
16688
  aggregationCap: zod.z.number().int().positive().optional(),
16689
16689
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16690
16690
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
@@ -17468,23 +17468,12 @@ var TursoVectorStore = class {
17468
17468
  incoming_links INTEGER NOT NULL DEFAULT 0,
17469
17469
  route_file TEXT NOT NULL DEFAULT '',
17470
17470
  tags TEXT NOT NULL DEFAULT '[]',
17471
+ description TEXT NOT NULL DEFAULT '',
17472
+ keywords TEXT NOT NULL DEFAULT '[]',
17471
17473
  embedding F32_BLOB(${dim})
17472
17474
  )`,
17473
17475
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17474
17476
  ]);
17475
- const chunkMigrationCols = [
17476
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17477
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17478
- ];
17479
- for (const col of chunkMigrationCols) {
17480
- try {
17481
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17482
- } catch (error) {
17483
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17484
- throw error;
17485
- }
17486
- }
17487
- }
17488
17477
  this.chunksReady = true;
17489
17478
  }
17490
17479
  async ensurePages() {
@@ -17562,8 +17551,8 @@ var TursoVectorStore = class {
17562
17551
  sql: `INSERT OR REPLACE INTO chunks
17563
17552
  (id, project_id, scope_name, url, path, title, section_title,
17564
17553
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17565
- incoming_links, route_file, tags, embedding)
17566
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17554
+ incoming_links, route_file, tags, description, keywords, embedding)
17555
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17567
17556
  args: [
17568
17557
  r.id,
17569
17558
  r.metadata.projectId,
@@ -17582,6 +17571,8 @@ var TursoVectorStore = class {
17582
17571
  r.metadata.incomingLinks,
17583
17572
  r.metadata.routeFile,
17584
17573
  JSON.stringify(r.metadata.tags),
17574
+ r.metadata.description ?? "",
17575
+ JSON.stringify(r.metadata.keywords ?? []),
17585
17576
  JSON.stringify(r.vector)
17586
17577
  ]
17587
17578
  }));
@@ -17597,6 +17588,7 @@ var TursoVectorStore = class {
17597
17588
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
17598
17589
  c.ordinal, c.content_hash,
17599
17590
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17591
+ c.description, c.keywords,
17600
17592
  vector_distance_cos(c.embedding, vector(?)) AS distance
17601
17593
  FROM vector_top_k('idx', vector(?), ?) AS v
17602
17594
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -17627,6 +17619,12 @@ var TursoVectorStore = class {
17627
17619
  }
17628
17620
  const distance = row.distance;
17629
17621
  const score = 1 - distance;
17622
+ const description = row.description || void 0;
17623
+ const keywords = (() => {
17624
+ const raw = row.keywords || "[]";
17625
+ const parsed = JSON.parse(raw);
17626
+ return parsed.length > 0 ? parsed : void 0;
17627
+ })();
17630
17628
  hits.push({
17631
17629
  id: row.id,
17632
17630
  score,
@@ -17646,7 +17644,9 @@ var TursoVectorStore = class {
17646
17644
  depth: row.depth,
17647
17645
  incomingLinks: row.incoming_links,
17648
17646
  routeFile: row.route_file,
17649
- tags
17647
+ tags,
17648
+ description,
17649
+ keywords
17650
17650
  }
17651
17651
  });
17652
17652
  }
@@ -17940,6 +17940,7 @@ function aggregateByPage(ranked, config) {
17940
17940
  }
17941
17941
  let pageScore = maxScore + aggregationBonus * config.ranking.weights.aggregation;
17942
17942
  const pageWeight = findPageWeight(url, config.ranking.pageWeights);
17943
+ if (pageWeight === 0) continue;
17943
17944
  if (pageWeight !== 1) {
17944
17945
  pageScore *= pageWeight;
17945
17946
  }
@@ -18162,16 +18163,29 @@ var SearchEngine = class _SearchEngine {
18162
18163
  if (group) group.push(entry);
18163
18164
  else pageGroups.set(url, [entry]);
18164
18165
  }
18166
+ const MAX_CHUNKS_PER_PAGE = 5;
18167
+ const MIN_CHUNKS_PER_PAGE = 1;
18168
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
18165
18169
  const pageCandidates = [];
18166
18170
  for (const [url, chunks] of pageGroups) {
18167
- const sorted = [...chunks].sort(
18168
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
18169
- );
18170
- const title = sorted[0].hit.metadata.title;
18171
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18172
- pageCandidates.push({ id: url, text: `${title}
18173
-
18174
- ${body}` });
18171
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
18172
+ const bestScore = byScore[0].finalScore;
18173
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
18174
+ const selected = byScore.filter(
18175
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
18176
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
18177
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
18178
+ const first = selected[0].hit.metadata;
18179
+ const parts = [first.title];
18180
+ if (first.description) {
18181
+ parts.push(first.description);
18182
+ }
18183
+ if (first.keywords && first.keywords.length > 0) {
18184
+ parts.push(first.keywords.join(", "));
18185
+ }
18186
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18187
+ parts.push(body);
18188
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
18175
18189
  }
18176
18190
  const reranked = await this.reranker.rerank(
18177
18191
  query,
@@ -18681,7 +18695,9 @@ function chunkMirrorPage(page, config, scope) {
18681
18695
  incomingLinks: page.incomingLinks,
18682
18696
  routeFile: page.routeFile,
18683
18697
  tags: page.tags,
18684
- contentHash: ""
18698
+ contentHash: "",
18699
+ description: page.description,
18700
+ keywords: page.keywords
18685
18701
  };
18686
18702
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18687
18703
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -18708,7 +18724,9 @@ function chunkMirrorPage(page, config, scope) {
18708
18724
  incomingLinks: page.incomingLinks,
18709
18725
  routeFile: page.routeFile,
18710
18726
  tags: page.tags,
18711
- contentHash: ""
18727
+ contentHash: "",
18728
+ description: page.description,
18729
+ keywords: page.keywords
18712
18730
  };
18713
18731
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18714
18732
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -19563,7 +19581,7 @@ function extractFromHtml(url, html, config) {
19563
19581
  const $ = cheerio.load(html);
19564
19582
  const normalizedUrl = normalizeUrlPath(url);
19565
19583
  const pageBaseUrl = new URL(`https://searchsocket.local${normalizedUrl}`);
19566
- const title = normalizeText($("title").first().text() || "") || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || normalizedUrl;
19584
+ const title = $("meta[property='og:title']").attr("content")?.trim() || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || $("meta[name='twitter:title']").attr("content")?.trim() || normalizeText($("title").first().text() || "") || normalizedUrl;
19567
19585
  if (config.extract.respectRobotsNoindex) {
19568
19586
  const robots = $("meta[name='robots']").attr("content") ?? "";
19569
19587
  if (/\bnoindex\b/i.test(robots)) {
@@ -20671,7 +20689,9 @@ var IndexPipeline = class _IndexPipeline {
20671
20689
  depth: chunk.depth,
20672
20690
  incomingLinks: chunk.incomingLinks,
20673
20691
  routeFile: chunk.routeFile,
20674
- tags: chunk.tags
20692
+ tags: chunk.tags,
20693
+ description: chunk.description,
20694
+ keywords: chunk.keywords
20675
20695
  }
20676
20696
  });
20677
20697
  }
@@ -1,4 +1,4 @@
1
- import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig } from './types-DAXk6A3Y.cjs';
1
+ import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig } from './types-BrG6XTUU.cjs';
2
2
 
3
3
  interface SearchSocketHandleOptions {
4
4
  configPath?: string;
@@ -1,4 +1,4 @@
1
- import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig } from './types-DAXk6A3Y.js';
1
+ import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig } from './types-BrG6XTUU.js';
2
2
 
3
3
  interface SearchSocketHandleOptions {
4
4
  configPath?: string;
package/dist/sveltekit.js CHANGED
@@ -16672,7 +16672,7 @@ var searchSocketConfigSchema = z.object({
16672
16672
  ranking: z.object({
16673
16673
  enableIncomingLinkBoost: z.boolean().optional(),
16674
16674
  enableDepthBoost: z.boolean().optional(),
16675
- pageWeights: z.record(z.string(), z.number().positive()).optional(),
16675
+ pageWeights: z.record(z.string(), z.number().min(0)).optional(),
16676
16676
  aggregationCap: z.number().int().positive().optional(),
16677
16677
  aggregationDecay: z.number().min(0).max(1).optional(),
16678
16678
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
@@ -17456,23 +17456,12 @@ var TursoVectorStore = class {
17456
17456
  incoming_links INTEGER NOT NULL DEFAULT 0,
17457
17457
  route_file TEXT NOT NULL DEFAULT '',
17458
17458
  tags TEXT NOT NULL DEFAULT '[]',
17459
+ description TEXT NOT NULL DEFAULT '',
17460
+ keywords TEXT NOT NULL DEFAULT '[]',
17459
17461
  embedding F32_BLOB(${dim})
17460
17462
  )`,
17461
17463
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17462
17464
  ]);
17463
- const chunkMigrationCols = [
17464
- { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17465
- { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17466
- ];
17467
- for (const col of chunkMigrationCols) {
17468
- try {
17469
- await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17470
- } catch (error) {
17471
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17472
- throw error;
17473
- }
17474
- }
17475
- }
17476
17465
  this.chunksReady = true;
17477
17466
  }
17478
17467
  async ensurePages() {
@@ -17550,8 +17539,8 @@ var TursoVectorStore = class {
17550
17539
  sql: `INSERT OR REPLACE INTO chunks
17551
17540
  (id, project_id, scope_name, url, path, title, section_title,
17552
17541
  heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17553
- incoming_links, route_file, tags, embedding)
17554
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17542
+ incoming_links, route_file, tags, description, keywords, embedding)
17543
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17555
17544
  args: [
17556
17545
  r.id,
17557
17546
  r.metadata.projectId,
@@ -17570,6 +17559,8 @@ var TursoVectorStore = class {
17570
17559
  r.metadata.incomingLinks,
17571
17560
  r.metadata.routeFile,
17572
17561
  JSON.stringify(r.metadata.tags),
17562
+ r.metadata.description ?? "",
17563
+ JSON.stringify(r.metadata.keywords ?? []),
17573
17564
  JSON.stringify(r.vector)
17574
17565
  ]
17575
17566
  }));
@@ -17585,6 +17576,7 @@ var TursoVectorStore = class {
17585
17576
  c.section_title, c.heading_path, c.snippet, c.chunk_text,
17586
17577
  c.ordinal, c.content_hash,
17587
17578
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17579
+ c.description, c.keywords,
17588
17580
  vector_distance_cos(c.embedding, vector(?)) AS distance
17589
17581
  FROM vector_top_k('idx', vector(?), ?) AS v
17590
17582
  JOIN chunks AS c ON c.rowid = v.id`,
@@ -17615,6 +17607,12 @@ var TursoVectorStore = class {
17615
17607
  }
17616
17608
  const distance = row.distance;
17617
17609
  const score = 1 - distance;
17610
+ const description = row.description || void 0;
17611
+ const keywords = (() => {
17612
+ const raw = row.keywords || "[]";
17613
+ const parsed = JSON.parse(raw);
17614
+ return parsed.length > 0 ? parsed : void 0;
17615
+ })();
17618
17616
  hits.push({
17619
17617
  id: row.id,
17620
17618
  score,
@@ -17634,7 +17632,9 @@ var TursoVectorStore = class {
17634
17632
  depth: row.depth,
17635
17633
  incomingLinks: row.incoming_links,
17636
17634
  routeFile: row.route_file,
17637
- tags
17635
+ tags,
17636
+ description,
17637
+ keywords
17638
17638
  }
17639
17639
  });
17640
17640
  }
@@ -17928,6 +17928,7 @@ function aggregateByPage(ranked, config) {
17928
17928
  }
17929
17929
  let pageScore = maxScore + aggregationBonus * config.ranking.weights.aggregation;
17930
17930
  const pageWeight = findPageWeight(url, config.ranking.pageWeights);
17931
+ if (pageWeight === 0) continue;
17931
17932
  if (pageWeight !== 1) {
17932
17933
  pageScore *= pageWeight;
17933
17934
  }
@@ -18150,16 +18151,29 @@ var SearchEngine = class _SearchEngine {
18150
18151
  if (group) group.push(entry);
18151
18152
  else pageGroups.set(url, [entry]);
18152
18153
  }
18154
+ const MAX_CHUNKS_PER_PAGE = 5;
18155
+ const MIN_CHUNKS_PER_PAGE = 1;
18156
+ const MIN_CHUNK_SCORE_RATIO = 0.5;
18153
18157
  const pageCandidates = [];
18154
18158
  for (const [url, chunks] of pageGroups) {
18155
- const sorted = [...chunks].sort(
18156
- (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
18157
- );
18158
- const title = sorted[0].hit.metadata.title;
18159
- const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18160
- pageCandidates.push({ id: url, text: `${title}
18161
-
18162
- ${body}` });
18159
+ const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
18160
+ const bestScore = byScore[0].finalScore;
18161
+ const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
18162
+ const selected = byScore.filter(
18163
+ (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
18164
+ ).slice(0, MAX_CHUNKS_PER_PAGE);
18165
+ selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
18166
+ const first = selected[0].hit.metadata;
18167
+ const parts = [first.title];
18168
+ if (first.description) {
18169
+ parts.push(first.description);
18170
+ }
18171
+ if (first.keywords && first.keywords.length > 0) {
18172
+ parts.push(first.keywords.join(", "));
18173
+ }
18174
+ const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18175
+ parts.push(body);
18176
+ pageCandidates.push({ id: url, text: parts.join("\n\n") });
18163
18177
  }
18164
18178
  const reranked = await this.reranker.rerank(
18165
18179
  query,
@@ -18669,7 +18683,9 @@ function chunkMirrorPage(page, config, scope) {
18669
18683
  incomingLinks: page.incomingLinks,
18670
18684
  routeFile: page.routeFile,
18671
18685
  tags: page.tags,
18672
- contentHash: ""
18686
+ contentHash: "",
18687
+ description: page.description,
18688
+ keywords: page.keywords
18673
18689
  };
18674
18690
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18675
18691
  summaryChunk.contentHash = sha256(normalizeText(embeddingText));
@@ -18696,7 +18712,9 @@ function chunkMirrorPage(page, config, scope) {
18696
18712
  incomingLinks: page.incomingLinks,
18697
18713
  routeFile: page.routeFile,
18698
18714
  tags: page.tags,
18699
- contentHash: ""
18715
+ contentHash: "",
18716
+ description: page.description,
18717
+ keywords: page.keywords
18700
18718
  };
18701
18719
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18702
18720
  chunk.contentHash = sha256(normalizeText(embeddingText));
@@ -19551,7 +19569,7 @@ function extractFromHtml(url, html, config) {
19551
19569
  const $ = load(html);
19552
19570
  const normalizedUrl = normalizeUrlPath(url);
19553
19571
  const pageBaseUrl = new URL(`https://searchsocket.local${normalizedUrl}`);
19554
- const title = normalizeText($("title").first().text() || "") || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || normalizedUrl;
19572
+ const title = $("meta[property='og:title']").attr("content")?.trim() || normalizeText($(`${config.extract.mainSelector} h1`).first().text() || "") || $("meta[name='twitter:title']").attr("content")?.trim() || normalizeText($("title").first().text() || "") || normalizedUrl;
19555
19573
  if (config.extract.respectRobotsNoindex) {
19556
19574
  const robots = $("meta[name='robots']").attr("content") ?? "";
19557
19575
  if (/\bnoindex\b/i.test(robots)) {
@@ -20659,7 +20677,9 @@ var IndexPipeline = class _IndexPipeline {
20659
20677
  depth: chunk.depth,
20660
20678
  incomingLinks: chunk.incomingLinks,
20661
20679
  routeFile: chunk.routeFile,
20662
- tags: chunk.tags
20680
+ tags: chunk.tags,
20681
+ description: chunk.description,
20682
+ keywords: chunk.keywords
20663
20683
  }
20664
20684
  });
20665
20685
  }
@@ -261,6 +261,8 @@ interface Chunk {
261
261
  routeFile: string;
262
262
  tags: string[];
263
263
  contentHash: string;
264
+ description?: string;
265
+ keywords?: string[];
264
266
  }
265
267
  interface VectorRecord {
266
268
  id: string;
@@ -282,6 +284,8 @@ interface VectorRecord {
282
284
  incomingLinks: number;
283
285
  routeFile: string;
284
286
  tags: string[];
287
+ description?: string;
288
+ keywords?: string[];
285
289
  };
286
290
  }
287
291
  interface QueryOpts {
@@ -261,6 +261,8 @@ interface Chunk {
261
261
  routeFile: string;
262
262
  tags: string[];
263
263
  contentHash: string;
264
+ description?: string;
265
+ keywords?: string[];
264
266
  }
265
267
  interface VectorRecord {
266
268
  id: string;
@@ -282,6 +284,8 @@ interface VectorRecord {
282
284
  incomingLinks: number;
283
285
  routeFile: string;
284
286
  tags: string[];
287
+ description?: string;
288
+ keywords?: string[];
285
289
  };
286
290
  }
287
291
  interface QueryOpts {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "searchsocket",
3
- "version": "0.3.0",
3
+ "version": "0.3.2",
4
4
  "description": "Semantic site search and MCP retrieval for SvelteKit static sites",
5
5
  "license": "MIT",
6
6
  "author": "Greg Priday <greg@siteorigin.com>",