searchsocket 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +47 -28
- package/dist/client.d.cts +1 -1
- package/dist/client.d.ts +1 -1
- package/dist/index.cjs +46 -27
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +46 -27
- package/dist/sveltekit.cjs +46 -27
- package/dist/sveltekit.d.cts +1 -1
- package/dist/sveltekit.d.ts +1 -1
- package/dist/sveltekit.js +46 -27
- package/dist/{types-DAXk6A3Y.d.cts → types-BrG6XTUU.d.cts} +4 -0
- package/dist/{types-DAXk6A3Y.d.ts → types-BrG6XTUU.d.ts} +4 -0
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -12,7 +12,7 @@ import { Command } from "commander";
|
|
|
12
12
|
// package.json
|
|
13
13
|
var package_default = {
|
|
14
14
|
name: "searchsocket",
|
|
15
|
-
version: "0.3.0",
|
|
15
|
+
version: "0.3.1",
|
|
16
16
|
description: "Semantic site search and MCP retrieval for SvelteKit static sites",
|
|
17
17
|
license: "MIT",
|
|
18
18
|
author: "Greg Priday <greg@siteorigin.com>",
|
|
@@ -913,23 +913,12 @@ var TursoVectorStore = class {
|
|
|
913
913
|
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
914
914
|
route_file TEXT NOT NULL DEFAULT '',
|
|
915
915
|
tags TEXT NOT NULL DEFAULT '[]',
|
|
916
|
+
description TEXT NOT NULL DEFAULT '',
|
|
917
|
+
keywords TEXT NOT NULL DEFAULT '[]',
|
|
916
918
|
embedding F32_BLOB(${dim})
|
|
917
919
|
)`,
|
|
918
920
|
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
919
921
|
]);
|
|
920
|
-
const chunkMigrationCols = [
|
|
921
|
-
{ name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
|
|
922
|
-
{ name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
|
|
923
|
-
];
|
|
924
|
-
for (const col of chunkMigrationCols) {
|
|
925
|
-
try {
|
|
926
|
-
await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
|
|
927
|
-
} catch (error) {
|
|
928
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
929
|
-
throw error;
|
|
930
|
-
}
|
|
931
|
-
}
|
|
932
|
-
}
|
|
933
922
|
this.chunksReady = true;
|
|
934
923
|
}
|
|
935
924
|
async ensurePages() {
|
|
@@ -1007,8 +996,8 @@ var TursoVectorStore = class {
|
|
|
1007
996
|
sql: `INSERT OR REPLACE INTO chunks
|
|
1008
997
|
(id, project_id, scope_name, url, path, title, section_title,
|
|
1009
998
|
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
1010
|
-
incoming_links, route_file, tags, embedding)
|
|
1011
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
999
|
+
incoming_links, route_file, tags, description, keywords, embedding)
|
|
1000
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
1012
1001
|
args: [
|
|
1013
1002
|
r.id,
|
|
1014
1003
|
r.metadata.projectId,
|
|
@@ -1027,6 +1016,8 @@ var TursoVectorStore = class {
|
|
|
1027
1016
|
r.metadata.incomingLinks,
|
|
1028
1017
|
r.metadata.routeFile,
|
|
1029
1018
|
JSON.stringify(r.metadata.tags),
|
|
1019
|
+
r.metadata.description ?? "",
|
|
1020
|
+
JSON.stringify(r.metadata.keywords ?? []),
|
|
1030
1021
|
JSON.stringify(r.vector)
|
|
1031
1022
|
]
|
|
1032
1023
|
}));
|
|
@@ -1042,6 +1033,7 @@ var TursoVectorStore = class {
|
|
|
1042
1033
|
c.section_title, c.heading_path, c.snippet, c.chunk_text,
|
|
1043
1034
|
c.ordinal, c.content_hash,
|
|
1044
1035
|
c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
|
|
1036
|
+
c.description, c.keywords,
|
|
1045
1037
|
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
1046
1038
|
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
1047
1039
|
JOIN chunks AS c ON c.rowid = v.id`,
|
|
@@ -1072,6 +1064,12 @@ var TursoVectorStore = class {
|
|
|
1072
1064
|
}
|
|
1073
1065
|
const distance = row.distance;
|
|
1074
1066
|
const score = 1 - distance;
|
|
1067
|
+
const description = row.description || void 0;
|
|
1068
|
+
const keywords = (() => {
|
|
1069
|
+
const raw = row.keywords || "[]";
|
|
1070
|
+
const parsed = JSON.parse(raw);
|
|
1071
|
+
return parsed.length > 0 ? parsed : void 0;
|
|
1072
|
+
})();
|
|
1075
1073
|
hits.push({
|
|
1076
1074
|
id: row.id,
|
|
1077
1075
|
score,
|
|
@@ -1091,7 +1089,9 @@ var TursoVectorStore = class {
|
|
|
1091
1089
|
depth: row.depth,
|
|
1092
1090
|
incomingLinks: row.incoming_links,
|
|
1093
1091
|
routeFile: row.route_file,
|
|
1094
|
-
tags
|
|
1092
|
+
tags,
|
|
1093
|
+
description,
|
|
1094
|
+
keywords
|
|
1095
1095
|
}
|
|
1096
1096
|
});
|
|
1097
1097
|
}
|
|
@@ -1636,7 +1636,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
1636
1636
|
incomingLinks: page.incomingLinks,
|
|
1637
1637
|
routeFile: page.routeFile,
|
|
1638
1638
|
tags: page.tags,
|
|
1639
|
-
contentHash: ""
|
|
1639
|
+
contentHash: "",
|
|
1640
|
+
description: page.description,
|
|
1641
|
+
keywords: page.keywords
|
|
1640
1642
|
};
|
|
1641
1643
|
const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
|
|
1642
1644
|
summaryChunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -1663,7 +1665,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
1663
1665
|
incomingLinks: page.incomingLinks,
|
|
1664
1666
|
routeFile: page.routeFile,
|
|
1665
1667
|
tags: page.tags,
|
|
1666
|
-
contentHash: ""
|
|
1668
|
+
contentHash: "",
|
|
1669
|
+
description: page.description,
|
|
1670
|
+
keywords: page.keywords
|
|
1667
1671
|
};
|
|
1668
1672
|
const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
|
|
1669
1673
|
chunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -2774,7 +2778,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2774
2778
|
depth: chunk.depth,
|
|
2775
2779
|
incomingLinks: chunk.incomingLinks,
|
|
2776
2780
|
routeFile: chunk.routeFile,
|
|
2777
|
-
tags: chunk.tags
|
|
2781
|
+
tags: chunk.tags,
|
|
2782
|
+
description: chunk.description,
|
|
2783
|
+
keywords: chunk.keywords
|
|
2778
2784
|
}
|
|
2779
2785
|
});
|
|
2780
2786
|
}
|
|
@@ -3226,16 +3232,29 @@ var SearchEngine = class _SearchEngine {
|
|
|
3226
3232
|
if (group) group.push(entry);
|
|
3227
3233
|
else pageGroups.set(url, [entry]);
|
|
3228
3234
|
}
|
|
3235
|
+
const MAX_CHUNKS_PER_PAGE = 5;
|
|
3236
|
+
const MIN_CHUNKS_PER_PAGE = 1;
|
|
3237
|
+
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
3229
3238
|
const pageCandidates = [];
|
|
3230
3239
|
for (const [url, chunks] of pageGroups) {
|
|
3231
|
-
const
|
|
3232
|
-
|
|
3233
|
-
);
|
|
3234
|
-
const
|
|
3235
|
-
|
|
3236
|
-
|
|
3237
|
-
|
|
3238
|
-
|
|
3240
|
+
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
3241
|
+
const bestScore = byScore[0].finalScore;
|
|
3242
|
+
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
3243
|
+
const selected = byScore.filter(
|
|
3244
|
+
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
3245
|
+
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
3246
|
+
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
3247
|
+
const first = selected[0].hit.metadata;
|
|
3248
|
+
const parts = [first.title];
|
|
3249
|
+
if (first.description) {
|
|
3250
|
+
parts.push(first.description);
|
|
3251
|
+
}
|
|
3252
|
+
if (first.keywords && first.keywords.length > 0) {
|
|
3253
|
+
parts.push(first.keywords.join(", "));
|
|
3254
|
+
}
|
|
3255
|
+
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
3256
|
+
parts.push(body);
|
|
3257
|
+
pageCandidates.push({ id: url, text: parts.join("\n\n") });
|
|
3239
3258
|
}
|
|
3240
3259
|
const reranked = await this.reranker.rerank(
|
|
3241
3260
|
query,
|
package/dist/client.d.cts
CHANGED
package/dist/client.d.ts
CHANGED
package/dist/index.cjs
CHANGED
|
@@ -17435,23 +17435,12 @@ var TursoVectorStore = class {
|
|
|
17435
17435
|
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17436
17436
|
route_file TEXT NOT NULL DEFAULT '',
|
|
17437
17437
|
tags TEXT NOT NULL DEFAULT '[]',
|
|
17438
|
+
description TEXT NOT NULL DEFAULT '',
|
|
17439
|
+
keywords TEXT NOT NULL DEFAULT '[]',
|
|
17438
17440
|
embedding F32_BLOB(${dim})
|
|
17439
17441
|
)`,
|
|
17440
17442
|
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
17441
17443
|
]);
|
|
17442
|
-
const chunkMigrationCols = [
|
|
17443
|
-
{ name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
|
|
17444
|
-
{ name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
|
|
17445
|
-
];
|
|
17446
|
-
for (const col of chunkMigrationCols) {
|
|
17447
|
-
try {
|
|
17448
|
-
await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
|
|
17449
|
-
} catch (error) {
|
|
17450
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
17451
|
-
throw error;
|
|
17452
|
-
}
|
|
17453
|
-
}
|
|
17454
|
-
}
|
|
17455
17444
|
this.chunksReady = true;
|
|
17456
17445
|
}
|
|
17457
17446
|
async ensurePages() {
|
|
@@ -17529,8 +17518,8 @@ var TursoVectorStore = class {
|
|
|
17529
17518
|
sql: `INSERT OR REPLACE INTO chunks
|
|
17530
17519
|
(id, project_id, scope_name, url, path, title, section_title,
|
|
17531
17520
|
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
17532
|
-
incoming_links, route_file, tags, embedding)
|
|
17533
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17521
|
+
incoming_links, route_file, tags, description, keywords, embedding)
|
|
17522
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17534
17523
|
args: [
|
|
17535
17524
|
r.id,
|
|
17536
17525
|
r.metadata.projectId,
|
|
@@ -17549,6 +17538,8 @@ var TursoVectorStore = class {
|
|
|
17549
17538
|
r.metadata.incomingLinks,
|
|
17550
17539
|
r.metadata.routeFile,
|
|
17551
17540
|
JSON.stringify(r.metadata.tags),
|
|
17541
|
+
r.metadata.description ?? "",
|
|
17542
|
+
JSON.stringify(r.metadata.keywords ?? []),
|
|
17552
17543
|
JSON.stringify(r.vector)
|
|
17553
17544
|
]
|
|
17554
17545
|
}));
|
|
@@ -17564,6 +17555,7 @@ var TursoVectorStore = class {
|
|
|
17564
17555
|
c.section_title, c.heading_path, c.snippet, c.chunk_text,
|
|
17565
17556
|
c.ordinal, c.content_hash,
|
|
17566
17557
|
c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
|
|
17558
|
+
c.description, c.keywords,
|
|
17567
17559
|
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
17568
17560
|
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
17569
17561
|
JOIN chunks AS c ON c.rowid = v.id`,
|
|
@@ -17594,6 +17586,12 @@ var TursoVectorStore = class {
|
|
|
17594
17586
|
}
|
|
17595
17587
|
const distance = row.distance;
|
|
17596
17588
|
const score = 1 - distance;
|
|
17589
|
+
const description = row.description || void 0;
|
|
17590
|
+
const keywords = (() => {
|
|
17591
|
+
const raw = row.keywords || "[]";
|
|
17592
|
+
const parsed = JSON.parse(raw);
|
|
17593
|
+
return parsed.length > 0 ? parsed : void 0;
|
|
17594
|
+
})();
|
|
17597
17595
|
hits.push({
|
|
17598
17596
|
id: row.id,
|
|
17599
17597
|
score,
|
|
@@ -17613,7 +17611,9 @@ var TursoVectorStore = class {
|
|
|
17613
17611
|
depth: row.depth,
|
|
17614
17612
|
incomingLinks: row.incoming_links,
|
|
17615
17613
|
routeFile: row.route_file,
|
|
17616
|
-
tags
|
|
17614
|
+
tags,
|
|
17615
|
+
description,
|
|
17616
|
+
keywords
|
|
17617
17617
|
}
|
|
17618
17618
|
});
|
|
17619
17619
|
}
|
|
@@ -18152,7 +18152,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
18152
18152
|
incomingLinks: page.incomingLinks,
|
|
18153
18153
|
routeFile: page.routeFile,
|
|
18154
18154
|
tags: page.tags,
|
|
18155
|
-
contentHash: ""
|
|
18155
|
+
contentHash: "",
|
|
18156
|
+
description: page.description,
|
|
18157
|
+
keywords: page.keywords
|
|
18156
18158
|
};
|
|
18157
18159
|
const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
|
|
18158
18160
|
summaryChunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -18179,7 +18181,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
18179
18181
|
incomingLinks: page.incomingLinks,
|
|
18180
18182
|
routeFile: page.routeFile,
|
|
18181
18183
|
tags: page.tags,
|
|
18182
|
-
contentHash: ""
|
|
18184
|
+
contentHash: "",
|
|
18185
|
+
description: page.description,
|
|
18186
|
+
keywords: page.keywords
|
|
18183
18187
|
};
|
|
18184
18188
|
const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
|
|
18185
18189
|
chunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -20150,7 +20154,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20150
20154
|
depth: chunk.depth,
|
|
20151
20155
|
incomingLinks: chunk.incomingLinks,
|
|
20152
20156
|
routeFile: chunk.routeFile,
|
|
20153
|
-
tags: chunk.tags
|
|
20157
|
+
tags: chunk.tags,
|
|
20158
|
+
description: chunk.description,
|
|
20159
|
+
keywords: chunk.keywords
|
|
20154
20160
|
}
|
|
20155
20161
|
});
|
|
20156
20162
|
}
|
|
@@ -20495,16 +20501,29 @@ var SearchEngine = class _SearchEngine {
|
|
|
20495
20501
|
if (group) group.push(entry);
|
|
20496
20502
|
else pageGroups.set(url, [entry]);
|
|
20497
20503
|
}
|
|
20504
|
+
const MAX_CHUNKS_PER_PAGE = 5;
|
|
20505
|
+
const MIN_CHUNKS_PER_PAGE = 1;
|
|
20506
|
+
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
20498
20507
|
const pageCandidates = [];
|
|
20499
20508
|
for (const [url, chunks] of pageGroups) {
|
|
20500
|
-
const
|
|
20501
|
-
|
|
20502
|
-
);
|
|
20503
|
-
const
|
|
20504
|
-
|
|
20505
|
-
|
|
20506
|
-
|
|
20507
|
-
|
|
20509
|
+
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
20510
|
+
const bestScore = byScore[0].finalScore;
|
|
20511
|
+
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
20512
|
+
const selected = byScore.filter(
|
|
20513
|
+
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
20514
|
+
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
20515
|
+
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
20516
|
+
const first = selected[0].hit.metadata;
|
|
20517
|
+
const parts = [first.title];
|
|
20518
|
+
if (first.description) {
|
|
20519
|
+
parts.push(first.description);
|
|
20520
|
+
}
|
|
20521
|
+
if (first.keywords && first.keywords.length > 0) {
|
|
20522
|
+
parts.push(first.keywords.join(", "));
|
|
20523
|
+
}
|
|
20524
|
+
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
20525
|
+
parts.push(body);
|
|
20526
|
+
pageCandidates.push({ id: url, text: parts.join("\n\n") });
|
|
20508
20527
|
}
|
|
20509
20528
|
const reranked = await this.reranker.rerank(
|
|
20510
20529
|
query,
|
package/dist/index.d.cts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.cjs';
|
|
2
|
-
export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-DAXk6A3Y.cjs';
|
|
1
|
+
import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.cjs';
|
|
2
|
+
export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-BrG6XTUU.cjs';
|
|
3
3
|
export { searchsocketHandle, searchsocketVitePlugin } from './sveltekit.cjs';
|
|
4
4
|
export { createSearchClient } from './client.cjs';
|
|
5
5
|
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-DAXk6A3Y.js';
|
|
2
|
-
export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-DAXk6A3Y.js';
|
|
1
|
+
import { R as ResolvedSearchSocketConfig, b as SearchSocketConfig, c as Scope, E as EmbeddingsProvider, d as Reranker, e as RerankCandidate, V as VectorStore, I as IndexOptions, f as IndexStats, S as SearchRequest, a as SearchResponse } from './types-BrG6XTUU.js';
|
|
2
|
+
export { C as Chunk, Q as QueryOpts, g as VectorHit, h as VectorRecord } from './types-BrG6XTUU.js';
|
|
3
3
|
export { searchsocketHandle, searchsocketVitePlugin } from './sveltekit.js';
|
|
4
4
|
export { createSearchClient } from './client.js';
|
|
5
5
|
|
package/dist/index.js
CHANGED
|
@@ -17423,23 +17423,12 @@ var TursoVectorStore = class {
|
|
|
17423
17423
|
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17424
17424
|
route_file TEXT NOT NULL DEFAULT '',
|
|
17425
17425
|
tags TEXT NOT NULL DEFAULT '[]',
|
|
17426
|
+
description TEXT NOT NULL DEFAULT '',
|
|
17427
|
+
keywords TEXT NOT NULL DEFAULT '[]',
|
|
17426
17428
|
embedding F32_BLOB(${dim})
|
|
17427
17429
|
)`,
|
|
17428
17430
|
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
17429
17431
|
]);
|
|
17430
|
-
const chunkMigrationCols = [
|
|
17431
|
-
{ name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
|
|
17432
|
-
{ name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
|
|
17433
|
-
];
|
|
17434
|
-
for (const col of chunkMigrationCols) {
|
|
17435
|
-
try {
|
|
17436
|
-
await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
|
|
17437
|
-
} catch (error) {
|
|
17438
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
17439
|
-
throw error;
|
|
17440
|
-
}
|
|
17441
|
-
}
|
|
17442
|
-
}
|
|
17443
17432
|
this.chunksReady = true;
|
|
17444
17433
|
}
|
|
17445
17434
|
async ensurePages() {
|
|
@@ -17517,8 +17506,8 @@ var TursoVectorStore = class {
|
|
|
17517
17506
|
sql: `INSERT OR REPLACE INTO chunks
|
|
17518
17507
|
(id, project_id, scope_name, url, path, title, section_title,
|
|
17519
17508
|
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
17520
|
-
incoming_links, route_file, tags, embedding)
|
|
17521
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17509
|
+
incoming_links, route_file, tags, description, keywords, embedding)
|
|
17510
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17522
17511
|
args: [
|
|
17523
17512
|
r.id,
|
|
17524
17513
|
r.metadata.projectId,
|
|
@@ -17537,6 +17526,8 @@ var TursoVectorStore = class {
|
|
|
17537
17526
|
r.metadata.incomingLinks,
|
|
17538
17527
|
r.metadata.routeFile,
|
|
17539
17528
|
JSON.stringify(r.metadata.tags),
|
|
17529
|
+
r.metadata.description ?? "",
|
|
17530
|
+
JSON.stringify(r.metadata.keywords ?? []),
|
|
17540
17531
|
JSON.stringify(r.vector)
|
|
17541
17532
|
]
|
|
17542
17533
|
}));
|
|
@@ -17552,6 +17543,7 @@ var TursoVectorStore = class {
|
|
|
17552
17543
|
c.section_title, c.heading_path, c.snippet, c.chunk_text,
|
|
17553
17544
|
c.ordinal, c.content_hash,
|
|
17554
17545
|
c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
|
|
17546
|
+
c.description, c.keywords,
|
|
17555
17547
|
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
17556
17548
|
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
17557
17549
|
JOIN chunks AS c ON c.rowid = v.id`,
|
|
@@ -17582,6 +17574,12 @@ var TursoVectorStore = class {
|
|
|
17582
17574
|
}
|
|
17583
17575
|
const distance = row.distance;
|
|
17584
17576
|
const score = 1 - distance;
|
|
17577
|
+
const description = row.description || void 0;
|
|
17578
|
+
const keywords = (() => {
|
|
17579
|
+
const raw = row.keywords || "[]";
|
|
17580
|
+
const parsed = JSON.parse(raw);
|
|
17581
|
+
return parsed.length > 0 ? parsed : void 0;
|
|
17582
|
+
})();
|
|
17585
17583
|
hits.push({
|
|
17586
17584
|
id: row.id,
|
|
17587
17585
|
score,
|
|
@@ -17601,7 +17599,9 @@ var TursoVectorStore = class {
|
|
|
17601
17599
|
depth: row.depth,
|
|
17602
17600
|
incomingLinks: row.incoming_links,
|
|
17603
17601
|
routeFile: row.route_file,
|
|
17604
|
-
tags
|
|
17602
|
+
tags,
|
|
17603
|
+
description,
|
|
17604
|
+
keywords
|
|
17605
17605
|
}
|
|
17606
17606
|
});
|
|
17607
17607
|
}
|
|
@@ -18140,7 +18140,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
18140
18140
|
incomingLinks: page.incomingLinks,
|
|
18141
18141
|
routeFile: page.routeFile,
|
|
18142
18142
|
tags: page.tags,
|
|
18143
|
-
contentHash: ""
|
|
18143
|
+
contentHash: "",
|
|
18144
|
+
description: page.description,
|
|
18145
|
+
keywords: page.keywords
|
|
18144
18146
|
};
|
|
18145
18147
|
const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
|
|
18146
18148
|
summaryChunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -18167,7 +18169,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
18167
18169
|
incomingLinks: page.incomingLinks,
|
|
18168
18170
|
routeFile: page.routeFile,
|
|
18169
18171
|
tags: page.tags,
|
|
18170
|
-
contentHash: ""
|
|
18172
|
+
contentHash: "",
|
|
18173
|
+
description: page.description,
|
|
18174
|
+
keywords: page.keywords
|
|
18171
18175
|
};
|
|
18172
18176
|
const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
|
|
18173
18177
|
chunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -20138,7 +20142,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20138
20142
|
depth: chunk.depth,
|
|
20139
20143
|
incomingLinks: chunk.incomingLinks,
|
|
20140
20144
|
routeFile: chunk.routeFile,
|
|
20141
|
-
tags: chunk.tags
|
|
20145
|
+
tags: chunk.tags,
|
|
20146
|
+
description: chunk.description,
|
|
20147
|
+
keywords: chunk.keywords
|
|
20142
20148
|
}
|
|
20143
20149
|
});
|
|
20144
20150
|
}
|
|
@@ -20483,16 +20489,29 @@ var SearchEngine = class _SearchEngine {
|
|
|
20483
20489
|
if (group) group.push(entry);
|
|
20484
20490
|
else pageGroups.set(url, [entry]);
|
|
20485
20491
|
}
|
|
20492
|
+
const MAX_CHUNKS_PER_PAGE = 5;
|
|
20493
|
+
const MIN_CHUNKS_PER_PAGE = 1;
|
|
20494
|
+
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
20486
20495
|
const pageCandidates = [];
|
|
20487
20496
|
for (const [url, chunks] of pageGroups) {
|
|
20488
|
-
const
|
|
20489
|
-
|
|
20490
|
-
);
|
|
20491
|
-
const
|
|
20492
|
-
|
|
20493
|
-
|
|
20494
|
-
|
|
20495
|
-
|
|
20497
|
+
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
20498
|
+
const bestScore = byScore[0].finalScore;
|
|
20499
|
+
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
20500
|
+
const selected = byScore.filter(
|
|
20501
|
+
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
20502
|
+
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
20503
|
+
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
20504
|
+
const first = selected[0].hit.metadata;
|
|
20505
|
+
const parts = [first.title];
|
|
20506
|
+
if (first.description) {
|
|
20507
|
+
parts.push(first.description);
|
|
20508
|
+
}
|
|
20509
|
+
if (first.keywords && first.keywords.length > 0) {
|
|
20510
|
+
parts.push(first.keywords.join(", "));
|
|
20511
|
+
}
|
|
20512
|
+
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
20513
|
+
parts.push(body);
|
|
20514
|
+
pageCandidates.push({ id: url, text: parts.join("\n\n") });
|
|
20496
20515
|
}
|
|
20497
20516
|
const reranked = await this.reranker.rerank(
|
|
20498
20517
|
query,
|
package/dist/sveltekit.cjs
CHANGED
|
@@ -17468,23 +17468,12 @@ var TursoVectorStore = class {
|
|
|
17468
17468
|
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17469
17469
|
route_file TEXT NOT NULL DEFAULT '',
|
|
17470
17470
|
tags TEXT NOT NULL DEFAULT '[]',
|
|
17471
|
+
description TEXT NOT NULL DEFAULT '',
|
|
17472
|
+
keywords TEXT NOT NULL DEFAULT '[]',
|
|
17471
17473
|
embedding F32_BLOB(${dim})
|
|
17472
17474
|
)`,
|
|
17473
17475
|
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
17474
17476
|
]);
|
|
17475
|
-
const chunkMigrationCols = [
|
|
17476
|
-
{ name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
|
|
17477
|
-
{ name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
|
|
17478
|
-
];
|
|
17479
|
-
for (const col of chunkMigrationCols) {
|
|
17480
|
-
try {
|
|
17481
|
-
await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
|
|
17482
|
-
} catch (error) {
|
|
17483
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
17484
|
-
throw error;
|
|
17485
|
-
}
|
|
17486
|
-
}
|
|
17487
|
-
}
|
|
17488
17477
|
this.chunksReady = true;
|
|
17489
17478
|
}
|
|
17490
17479
|
async ensurePages() {
|
|
@@ -17562,8 +17551,8 @@ var TursoVectorStore = class {
|
|
|
17562
17551
|
sql: `INSERT OR REPLACE INTO chunks
|
|
17563
17552
|
(id, project_id, scope_name, url, path, title, section_title,
|
|
17564
17553
|
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
17565
|
-
incoming_links, route_file, tags, embedding)
|
|
17566
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17554
|
+
incoming_links, route_file, tags, description, keywords, embedding)
|
|
17555
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17567
17556
|
args: [
|
|
17568
17557
|
r.id,
|
|
17569
17558
|
r.metadata.projectId,
|
|
@@ -17582,6 +17571,8 @@ var TursoVectorStore = class {
|
|
|
17582
17571
|
r.metadata.incomingLinks,
|
|
17583
17572
|
r.metadata.routeFile,
|
|
17584
17573
|
JSON.stringify(r.metadata.tags),
|
|
17574
|
+
r.metadata.description ?? "",
|
|
17575
|
+
JSON.stringify(r.metadata.keywords ?? []),
|
|
17585
17576
|
JSON.stringify(r.vector)
|
|
17586
17577
|
]
|
|
17587
17578
|
}));
|
|
@@ -17597,6 +17588,7 @@ var TursoVectorStore = class {
|
|
|
17597
17588
|
c.section_title, c.heading_path, c.snippet, c.chunk_text,
|
|
17598
17589
|
c.ordinal, c.content_hash,
|
|
17599
17590
|
c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
|
|
17591
|
+
c.description, c.keywords,
|
|
17600
17592
|
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
17601
17593
|
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
17602
17594
|
JOIN chunks AS c ON c.rowid = v.id`,
|
|
@@ -17627,6 +17619,12 @@ var TursoVectorStore = class {
|
|
|
17627
17619
|
}
|
|
17628
17620
|
const distance = row.distance;
|
|
17629
17621
|
const score = 1 - distance;
|
|
17622
|
+
const description = row.description || void 0;
|
|
17623
|
+
const keywords = (() => {
|
|
17624
|
+
const raw = row.keywords || "[]";
|
|
17625
|
+
const parsed = JSON.parse(raw);
|
|
17626
|
+
return parsed.length > 0 ? parsed : void 0;
|
|
17627
|
+
})();
|
|
17630
17628
|
hits.push({
|
|
17631
17629
|
id: row.id,
|
|
17632
17630
|
score,
|
|
@@ -17646,7 +17644,9 @@ var TursoVectorStore = class {
|
|
|
17646
17644
|
depth: row.depth,
|
|
17647
17645
|
incomingLinks: row.incoming_links,
|
|
17648
17646
|
routeFile: row.route_file,
|
|
17649
|
-
tags
|
|
17647
|
+
tags,
|
|
17648
|
+
description,
|
|
17649
|
+
keywords
|
|
17650
17650
|
}
|
|
17651
17651
|
});
|
|
17652
17652
|
}
|
|
@@ -18162,16 +18162,29 @@ var SearchEngine = class _SearchEngine {
|
|
|
18162
18162
|
if (group) group.push(entry);
|
|
18163
18163
|
else pageGroups.set(url, [entry]);
|
|
18164
18164
|
}
|
|
18165
|
+
const MAX_CHUNKS_PER_PAGE = 5;
|
|
18166
|
+
const MIN_CHUNKS_PER_PAGE = 1;
|
|
18167
|
+
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
18165
18168
|
const pageCandidates = [];
|
|
18166
18169
|
for (const [url, chunks] of pageGroups) {
|
|
18167
|
-
const
|
|
18168
|
-
|
|
18169
|
-
);
|
|
18170
|
-
const
|
|
18171
|
-
|
|
18172
|
-
|
|
18173
|
-
|
|
18174
|
-
|
|
18170
|
+
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
18171
|
+
const bestScore = byScore[0].finalScore;
|
|
18172
|
+
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
18173
|
+
const selected = byScore.filter(
|
|
18174
|
+
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
18175
|
+
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
18176
|
+
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
18177
|
+
const first = selected[0].hit.metadata;
|
|
18178
|
+
const parts = [first.title];
|
|
18179
|
+
if (first.description) {
|
|
18180
|
+
parts.push(first.description);
|
|
18181
|
+
}
|
|
18182
|
+
if (first.keywords && first.keywords.length > 0) {
|
|
18183
|
+
parts.push(first.keywords.join(", "));
|
|
18184
|
+
}
|
|
18185
|
+
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
18186
|
+
parts.push(body);
|
|
18187
|
+
pageCandidates.push({ id: url, text: parts.join("\n\n") });
|
|
18175
18188
|
}
|
|
18176
18189
|
const reranked = await this.reranker.rerank(
|
|
18177
18190
|
query,
|
|
@@ -18681,7 +18694,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
18681
18694
|
incomingLinks: page.incomingLinks,
|
|
18682
18695
|
routeFile: page.routeFile,
|
|
18683
18696
|
tags: page.tags,
|
|
18684
|
-
contentHash: ""
|
|
18697
|
+
contentHash: "",
|
|
18698
|
+
description: page.description,
|
|
18699
|
+
keywords: page.keywords
|
|
18685
18700
|
};
|
|
18686
18701
|
const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
|
|
18687
18702
|
summaryChunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -18708,7 +18723,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
18708
18723
|
incomingLinks: page.incomingLinks,
|
|
18709
18724
|
routeFile: page.routeFile,
|
|
18710
18725
|
tags: page.tags,
|
|
18711
|
-
contentHash: ""
|
|
18726
|
+
contentHash: "",
|
|
18727
|
+
description: page.description,
|
|
18728
|
+
keywords: page.keywords
|
|
18712
18729
|
};
|
|
18713
18730
|
const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
|
|
18714
18731
|
chunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -20671,7 +20688,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20671
20688
|
depth: chunk.depth,
|
|
20672
20689
|
incomingLinks: chunk.incomingLinks,
|
|
20673
20690
|
routeFile: chunk.routeFile,
|
|
20674
|
-
tags: chunk.tags
|
|
20691
|
+
tags: chunk.tags,
|
|
20692
|
+
description: chunk.description,
|
|
20693
|
+
keywords: chunk.keywords
|
|
20675
20694
|
}
|
|
20676
20695
|
});
|
|
20677
20696
|
}
|
package/dist/sveltekit.d.cts
CHANGED
package/dist/sveltekit.d.ts
CHANGED
package/dist/sveltekit.js
CHANGED
|
@@ -17456,23 +17456,12 @@ var TursoVectorStore = class {
|
|
|
17456
17456
|
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17457
17457
|
route_file TEXT NOT NULL DEFAULT '',
|
|
17458
17458
|
tags TEXT NOT NULL DEFAULT '[]',
|
|
17459
|
+
description TEXT NOT NULL DEFAULT '',
|
|
17460
|
+
keywords TEXT NOT NULL DEFAULT '[]',
|
|
17459
17461
|
embedding F32_BLOB(${dim})
|
|
17460
17462
|
)`,
|
|
17461
17463
|
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
17462
17464
|
]);
|
|
17463
|
-
const chunkMigrationCols = [
|
|
17464
|
-
{ name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
|
|
17465
|
-
{ name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
|
|
17466
|
-
];
|
|
17467
|
-
for (const col of chunkMigrationCols) {
|
|
17468
|
-
try {
|
|
17469
|
-
await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
|
|
17470
|
-
} catch (error) {
|
|
17471
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
17472
|
-
throw error;
|
|
17473
|
-
}
|
|
17474
|
-
}
|
|
17475
|
-
}
|
|
17476
17465
|
this.chunksReady = true;
|
|
17477
17466
|
}
|
|
17478
17467
|
async ensurePages() {
|
|
@@ -17550,8 +17539,8 @@ var TursoVectorStore = class {
|
|
|
17550
17539
|
sql: `INSERT OR REPLACE INTO chunks
|
|
17551
17540
|
(id, project_id, scope_name, url, path, title, section_title,
|
|
17552
17541
|
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
17553
|
-
incoming_links, route_file, tags, embedding)
|
|
17554
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17542
|
+
incoming_links, route_file, tags, description, keywords, embedding)
|
|
17543
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17555
17544
|
args: [
|
|
17556
17545
|
r.id,
|
|
17557
17546
|
r.metadata.projectId,
|
|
@@ -17570,6 +17559,8 @@ var TursoVectorStore = class {
|
|
|
17570
17559
|
r.metadata.incomingLinks,
|
|
17571
17560
|
r.metadata.routeFile,
|
|
17572
17561
|
JSON.stringify(r.metadata.tags),
|
|
17562
|
+
r.metadata.description ?? "",
|
|
17563
|
+
JSON.stringify(r.metadata.keywords ?? []),
|
|
17573
17564
|
JSON.stringify(r.vector)
|
|
17574
17565
|
]
|
|
17575
17566
|
}));
|
|
@@ -17585,6 +17576,7 @@ var TursoVectorStore = class {
|
|
|
17585
17576
|
c.section_title, c.heading_path, c.snippet, c.chunk_text,
|
|
17586
17577
|
c.ordinal, c.content_hash,
|
|
17587
17578
|
c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
|
|
17579
|
+
c.description, c.keywords,
|
|
17588
17580
|
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
17589
17581
|
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
17590
17582
|
JOIN chunks AS c ON c.rowid = v.id`,
|
|
@@ -17615,6 +17607,12 @@ var TursoVectorStore = class {
|
|
|
17615
17607
|
}
|
|
17616
17608
|
const distance = row.distance;
|
|
17617
17609
|
const score = 1 - distance;
|
|
17610
|
+
const description = row.description || void 0;
|
|
17611
|
+
const keywords = (() => {
|
|
17612
|
+
const raw = row.keywords || "[]";
|
|
17613
|
+
const parsed = JSON.parse(raw);
|
|
17614
|
+
return parsed.length > 0 ? parsed : void 0;
|
|
17615
|
+
})();
|
|
17618
17616
|
hits.push({
|
|
17619
17617
|
id: row.id,
|
|
17620
17618
|
score,
|
|
@@ -17634,7 +17632,9 @@ var TursoVectorStore = class {
|
|
|
17634
17632
|
depth: row.depth,
|
|
17635
17633
|
incomingLinks: row.incoming_links,
|
|
17636
17634
|
routeFile: row.route_file,
|
|
17637
|
-
tags
|
|
17635
|
+
tags,
|
|
17636
|
+
description,
|
|
17637
|
+
keywords
|
|
17638
17638
|
}
|
|
17639
17639
|
});
|
|
17640
17640
|
}
|
|
@@ -18150,16 +18150,29 @@ var SearchEngine = class _SearchEngine {
|
|
|
18150
18150
|
if (group) group.push(entry);
|
|
18151
18151
|
else pageGroups.set(url, [entry]);
|
|
18152
18152
|
}
|
|
18153
|
+
const MAX_CHUNKS_PER_PAGE = 5;
|
|
18154
|
+
const MIN_CHUNKS_PER_PAGE = 1;
|
|
18155
|
+
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
18153
18156
|
const pageCandidates = [];
|
|
18154
18157
|
for (const [url, chunks] of pageGroups) {
|
|
18155
|
-
const
|
|
18156
|
-
|
|
18157
|
-
);
|
|
18158
|
-
const
|
|
18159
|
-
|
|
18160
|
-
|
|
18161
|
-
|
|
18162
|
-
|
|
18158
|
+
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
18159
|
+
const bestScore = byScore[0].finalScore;
|
|
18160
|
+
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
18161
|
+
const selected = byScore.filter(
|
|
18162
|
+
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
18163
|
+
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
18164
|
+
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
18165
|
+
const first = selected[0].hit.metadata;
|
|
18166
|
+
const parts = [first.title];
|
|
18167
|
+
if (first.description) {
|
|
18168
|
+
parts.push(first.description);
|
|
18169
|
+
}
|
|
18170
|
+
if (first.keywords && first.keywords.length > 0) {
|
|
18171
|
+
parts.push(first.keywords.join(", "));
|
|
18172
|
+
}
|
|
18173
|
+
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
18174
|
+
parts.push(body);
|
|
18175
|
+
pageCandidates.push({ id: url, text: parts.join("\n\n") });
|
|
18163
18176
|
}
|
|
18164
18177
|
const reranked = await this.reranker.rerank(
|
|
18165
18178
|
query,
|
|
@@ -18669,7 +18682,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
18669
18682
|
incomingLinks: page.incomingLinks,
|
|
18670
18683
|
routeFile: page.routeFile,
|
|
18671
18684
|
tags: page.tags,
|
|
18672
|
-
contentHash: ""
|
|
18685
|
+
contentHash: "",
|
|
18686
|
+
description: page.description,
|
|
18687
|
+
keywords: page.keywords
|
|
18673
18688
|
};
|
|
18674
18689
|
const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
|
|
18675
18690
|
summaryChunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -18696,7 +18711,9 @@ function chunkMirrorPage(page, config, scope) {
|
|
|
18696
18711
|
incomingLinks: page.incomingLinks,
|
|
18697
18712
|
routeFile: page.routeFile,
|
|
18698
18713
|
tags: page.tags,
|
|
18699
|
-
contentHash: ""
|
|
18714
|
+
contentHash: "",
|
|
18715
|
+
description: page.description,
|
|
18716
|
+
keywords: page.keywords
|
|
18700
18717
|
};
|
|
18701
18718
|
const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
|
|
18702
18719
|
chunk.contentHash = sha256(normalizeText(embeddingText));
|
|
@@ -20659,7 +20676,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20659
20676
|
depth: chunk.depth,
|
|
20660
20677
|
incomingLinks: chunk.incomingLinks,
|
|
20661
20678
|
routeFile: chunk.routeFile,
|
|
20662
|
-
tags: chunk.tags
|
|
20679
|
+
tags: chunk.tags,
|
|
20680
|
+
description: chunk.description,
|
|
20681
|
+
keywords: chunk.keywords
|
|
20663
20682
|
}
|
|
20664
20683
|
});
|
|
20665
20684
|
}
|
|
@@ -261,6 +261,8 @@ interface Chunk {
|
|
|
261
261
|
routeFile: string;
|
|
262
262
|
tags: string[];
|
|
263
263
|
contentHash: string;
|
|
264
|
+
description?: string;
|
|
265
|
+
keywords?: string[];
|
|
264
266
|
}
|
|
265
267
|
interface VectorRecord {
|
|
266
268
|
id: string;
|
|
@@ -282,6 +284,8 @@ interface VectorRecord {
|
|
|
282
284
|
incomingLinks: number;
|
|
283
285
|
routeFile: string;
|
|
284
286
|
tags: string[];
|
|
287
|
+
description?: string;
|
|
288
|
+
keywords?: string[];
|
|
285
289
|
};
|
|
286
290
|
}
|
|
287
291
|
interface QueryOpts {
|
|
@@ -261,6 +261,8 @@ interface Chunk {
|
|
|
261
261
|
routeFile: string;
|
|
262
262
|
tags: string[];
|
|
263
263
|
contentHash: string;
|
|
264
|
+
description?: string;
|
|
265
|
+
keywords?: string[];
|
|
264
266
|
}
|
|
265
267
|
interface VectorRecord {
|
|
266
268
|
id: string;
|
|
@@ -282,6 +284,8 @@ interface VectorRecord {
|
|
|
282
284
|
incomingLinks: number;
|
|
283
285
|
routeFile: string;
|
|
284
286
|
tags: string[];
|
|
287
|
+
description?: string;
|
|
288
|
+
keywords?: string[];
|
|
285
289
|
};
|
|
286
290
|
}
|
|
287
291
|
interface QueryOpts {
|