searchsocket 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +58 -5
- package/dist/index.cjs +57 -4
- package/dist/index.d.cts +10 -1
- package/dist/index.d.ts +10 -1
- package/dist/index.js +57 -4
- package/dist/sveltekit.cjs +57 -4
- package/dist/sveltekit.js +57 -4
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -12,7 +12,7 @@ import { Command, Option } from "commander";
|
|
|
12
12
|
// package.json
|
|
13
13
|
var package_default = {
|
|
14
14
|
name: "searchsocket",
|
|
15
|
-
version: "0.6.2",
|
|
15
|
+
version: "0.6.3",
|
|
16
16
|
description: "Semantic site search and MCP retrieval for SvelteKit static sites",
|
|
17
17
|
license: "MIT",
|
|
18
18
|
author: "Greg Priday <greg@siteorigin.com>",
|
|
@@ -863,6 +863,26 @@ import path11 from "path";
|
|
|
863
863
|
|
|
864
864
|
// src/vector/upstash.ts
|
|
865
865
|
import { QueryMode, FusionAlgorithm } from "@upstash/vector";
|
|
866
|
+
function reconstructMarkdownFromChunks(chunks, pageTitle) {
|
|
867
|
+
if (chunks.length === 0) return "";
|
|
868
|
+
const parts = [];
|
|
869
|
+
for (const chunk of chunks) {
|
|
870
|
+
let text2 = chunk.chunkText;
|
|
871
|
+
const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}
|
|
872
|
+
|
|
873
|
+
`;
|
|
874
|
+
const prefixWithoutSection = `${pageTitle}
|
|
875
|
+
|
|
876
|
+
`;
|
|
877
|
+
if (chunk.sectionTitle && text2.startsWith(prefixWithSection)) {
|
|
878
|
+
text2 = text2.slice(prefixWithSection.length);
|
|
879
|
+
} else if (text2.startsWith(prefixWithoutSection)) {
|
|
880
|
+
text2 = text2.slice(prefixWithoutSection.length);
|
|
881
|
+
}
|
|
882
|
+
parts.push(text2.trim());
|
|
883
|
+
}
|
|
884
|
+
return parts.join("\n\n");
|
|
885
|
+
}
|
|
866
886
|
var UpstashSearchStore = class {
|
|
867
887
|
index;
|
|
868
888
|
pagesNs;
|
|
@@ -1242,10 +1262,12 @@ var UpstashSearchStore = class {
|
|
|
1242
1262
|
});
|
|
1243
1263
|
const doc = results[0];
|
|
1244
1264
|
if (!doc || !doc.metadata) return null;
|
|
1265
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
1266
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
1245
1267
|
return {
|
|
1246
1268
|
url: doc.metadata.url,
|
|
1247
1269
|
title: doc.metadata.title,
|
|
1248
|
-
markdown: doc.metadata.markdown,
|
|
1270
|
+
markdown,
|
|
1249
1271
|
projectId: doc.metadata.projectId,
|
|
1250
1272
|
scopeName: doc.metadata.scopeName,
|
|
1251
1273
|
routeFile: doc.metadata.routeFile,
|
|
@@ -1265,6 +1287,37 @@ var UpstashSearchStore = class {
|
|
|
1265
1287
|
return null;
|
|
1266
1288
|
}
|
|
1267
1289
|
}
|
|
1290
|
+
/**
|
|
1291
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
1292
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
1293
|
+
*/
|
|
1294
|
+
async getChunksForPage(url, scope) {
|
|
1295
|
+
const chunks = [];
|
|
1296
|
+
let cursor = "0";
|
|
1297
|
+
try {
|
|
1298
|
+
for (; ; ) {
|
|
1299
|
+
const result = await this.chunksNs.range({
|
|
1300
|
+
cursor,
|
|
1301
|
+
limit: 100,
|
|
1302
|
+
includeMetadata: true
|
|
1303
|
+
});
|
|
1304
|
+
for (const doc of result.vectors) {
|
|
1305
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
1306
|
+
chunks.push({
|
|
1307
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
1308
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
1309
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
1310
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
1311
|
+
});
|
|
1312
|
+
}
|
|
1313
|
+
}
|
|
1314
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1315
|
+
cursor = result.nextCursor;
|
|
1316
|
+
}
|
|
1317
|
+
} catch {
|
|
1318
|
+
}
|
|
1319
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
1320
|
+
}
|
|
1268
1321
|
async fetchPageWithVector(url, scope) {
|
|
1269
1322
|
try {
|
|
1270
1323
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -3737,7 +3790,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3737
3790
|
keywords: r.keywords ?? [],
|
|
3738
3791
|
summary: r.summary ?? "",
|
|
3739
3792
|
tags: r.tags,
|
|
3740
|
-
markdown: r.markdown,
|
|
3741
3793
|
routeFile: r.routeFile,
|
|
3742
3794
|
routeResolution: r.routeResolution,
|
|
3743
3795
|
incomingLinks: r.incomingLinks,
|
|
@@ -3764,7 +3816,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3764
3816
|
keywords: r.keywords ?? [],
|
|
3765
3817
|
summary: r.summary ?? "",
|
|
3766
3818
|
tags: r.tags,
|
|
3767
|
-
markdown: r.markdown,
|
|
3768
3819
|
routeFile: r.routeFile,
|
|
3769
3820
|
routeResolution: r.routeResolution,
|
|
3770
3821
|
incomingLinks: r.incomingLinks,
|
|
@@ -3848,6 +3899,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3848
3899
|
let documentsUpserted = 0;
|
|
3849
3900
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
3850
3901
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
3902
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
3851
3903
|
const docs = changedChunks.map((chunk) => {
|
|
3852
3904
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
3853
3905
|
if (embeddingText.length > 2e3) {
|
|
@@ -3855,6 +3907,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3855
3907
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
3856
3908
|
);
|
|
3857
3909
|
}
|
|
3910
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
3858
3911
|
return {
|
|
3859
3912
|
id: chunk.chunkKey,
|
|
3860
3913
|
data: embeddingText,
|
|
@@ -3865,7 +3918,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3865
3918
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
3866
3919
|
headingPath: chunk.headingPath.join(" > "),
|
|
3867
3920
|
snippet: chunk.snippet,
|
|
3868
|
-
chunkText: embeddingText,
|
|
3921
|
+
chunkText: cappedText,
|
|
3869
3922
|
tags: chunk.tags,
|
|
3870
3923
|
ordinal: chunk.ordinal,
|
|
3871
3924
|
contentHash: chunk.contentHash,
|
package/dist/index.cjs
CHANGED
|
@@ -17274,6 +17274,26 @@ function ensureStateDirs(cwd, stateDir, scope) {
|
|
|
17274
17274
|
fs__default.default.mkdirSync(statePath, { recursive: true });
|
|
17275
17275
|
return { statePath };
|
|
17276
17276
|
}
|
|
17277
|
+
function reconstructMarkdownFromChunks(chunks, pageTitle) {
|
|
17278
|
+
if (chunks.length === 0) return "";
|
|
17279
|
+
const parts = [];
|
|
17280
|
+
for (const chunk of chunks) {
|
|
17281
|
+
let text = chunk.chunkText;
|
|
17282
|
+
const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}
|
|
17283
|
+
|
|
17284
|
+
`;
|
|
17285
|
+
const prefixWithoutSection = `${pageTitle}
|
|
17286
|
+
|
|
17287
|
+
`;
|
|
17288
|
+
if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
|
|
17289
|
+
text = text.slice(prefixWithSection.length);
|
|
17290
|
+
} else if (text.startsWith(prefixWithoutSection)) {
|
|
17291
|
+
text = text.slice(prefixWithoutSection.length);
|
|
17292
|
+
}
|
|
17293
|
+
parts.push(text.trim());
|
|
17294
|
+
}
|
|
17295
|
+
return parts.join("\n\n");
|
|
17296
|
+
}
|
|
17277
17297
|
var UpstashSearchStore = class {
|
|
17278
17298
|
index;
|
|
17279
17299
|
pagesNs;
|
|
@@ -17653,10 +17673,12 @@ var UpstashSearchStore = class {
|
|
|
17653
17673
|
});
|
|
17654
17674
|
const doc = results[0];
|
|
17655
17675
|
if (!doc || !doc.metadata) return null;
|
|
17676
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
17677
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
17656
17678
|
return {
|
|
17657
17679
|
url: doc.metadata.url,
|
|
17658
17680
|
title: doc.metadata.title,
|
|
17659
|
-
markdown: doc.metadata.markdown,
|
|
17681
|
+
markdown,
|
|
17660
17682
|
projectId: doc.metadata.projectId,
|
|
17661
17683
|
scopeName: doc.metadata.scopeName,
|
|
17662
17684
|
routeFile: doc.metadata.routeFile,
|
|
@@ -17676,6 +17698,37 @@ var UpstashSearchStore = class {
|
|
|
17676
17698
|
return null;
|
|
17677
17699
|
}
|
|
17678
17700
|
}
|
|
17701
|
+
/**
|
|
17702
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
17703
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
17704
|
+
*/
|
|
17705
|
+
async getChunksForPage(url, scope) {
|
|
17706
|
+
const chunks = [];
|
|
17707
|
+
let cursor = "0";
|
|
17708
|
+
try {
|
|
17709
|
+
for (; ; ) {
|
|
17710
|
+
const result = await this.chunksNs.range({
|
|
17711
|
+
cursor,
|
|
17712
|
+
limit: 100,
|
|
17713
|
+
includeMetadata: true
|
|
17714
|
+
});
|
|
17715
|
+
for (const doc of result.vectors) {
|
|
17716
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
17717
|
+
chunks.push({
|
|
17718
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
17719
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
17720
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
17721
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
17722
|
+
});
|
|
17723
|
+
}
|
|
17724
|
+
}
|
|
17725
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17726
|
+
cursor = result.nextCursor;
|
|
17727
|
+
}
|
|
17728
|
+
} catch {
|
|
17729
|
+
}
|
|
17730
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
17731
|
+
}
|
|
17679
17732
|
async fetchPageWithVector(url, scope) {
|
|
17680
17733
|
try {
|
|
17681
17734
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -20996,7 +21049,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20996
21049
|
keywords: r.keywords ?? [],
|
|
20997
21050
|
summary: r.summary ?? "",
|
|
20998
21051
|
tags: r.tags,
|
|
20999
|
-
markdown: r.markdown,
|
|
21000
21052
|
routeFile: r.routeFile,
|
|
21001
21053
|
routeResolution: r.routeResolution,
|
|
21002
21054
|
incomingLinks: r.incomingLinks,
|
|
@@ -21023,7 +21075,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21023
21075
|
keywords: r.keywords ?? [],
|
|
21024
21076
|
summary: r.summary ?? "",
|
|
21025
21077
|
tags: r.tags,
|
|
21026
|
-
markdown: r.markdown,
|
|
21027
21078
|
routeFile: r.routeFile,
|
|
21028
21079
|
routeResolution: r.routeResolution,
|
|
21029
21080
|
incomingLinks: r.incomingLinks,
|
|
@@ -21107,6 +21158,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21107
21158
|
let documentsUpserted = 0;
|
|
21108
21159
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
21109
21160
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
21161
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
21110
21162
|
const docs = changedChunks.map((chunk) => {
|
|
21111
21163
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
21112
21164
|
if (embeddingText.length > 2e3) {
|
|
@@ -21114,6 +21166,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21114
21166
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
21115
21167
|
);
|
|
21116
21168
|
}
|
|
21169
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
21117
21170
|
return {
|
|
21118
21171
|
id: chunk.chunkKey,
|
|
21119
21172
|
data: embeddingText,
|
|
@@ -21124,7 +21177,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21124
21177
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
21125
21178
|
headingPath: chunk.headingPath.join(" > "),
|
|
21126
21179
|
snippet: chunk.snippet,
|
|
21127
|
-
chunkText: embeddingText,
|
|
21180
|
+
chunkText: cappedText,
|
|
21128
21181
|
tags: chunk.tags,
|
|
21129
21182
|
ordinal: chunk.ordinal,
|
|
21130
21183
|
contentHash: chunk.contentHash,
|
package/dist/index.d.cts
CHANGED
|
@@ -40,7 +40,6 @@ interface PageVectorMetadata {
|
|
|
40
40
|
keywords: string[];
|
|
41
41
|
summary: string;
|
|
42
42
|
tags: string[];
|
|
43
|
-
markdown: string;
|
|
44
43
|
routeFile: string;
|
|
45
44
|
routeResolution: string;
|
|
46
45
|
incomingLinks: number;
|
|
@@ -121,6 +120,16 @@ declare class UpstashSearchStore {
|
|
|
121
120
|
metadata: Record<string, unknown>;
|
|
122
121
|
}>, scope: Scope): Promise<void>;
|
|
123
122
|
getPage(url: string, scope: Scope): Promise<PageRecord | null>;
|
|
123
|
+
/**
|
|
124
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
125
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
126
|
+
*/
|
|
127
|
+
getChunksForPage(url: string, scope: Scope): Promise<Array<{
|
|
128
|
+
chunkText: string;
|
|
129
|
+
ordinal: number;
|
|
130
|
+
sectionTitle: string;
|
|
131
|
+
headingPath: string[];
|
|
132
|
+
}>>;
|
|
124
133
|
fetchPageWithVector(url: string, scope: Scope): Promise<{
|
|
125
134
|
metadata: PageVectorMetadata;
|
|
126
135
|
vector: number[];
|
package/dist/index.d.ts
CHANGED
|
@@ -40,7 +40,6 @@ interface PageVectorMetadata {
|
|
|
40
40
|
keywords: string[];
|
|
41
41
|
summary: string;
|
|
42
42
|
tags: string[];
|
|
43
|
-
markdown: string;
|
|
44
43
|
routeFile: string;
|
|
45
44
|
routeResolution: string;
|
|
46
45
|
incomingLinks: number;
|
|
@@ -121,6 +120,16 @@ declare class UpstashSearchStore {
|
|
|
121
120
|
metadata: Record<string, unknown>;
|
|
122
121
|
}>, scope: Scope): Promise<void>;
|
|
123
122
|
getPage(url: string, scope: Scope): Promise<PageRecord | null>;
|
|
123
|
+
/**
|
|
124
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
125
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
126
|
+
*/
|
|
127
|
+
getChunksForPage(url: string, scope: Scope): Promise<Array<{
|
|
128
|
+
chunkText: string;
|
|
129
|
+
ordinal: number;
|
|
130
|
+
sectionTitle: string;
|
|
131
|
+
headingPath: string[];
|
|
132
|
+
}>>;
|
|
124
133
|
fetchPageWithVector(url: string, scope: Scope): Promise<{
|
|
125
134
|
metadata: PageVectorMetadata;
|
|
126
135
|
vector: number[];
|
package/dist/index.js
CHANGED
|
@@ -17262,6 +17262,26 @@ function ensureStateDirs(cwd, stateDir, scope) {
|
|
|
17262
17262
|
fs.mkdirSync(statePath, { recursive: true });
|
|
17263
17263
|
return { statePath };
|
|
17264
17264
|
}
|
|
17265
|
+
function reconstructMarkdownFromChunks(chunks, pageTitle) {
|
|
17266
|
+
if (chunks.length === 0) return "";
|
|
17267
|
+
const parts = [];
|
|
17268
|
+
for (const chunk of chunks) {
|
|
17269
|
+
let text = chunk.chunkText;
|
|
17270
|
+
const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}
|
|
17271
|
+
|
|
17272
|
+
`;
|
|
17273
|
+
const prefixWithoutSection = `${pageTitle}
|
|
17274
|
+
|
|
17275
|
+
`;
|
|
17276
|
+
if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
|
|
17277
|
+
text = text.slice(prefixWithSection.length);
|
|
17278
|
+
} else if (text.startsWith(prefixWithoutSection)) {
|
|
17279
|
+
text = text.slice(prefixWithoutSection.length);
|
|
17280
|
+
}
|
|
17281
|
+
parts.push(text.trim());
|
|
17282
|
+
}
|
|
17283
|
+
return parts.join("\n\n");
|
|
17284
|
+
}
|
|
17265
17285
|
var UpstashSearchStore = class {
|
|
17266
17286
|
index;
|
|
17267
17287
|
pagesNs;
|
|
@@ -17641,10 +17661,12 @@ var UpstashSearchStore = class {
|
|
|
17641
17661
|
});
|
|
17642
17662
|
const doc = results[0];
|
|
17643
17663
|
if (!doc || !doc.metadata) return null;
|
|
17664
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
17665
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
17644
17666
|
return {
|
|
17645
17667
|
url: doc.metadata.url,
|
|
17646
17668
|
title: doc.metadata.title,
|
|
17647
|
-
markdown: doc.metadata.markdown,
|
|
17669
|
+
markdown,
|
|
17648
17670
|
projectId: doc.metadata.projectId,
|
|
17649
17671
|
scopeName: doc.metadata.scopeName,
|
|
17650
17672
|
routeFile: doc.metadata.routeFile,
|
|
@@ -17664,6 +17686,37 @@ var UpstashSearchStore = class {
|
|
|
17664
17686
|
return null;
|
|
17665
17687
|
}
|
|
17666
17688
|
}
|
|
17689
|
+
/**
|
|
17690
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
17691
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
17692
|
+
*/
|
|
17693
|
+
async getChunksForPage(url, scope) {
|
|
17694
|
+
const chunks = [];
|
|
17695
|
+
let cursor = "0";
|
|
17696
|
+
try {
|
|
17697
|
+
for (; ; ) {
|
|
17698
|
+
const result = await this.chunksNs.range({
|
|
17699
|
+
cursor,
|
|
17700
|
+
limit: 100,
|
|
17701
|
+
includeMetadata: true
|
|
17702
|
+
});
|
|
17703
|
+
for (const doc of result.vectors) {
|
|
17704
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
17705
|
+
chunks.push({
|
|
17706
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
17707
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
17708
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
17709
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
17710
|
+
});
|
|
17711
|
+
}
|
|
17712
|
+
}
|
|
17713
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17714
|
+
cursor = result.nextCursor;
|
|
17715
|
+
}
|
|
17716
|
+
} catch {
|
|
17717
|
+
}
|
|
17718
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
17719
|
+
}
|
|
17667
17720
|
async fetchPageWithVector(url, scope) {
|
|
17668
17721
|
try {
|
|
17669
17722
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -20984,7 +21037,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20984
21037
|
keywords: r.keywords ?? [],
|
|
20985
21038
|
summary: r.summary ?? "",
|
|
20986
21039
|
tags: r.tags,
|
|
20987
|
-
markdown: r.markdown,
|
|
20988
21040
|
routeFile: r.routeFile,
|
|
20989
21041
|
routeResolution: r.routeResolution,
|
|
20990
21042
|
incomingLinks: r.incomingLinks,
|
|
@@ -21011,7 +21063,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21011
21063
|
keywords: r.keywords ?? [],
|
|
21012
21064
|
summary: r.summary ?? "",
|
|
21013
21065
|
tags: r.tags,
|
|
21014
|
-
markdown: r.markdown,
|
|
21015
21066
|
routeFile: r.routeFile,
|
|
21016
21067
|
routeResolution: r.routeResolution,
|
|
21017
21068
|
incomingLinks: r.incomingLinks,
|
|
@@ -21095,6 +21146,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21095
21146
|
let documentsUpserted = 0;
|
|
21096
21147
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
21097
21148
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
21149
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
21098
21150
|
const docs = changedChunks.map((chunk) => {
|
|
21099
21151
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
21100
21152
|
if (embeddingText.length > 2e3) {
|
|
@@ -21102,6 +21154,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21102
21154
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
21103
21155
|
);
|
|
21104
21156
|
}
|
|
21157
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
21105
21158
|
return {
|
|
21106
21159
|
id: chunk.chunkKey,
|
|
21107
21160
|
data: embeddingText,
|
|
@@ -21112,7 +21165,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21112
21165
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
21113
21166
|
headingPath: chunk.headingPath.join(" > "),
|
|
21114
21167
|
snippet: chunk.snippet,
|
|
21115
|
-
chunkText: embeddingText,
|
|
21168
|
+
chunkText: cappedText,
|
|
21116
21169
|
tags: chunk.tags,
|
|
21117
21170
|
ordinal: chunk.ordinal,
|
|
21118
21171
|
contentHash: chunk.contentHash,
|
package/dist/sveltekit.cjs
CHANGED
|
@@ -17305,6 +17305,26 @@ function joinUrl(baseUrl, route) {
|
|
|
17305
17305
|
const routePart = ensureLeadingSlash(route);
|
|
17306
17306
|
return `${base}${routePart}`;
|
|
17307
17307
|
}
|
|
17308
|
+
function reconstructMarkdownFromChunks(chunks, pageTitle) {
|
|
17309
|
+
if (chunks.length === 0) return "";
|
|
17310
|
+
const parts = [];
|
|
17311
|
+
for (const chunk of chunks) {
|
|
17312
|
+
let text = chunk.chunkText;
|
|
17313
|
+
const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}
|
|
17314
|
+
|
|
17315
|
+
`;
|
|
17316
|
+
const prefixWithoutSection = `${pageTitle}
|
|
17317
|
+
|
|
17318
|
+
`;
|
|
17319
|
+
if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
|
|
17320
|
+
text = text.slice(prefixWithSection.length);
|
|
17321
|
+
} else if (text.startsWith(prefixWithoutSection)) {
|
|
17322
|
+
text = text.slice(prefixWithoutSection.length);
|
|
17323
|
+
}
|
|
17324
|
+
parts.push(text.trim());
|
|
17325
|
+
}
|
|
17326
|
+
return parts.join("\n\n");
|
|
17327
|
+
}
|
|
17308
17328
|
var UpstashSearchStore = class {
|
|
17309
17329
|
index;
|
|
17310
17330
|
pagesNs;
|
|
@@ -17684,10 +17704,12 @@ var UpstashSearchStore = class {
|
|
|
17684
17704
|
});
|
|
17685
17705
|
const doc = results[0];
|
|
17686
17706
|
if (!doc || !doc.metadata) return null;
|
|
17707
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
17708
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
17687
17709
|
return {
|
|
17688
17710
|
url: doc.metadata.url,
|
|
17689
17711
|
title: doc.metadata.title,
|
|
17690
|
-
markdown: doc.metadata.markdown,
|
|
17712
|
+
markdown,
|
|
17691
17713
|
projectId: doc.metadata.projectId,
|
|
17692
17714
|
scopeName: doc.metadata.scopeName,
|
|
17693
17715
|
routeFile: doc.metadata.routeFile,
|
|
@@ -17707,6 +17729,37 @@ var UpstashSearchStore = class {
|
|
|
17707
17729
|
return null;
|
|
17708
17730
|
}
|
|
17709
17731
|
}
|
|
17732
|
+
/**
|
|
17733
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
17734
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
17735
|
+
*/
|
|
17736
|
+
async getChunksForPage(url, scope) {
|
|
17737
|
+
const chunks = [];
|
|
17738
|
+
let cursor = "0";
|
|
17739
|
+
try {
|
|
17740
|
+
for (; ; ) {
|
|
17741
|
+
const result = await this.chunksNs.range({
|
|
17742
|
+
cursor,
|
|
17743
|
+
limit: 100,
|
|
17744
|
+
includeMetadata: true
|
|
17745
|
+
});
|
|
17746
|
+
for (const doc of result.vectors) {
|
|
17747
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
17748
|
+
chunks.push({
|
|
17749
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
17750
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
17751
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
17752
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
17753
|
+
});
|
|
17754
|
+
}
|
|
17755
|
+
}
|
|
17756
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17757
|
+
cursor = result.nextCursor;
|
|
17758
|
+
}
|
|
17759
|
+
} catch {
|
|
17760
|
+
}
|
|
17761
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
17762
|
+
}
|
|
17710
17763
|
async fetchPageWithVector(url, scope) {
|
|
17711
17764
|
try {
|
|
17712
17765
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -22138,7 +22191,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22138
22191
|
keywords: r.keywords ?? [],
|
|
22139
22192
|
summary: r.summary ?? "",
|
|
22140
22193
|
tags: r.tags,
|
|
22141
|
-
markdown: r.markdown,
|
|
22142
22194
|
routeFile: r.routeFile,
|
|
22143
22195
|
routeResolution: r.routeResolution,
|
|
22144
22196
|
incomingLinks: r.incomingLinks,
|
|
@@ -22165,7 +22217,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22165
22217
|
keywords: r.keywords ?? [],
|
|
22166
22218
|
summary: r.summary ?? "",
|
|
22167
22219
|
tags: r.tags,
|
|
22168
|
-
markdown: r.markdown,
|
|
22169
22220
|
routeFile: r.routeFile,
|
|
22170
22221
|
routeResolution: r.routeResolution,
|
|
22171
22222
|
incomingLinks: r.incomingLinks,
|
|
@@ -22249,6 +22300,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22249
22300
|
let documentsUpserted = 0;
|
|
22250
22301
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
22251
22302
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
22303
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
22252
22304
|
const docs = changedChunks.map((chunk) => {
|
|
22253
22305
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
22254
22306
|
if (embeddingText.length > 2e3) {
|
|
@@ -22256,6 +22308,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22256
22308
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
22257
22309
|
);
|
|
22258
22310
|
}
|
|
22311
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
22259
22312
|
return {
|
|
22260
22313
|
id: chunk.chunkKey,
|
|
22261
22314
|
data: embeddingText,
|
|
@@ -22266,7 +22319,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22266
22319
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
22267
22320
|
headingPath: chunk.headingPath.join(" > "),
|
|
22268
22321
|
snippet: chunk.snippet,
|
|
22269
|
-
chunkText: embeddingText,
|
|
22322
|
+
chunkText: cappedText,
|
|
22270
22323
|
tags: chunk.tags,
|
|
22271
22324
|
ordinal: chunk.ordinal,
|
|
22272
22325
|
contentHash: chunk.contentHash,
|
package/dist/sveltekit.js
CHANGED
|
@@ -17293,6 +17293,26 @@ function joinUrl(baseUrl, route) {
|
|
|
17293
17293
|
const routePart = ensureLeadingSlash(route);
|
|
17294
17294
|
return `${base}${routePart}`;
|
|
17295
17295
|
}
|
|
17296
|
+
function reconstructMarkdownFromChunks(chunks, pageTitle) {
|
|
17297
|
+
if (chunks.length === 0) return "";
|
|
17298
|
+
const parts = [];
|
|
17299
|
+
for (const chunk of chunks) {
|
|
17300
|
+
let text = chunk.chunkText;
|
|
17301
|
+
const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}
|
|
17302
|
+
|
|
17303
|
+
`;
|
|
17304
|
+
const prefixWithoutSection = `${pageTitle}
|
|
17305
|
+
|
|
17306
|
+
`;
|
|
17307
|
+
if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
|
|
17308
|
+
text = text.slice(prefixWithSection.length);
|
|
17309
|
+
} else if (text.startsWith(prefixWithoutSection)) {
|
|
17310
|
+
text = text.slice(prefixWithoutSection.length);
|
|
17311
|
+
}
|
|
17312
|
+
parts.push(text.trim());
|
|
17313
|
+
}
|
|
17314
|
+
return parts.join("\n\n");
|
|
17315
|
+
}
|
|
17296
17316
|
var UpstashSearchStore = class {
|
|
17297
17317
|
index;
|
|
17298
17318
|
pagesNs;
|
|
@@ -17672,10 +17692,12 @@ var UpstashSearchStore = class {
|
|
|
17672
17692
|
});
|
|
17673
17693
|
const doc = results[0];
|
|
17674
17694
|
if (!doc || !doc.metadata) return null;
|
|
17695
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
17696
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
17675
17697
|
return {
|
|
17676
17698
|
url: doc.metadata.url,
|
|
17677
17699
|
title: doc.metadata.title,
|
|
17678
|
-
markdown: doc.metadata.markdown,
|
|
17700
|
+
markdown,
|
|
17679
17701
|
projectId: doc.metadata.projectId,
|
|
17680
17702
|
scopeName: doc.metadata.scopeName,
|
|
17681
17703
|
routeFile: doc.metadata.routeFile,
|
|
@@ -17695,6 +17717,37 @@ var UpstashSearchStore = class {
|
|
|
17695
17717
|
return null;
|
|
17696
17718
|
}
|
|
17697
17719
|
}
|
|
17720
|
+
/**
|
|
17721
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
17722
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
17723
|
+
*/
|
|
17724
|
+
async getChunksForPage(url, scope) {
|
|
17725
|
+
const chunks = [];
|
|
17726
|
+
let cursor = "0";
|
|
17727
|
+
try {
|
|
17728
|
+
for (; ; ) {
|
|
17729
|
+
const result = await this.chunksNs.range({
|
|
17730
|
+
cursor,
|
|
17731
|
+
limit: 100,
|
|
17732
|
+
includeMetadata: true
|
|
17733
|
+
});
|
|
17734
|
+
for (const doc of result.vectors) {
|
|
17735
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
17736
|
+
chunks.push({
|
|
17737
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
17738
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
17739
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
17740
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
17741
|
+
});
|
|
17742
|
+
}
|
|
17743
|
+
}
|
|
17744
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17745
|
+
cursor = result.nextCursor;
|
|
17746
|
+
}
|
|
17747
|
+
} catch {
|
|
17748
|
+
}
|
|
17749
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
17750
|
+
}
|
|
17698
17751
|
async fetchPageWithVector(url, scope) {
|
|
17699
17752
|
try {
|
|
17700
17753
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -22126,7 +22179,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22126
22179
|
keywords: r.keywords ?? [],
|
|
22127
22180
|
summary: r.summary ?? "",
|
|
22128
22181
|
tags: r.tags,
|
|
22129
|
-
markdown: r.markdown,
|
|
22130
22182
|
routeFile: r.routeFile,
|
|
22131
22183
|
routeResolution: r.routeResolution,
|
|
22132
22184
|
incomingLinks: r.incomingLinks,
|
|
@@ -22153,7 +22205,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22153
22205
|
keywords: r.keywords ?? [],
|
|
22154
22206
|
summary: r.summary ?? "",
|
|
22155
22207
|
tags: r.tags,
|
|
22156
|
-
markdown: r.markdown,
|
|
22157
22208
|
routeFile: r.routeFile,
|
|
22158
22209
|
routeResolution: r.routeResolution,
|
|
22159
22210
|
incomingLinks: r.incomingLinks,
|
|
@@ -22237,6 +22288,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22237
22288
|
let documentsUpserted = 0;
|
|
22238
22289
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
22239
22290
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
22291
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
22240
22292
|
const docs = changedChunks.map((chunk) => {
|
|
22241
22293
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
22242
22294
|
if (embeddingText.length > 2e3) {
|
|
@@ -22244,6 +22296,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22244
22296
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
22245
22297
|
);
|
|
22246
22298
|
}
|
|
22299
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
22247
22300
|
return {
|
|
22248
22301
|
id: chunk.chunkKey,
|
|
22249
22302
|
data: embeddingText,
|
|
@@ -22254,7 +22307,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22254
22307
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
22255
22308
|
headingPath: chunk.headingPath.join(" > "),
|
|
22256
22309
|
snippet: chunk.snippet,
|
|
22257
|
-
chunkText: embeddingText,
|
|
22310
|
+
chunkText: cappedText,
|
|
22258
22311
|
tags: chunk.tags,
|
|
22259
22312
|
ordinal: chunk.ordinal,
|
|
22260
22313
|
contentHash: chunk.contentHash,
|