searchsocket 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -12,7 +12,7 @@ import { Command, Option } from "commander";
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.6.2",
15
+ version: "0.6.3",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -863,6 +863,26 @@ import path11 from "path";
863
863
 
864
864
  // src/vector/upstash.ts
865
865
  import { QueryMode, FusionAlgorithm } from "@upstash/vector";
866
/**
 * Rebuild a page's markdown from its stored chunks.
 *
 * A chunk's text may begin with a title prefix: either
 * "<pageTitle> \u2014 <sectionTitle>" or just "<pageTitle>", followed by a
 * blank line. When such a prefix is present it is stripped; the remainder is
 * trimmed and the pieces are joined with blank lines, in the order given.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks - Chunks in output order.
 * @param {string} pageTitle - Title used to recognise the prefix.
 * @returns {string} The joined markdown, or "" when there is nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!chunks?.length) return "";
  // Loop-invariant: the title-only prefix is identical for every chunk.
  const titlePrefix = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // A chunk without stored text contributes nothing instead of throwing.
    let body = String(chunk?.chunkText ?? "");
    const sectionTitle = chunk?.sectionTitle;
    const sectionPrefix = `${pageTitle} \u2014 ${sectionTitle}\n\n`;
    if (sectionTitle && body.startsWith(sectionPrefix)) {
      body = body.slice(sectionPrefix.length);
    } else if (body.startsWith(titlePrefix)) {
      body = body.slice(titlePrefix.length);
    }
    body = body.trim();
    // Empty pieces would otherwise leave runs of blank lines in the result.
    if (body) parts.push(body);
  }
  return parts.join("\n\n");
}
866
886
  var UpstashSearchStore = class {
867
887
  index;
868
888
  pagesNs;
@@ -1242,10 +1262,12 @@ var UpstashSearchStore = class {
1242
1262
  });
1243
1263
  const doc = results[0];
1244
1264
  if (!doc || !doc.metadata) return null;
1265
+ const chunks = await this.getChunksForPage(url, scope);
1266
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
1245
1267
  return {
1246
1268
  url: doc.metadata.url,
1247
1269
  title: doc.metadata.title,
1248
- markdown: doc.metadata.markdown,
1270
+ markdown,
1249
1271
  projectId: doc.metadata.projectId,
1250
1272
  scopeName: doc.metadata.scopeName,
1251
1273
  routeFile: doc.metadata.routeFile,
@@ -1265,6 +1287,37 @@ var UpstashSearchStore = class {
1265
1287
  return null;
1266
1288
  }
1267
1289
  }
1290
+ /**
1291
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
1292
+ * Used to reconstruct full page markdown from chunk content.
1293
+ */
1294
+ async getChunksForPage(url, scope) {
1295
+ const chunks = [];
1296
+ let cursor = "0";
1297
+ try {
1298
+ for (; ; ) {
1299
+ const result = await this.chunksNs.range({
1300
+ cursor,
1301
+ limit: 100,
1302
+ includeMetadata: true
1303
+ });
1304
+ for (const doc of result.vectors) {
1305
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
1306
+ chunks.push({
1307
+ chunkText: doc.metadata.chunkText ?? "",
1308
+ ordinal: doc.metadata.ordinal ?? 0,
1309
+ sectionTitle: doc.metadata.sectionTitle ?? "",
1310
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
1311
+ });
1312
+ }
1313
+ }
1314
+ if (!result.nextCursor || result.nextCursor === "0") break;
1315
+ cursor = result.nextCursor;
1316
+ }
1317
+ } catch {
1318
+ }
1319
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
1320
+ }
1268
1321
  async fetchPageWithVector(url, scope) {
1269
1322
  try {
1270
1323
  const results = await this.pagesNs.fetch([url], {
@@ -3737,7 +3790,6 @@ var IndexPipeline = class _IndexPipeline {
3737
3790
  keywords: r.keywords ?? [],
3738
3791
  summary: r.summary ?? "",
3739
3792
  tags: r.tags,
3740
- markdown: r.markdown,
3741
3793
  routeFile: r.routeFile,
3742
3794
  routeResolution: r.routeResolution,
3743
3795
  incomingLinks: r.incomingLinks,
@@ -3764,7 +3816,6 @@ var IndexPipeline = class _IndexPipeline {
3764
3816
  keywords: r.keywords ?? [],
3765
3817
  summary: r.summary ?? "",
3766
3818
  tags: r.tags,
3767
- markdown: r.markdown,
3768
3819
  routeFile: r.routeFile,
3769
3820
  routeResolution: r.routeResolution,
3770
3821
  incomingLinks: r.incomingLinks,
@@ -3848,6 +3899,7 @@ var IndexPipeline = class _IndexPipeline {
3848
3899
  let documentsUpserted = 0;
3849
3900
  if (!options.dryRun && changedChunks.length > 0) {
3850
3901
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
3902
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
3851
3903
  const docs = changedChunks.map((chunk) => {
3852
3904
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
3853
3905
  if (embeddingText.length > 2e3) {
@@ -3855,6 +3907,7 @@ var IndexPipeline = class _IndexPipeline {
3855
3907
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
3856
3908
  );
3857
3909
  }
3910
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
3858
3911
  return {
3859
3912
  id: chunk.chunkKey,
3860
3913
  data: embeddingText,
@@ -3865,7 +3918,7 @@ var IndexPipeline = class _IndexPipeline {
3865
3918
  sectionTitle: chunk.sectionTitle ?? "",
3866
3919
  headingPath: chunk.headingPath.join(" > "),
3867
3920
  snippet: chunk.snippet,
3868
- chunkText: embeddingText,
3921
+ chunkText: cappedText,
3869
3922
  tags: chunk.tags,
3870
3923
  ordinal: chunk.ordinal,
3871
3924
  contentHash: chunk.contentHash,
package/dist/index.cjs CHANGED
@@ -17274,6 +17274,26 @@ function ensureStateDirs(cwd, stateDir, scope) {
17274
17274
  fs__default.default.mkdirSync(statePath, { recursive: true });
17275
17275
  return { statePath };
17276
17276
  }
17277
/**
 * Rebuild a page's markdown from its stored chunks.
 *
 * A chunk's text may begin with a title prefix: either
 * "<pageTitle> \u2014 <sectionTitle>" or just "<pageTitle>", followed by a
 * blank line. When such a prefix is present it is stripped; the remainder is
 * trimmed and the pieces are joined with blank lines, in the order given.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks - Chunks in output order.
 * @param {string} pageTitle - Title used to recognise the prefix.
 * @returns {string} The joined markdown, or "" when there is nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!chunks?.length) return "";
  // Loop-invariant: the title-only prefix is identical for every chunk.
  const titlePrefix = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // A chunk without stored text contributes nothing instead of throwing.
    let body = String(chunk?.chunkText ?? "");
    const sectionTitle = chunk?.sectionTitle;
    const sectionPrefix = `${pageTitle} \u2014 ${sectionTitle}\n\n`;
    if (sectionTitle && body.startsWith(sectionPrefix)) {
      body = body.slice(sectionPrefix.length);
    } else if (body.startsWith(titlePrefix)) {
      body = body.slice(titlePrefix.length);
    }
    body = body.trim();
    // Empty pieces would otherwise leave runs of blank lines in the result.
    if (body) parts.push(body);
  }
  return parts.join("\n\n");
}
17277
17297
  var UpstashSearchStore = class {
17278
17298
  index;
17279
17299
  pagesNs;
@@ -17653,10 +17673,12 @@ var UpstashSearchStore = class {
17653
17673
  });
17654
17674
  const doc = results[0];
17655
17675
  if (!doc || !doc.metadata) return null;
17676
+ const chunks = await this.getChunksForPage(url, scope);
17677
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
17656
17678
  return {
17657
17679
  url: doc.metadata.url,
17658
17680
  title: doc.metadata.title,
17659
- markdown: doc.metadata.markdown,
17681
+ markdown,
17660
17682
  projectId: doc.metadata.projectId,
17661
17683
  scopeName: doc.metadata.scopeName,
17662
17684
  routeFile: doc.metadata.routeFile,
@@ -17676,6 +17698,37 @@ var UpstashSearchStore = class {
17676
17698
  return null;
17677
17699
  }
17678
17700
  }
17701
+ /**
17702
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
17703
+ * Used to reconstruct full page markdown from chunk content.
17704
+ */
17705
+ async getChunksForPage(url, scope) {
17706
+ const chunks = [];
17707
+ let cursor = "0";
17708
+ try {
17709
+ for (; ; ) {
17710
+ const result = await this.chunksNs.range({
17711
+ cursor,
17712
+ limit: 100,
17713
+ includeMetadata: true
17714
+ });
17715
+ for (const doc of result.vectors) {
17716
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
17717
+ chunks.push({
17718
+ chunkText: doc.metadata.chunkText ?? "",
17719
+ ordinal: doc.metadata.ordinal ?? 0,
17720
+ sectionTitle: doc.metadata.sectionTitle ?? "",
17721
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
17722
+ });
17723
+ }
17724
+ }
17725
+ if (!result.nextCursor || result.nextCursor === "0") break;
17726
+ cursor = result.nextCursor;
17727
+ }
17728
+ } catch {
17729
+ }
17730
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
17731
+ }
17679
17732
  async fetchPageWithVector(url, scope) {
17680
17733
  try {
17681
17734
  const results = await this.pagesNs.fetch([url], {
@@ -20996,7 +21049,6 @@ var IndexPipeline = class _IndexPipeline {
20996
21049
  keywords: r.keywords ?? [],
20997
21050
  summary: r.summary ?? "",
20998
21051
  tags: r.tags,
20999
- markdown: r.markdown,
21000
21052
  routeFile: r.routeFile,
21001
21053
  routeResolution: r.routeResolution,
21002
21054
  incomingLinks: r.incomingLinks,
@@ -21023,7 +21075,6 @@ var IndexPipeline = class _IndexPipeline {
21023
21075
  keywords: r.keywords ?? [],
21024
21076
  summary: r.summary ?? "",
21025
21077
  tags: r.tags,
21026
- markdown: r.markdown,
21027
21078
  routeFile: r.routeFile,
21028
21079
  routeResolution: r.routeResolution,
21029
21080
  incomingLinks: r.incomingLinks,
@@ -21107,6 +21158,7 @@ var IndexPipeline = class _IndexPipeline {
21107
21158
  let documentsUpserted = 0;
21108
21159
  if (!options.dryRun && changedChunks.length > 0) {
21109
21160
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
21161
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
21110
21162
  const docs = changedChunks.map((chunk) => {
21111
21163
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
21112
21164
  if (embeddingText.length > 2e3) {
@@ -21114,6 +21166,7 @@ var IndexPipeline = class _IndexPipeline {
21114
21166
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
21115
21167
  );
21116
21168
  }
21169
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
21117
21170
  return {
21118
21171
  id: chunk.chunkKey,
21119
21172
  data: embeddingText,
@@ -21124,7 +21177,7 @@ var IndexPipeline = class _IndexPipeline {
21124
21177
  sectionTitle: chunk.sectionTitle ?? "",
21125
21178
  headingPath: chunk.headingPath.join(" > "),
21126
21179
  snippet: chunk.snippet,
21127
- chunkText: embeddingText,
21180
+ chunkText: cappedText,
21128
21181
  tags: chunk.tags,
21129
21182
  ordinal: chunk.ordinal,
21130
21183
  contentHash: chunk.contentHash,
package/dist/index.d.cts CHANGED
@@ -40,7 +40,6 @@ interface PageVectorMetadata {
40
40
  keywords: string[];
41
41
  summary: string;
42
42
  tags: string[];
43
- markdown: string;
44
43
  routeFile: string;
45
44
  routeResolution: string;
46
45
  incomingLinks: number;
@@ -121,6 +120,16 @@ declare class UpstashSearchStore {
121
120
  metadata: Record<string, unknown>;
122
121
  }>, scope: Scope): Promise<void>;
123
122
  getPage(url: string, scope: Scope): Promise<PageRecord | null>;
123
+ /**
124
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
125
+ * Used to reconstruct full page markdown from chunk content.
+ *
+ * @param url - Page URL whose chunks are wanted.
+ * @param scope - Scope the returned chunks belong to.
+ * @returns Promise resolving to one entry per matching chunk: its text, its
+ * ordinal, its section title and its heading path as an array of segments.
126
+ */
127
+ getChunksForPage(url: string, scope: Scope): Promise<Array<{
128
+ chunkText: string;
129
+ ordinal: number;
130
+ sectionTitle: string;
131
+ headingPath: string[];
132
+ }>>;
124
133
  fetchPageWithVector(url: string, scope: Scope): Promise<{
125
134
  metadata: PageVectorMetadata;
126
135
  vector: number[];
package/dist/index.d.ts CHANGED
@@ -40,7 +40,6 @@ interface PageVectorMetadata {
40
40
  keywords: string[];
41
41
  summary: string;
42
42
  tags: string[];
43
- markdown: string;
44
43
  routeFile: string;
45
44
  routeResolution: string;
46
45
  incomingLinks: number;
@@ -121,6 +120,16 @@ declare class UpstashSearchStore {
121
120
  metadata: Record<string, unknown>;
122
121
  }>, scope: Scope): Promise<void>;
123
122
  getPage(url: string, scope: Scope): Promise<PageRecord | null>;
123
+ /**
124
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
125
+ * Used to reconstruct full page markdown from chunk content.
+ *
+ * @param url - Page URL whose chunks are wanted.
+ * @param scope - Scope the returned chunks belong to.
+ * @returns Promise resolving to one entry per matching chunk: its text, its
+ * ordinal, its section title and its heading path as an array of segments.
126
+ */
127
+ getChunksForPage(url: string, scope: Scope): Promise<Array<{
128
+ chunkText: string;
129
+ ordinal: number;
130
+ sectionTitle: string;
131
+ headingPath: string[];
132
+ }>>;
124
133
  fetchPageWithVector(url: string, scope: Scope): Promise<{
125
134
  metadata: PageVectorMetadata;
126
135
  vector: number[];
package/dist/index.js CHANGED
@@ -17262,6 +17262,26 @@ function ensureStateDirs(cwd, stateDir, scope) {
17262
17262
  fs.mkdirSync(statePath, { recursive: true });
17263
17263
  return { statePath };
17264
17264
  }
17265
/**
 * Rebuild a page's markdown from its stored chunks.
 *
 * A chunk's text may begin with a title prefix: either
 * "<pageTitle> \u2014 <sectionTitle>" or just "<pageTitle>", followed by a
 * blank line. When such a prefix is present it is stripped; the remainder is
 * trimmed and the pieces are joined with blank lines, in the order given.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks - Chunks in output order.
 * @param {string} pageTitle - Title used to recognise the prefix.
 * @returns {string} The joined markdown, or "" when there is nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!chunks?.length) return "";
  // Loop-invariant: the title-only prefix is identical for every chunk.
  const titlePrefix = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // A chunk without stored text contributes nothing instead of throwing.
    let body = String(chunk?.chunkText ?? "");
    const sectionTitle = chunk?.sectionTitle;
    const sectionPrefix = `${pageTitle} \u2014 ${sectionTitle}\n\n`;
    if (sectionTitle && body.startsWith(sectionPrefix)) {
      body = body.slice(sectionPrefix.length);
    } else if (body.startsWith(titlePrefix)) {
      body = body.slice(titlePrefix.length);
    }
    body = body.trim();
    // Empty pieces would otherwise leave runs of blank lines in the result.
    if (body) parts.push(body);
  }
  return parts.join("\n\n");
}
17265
17285
  var UpstashSearchStore = class {
17266
17286
  index;
17267
17287
  pagesNs;
@@ -17641,10 +17661,12 @@ var UpstashSearchStore = class {
17641
17661
  });
17642
17662
  const doc = results[0];
17643
17663
  if (!doc || !doc.metadata) return null;
17664
+ const chunks = await this.getChunksForPage(url, scope);
17665
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
17644
17666
  return {
17645
17667
  url: doc.metadata.url,
17646
17668
  title: doc.metadata.title,
17647
- markdown: doc.metadata.markdown,
17669
+ markdown,
17648
17670
  projectId: doc.metadata.projectId,
17649
17671
  scopeName: doc.metadata.scopeName,
17650
17672
  routeFile: doc.metadata.routeFile,
@@ -17664,6 +17686,37 @@ var UpstashSearchStore = class {
17664
17686
  return null;
17665
17687
  }
17666
17688
  }
17689
+ /**
17690
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
17691
+ * Used to reconstruct full page markdown from chunk content.
17692
+ */
17693
+ async getChunksForPage(url, scope) {
17694
+ const chunks = [];
17695
+ let cursor = "0";
17696
+ try {
17697
+ for (; ; ) {
17698
+ const result = await this.chunksNs.range({
17699
+ cursor,
17700
+ limit: 100,
17701
+ includeMetadata: true
17702
+ });
17703
+ for (const doc of result.vectors) {
17704
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
17705
+ chunks.push({
17706
+ chunkText: doc.metadata.chunkText ?? "",
17707
+ ordinal: doc.metadata.ordinal ?? 0,
17708
+ sectionTitle: doc.metadata.sectionTitle ?? "",
17709
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
17710
+ });
17711
+ }
17712
+ }
17713
+ if (!result.nextCursor || result.nextCursor === "0") break;
17714
+ cursor = result.nextCursor;
17715
+ }
17716
+ } catch {
17717
+ }
17718
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
17719
+ }
17667
17720
  async fetchPageWithVector(url, scope) {
17668
17721
  try {
17669
17722
  const results = await this.pagesNs.fetch([url], {
@@ -20984,7 +21037,6 @@ var IndexPipeline = class _IndexPipeline {
20984
21037
  keywords: r.keywords ?? [],
20985
21038
  summary: r.summary ?? "",
20986
21039
  tags: r.tags,
20987
- markdown: r.markdown,
20988
21040
  routeFile: r.routeFile,
20989
21041
  routeResolution: r.routeResolution,
20990
21042
  incomingLinks: r.incomingLinks,
@@ -21011,7 +21063,6 @@ var IndexPipeline = class _IndexPipeline {
21011
21063
  keywords: r.keywords ?? [],
21012
21064
  summary: r.summary ?? "",
21013
21065
  tags: r.tags,
21014
- markdown: r.markdown,
21015
21066
  routeFile: r.routeFile,
21016
21067
  routeResolution: r.routeResolution,
21017
21068
  incomingLinks: r.incomingLinks,
@@ -21095,6 +21146,7 @@ var IndexPipeline = class _IndexPipeline {
21095
21146
  let documentsUpserted = 0;
21096
21147
  if (!options.dryRun && changedChunks.length > 0) {
21097
21148
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
21149
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
21098
21150
  const docs = changedChunks.map((chunk) => {
21099
21151
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
21100
21152
  if (embeddingText.length > 2e3) {
@@ -21102,6 +21154,7 @@ var IndexPipeline = class _IndexPipeline {
21102
21154
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
21103
21155
  );
21104
21156
  }
21157
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
21105
21158
  return {
21106
21159
  id: chunk.chunkKey,
21107
21160
  data: embeddingText,
@@ -21112,7 +21165,7 @@ var IndexPipeline = class _IndexPipeline {
21112
21165
  sectionTitle: chunk.sectionTitle ?? "",
21113
21166
  headingPath: chunk.headingPath.join(" > "),
21114
21167
  snippet: chunk.snippet,
21115
- chunkText: embeddingText,
21168
+ chunkText: cappedText,
21116
21169
  tags: chunk.tags,
21117
21170
  ordinal: chunk.ordinal,
21118
21171
  contentHash: chunk.contentHash,
@@ -17305,6 +17305,26 @@ function joinUrl(baseUrl, route) {
17305
17305
  const routePart = ensureLeadingSlash(route);
17306
17306
  return `${base}${routePart}`;
17307
17307
  }
17308
/**
 * Rebuild a page's markdown from its stored chunks.
 *
 * A chunk's text may begin with a title prefix: either
 * "<pageTitle> \u2014 <sectionTitle>" or just "<pageTitle>", followed by a
 * blank line. When such a prefix is present it is stripped; the remainder is
 * trimmed and the pieces are joined with blank lines, in the order given.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks - Chunks in output order.
 * @param {string} pageTitle - Title used to recognise the prefix.
 * @returns {string} The joined markdown, or "" when there is nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!chunks?.length) return "";
  // Loop-invariant: the title-only prefix is identical for every chunk.
  const titlePrefix = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // A chunk without stored text contributes nothing instead of throwing.
    let body = String(chunk?.chunkText ?? "");
    const sectionTitle = chunk?.sectionTitle;
    const sectionPrefix = `${pageTitle} \u2014 ${sectionTitle}\n\n`;
    if (sectionTitle && body.startsWith(sectionPrefix)) {
      body = body.slice(sectionPrefix.length);
    } else if (body.startsWith(titlePrefix)) {
      body = body.slice(titlePrefix.length);
    }
    body = body.trim();
    // Empty pieces would otherwise leave runs of blank lines in the result.
    if (body) parts.push(body);
  }
  return parts.join("\n\n");
}
17308
17328
  var UpstashSearchStore = class {
17309
17329
  index;
17310
17330
  pagesNs;
@@ -17684,10 +17704,12 @@ var UpstashSearchStore = class {
17684
17704
  });
17685
17705
  const doc = results[0];
17686
17706
  if (!doc || !doc.metadata) return null;
17707
+ const chunks = await this.getChunksForPage(url, scope);
17708
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
17687
17709
  return {
17688
17710
  url: doc.metadata.url,
17689
17711
  title: doc.metadata.title,
17690
- markdown: doc.metadata.markdown,
17712
+ markdown,
17691
17713
  projectId: doc.metadata.projectId,
17692
17714
  scopeName: doc.metadata.scopeName,
17693
17715
  routeFile: doc.metadata.routeFile,
@@ -17707,6 +17729,37 @@ var UpstashSearchStore = class {
17707
17729
  return null;
17708
17730
  }
17709
17731
  }
17732
+ /**
17733
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
17734
+ * Used to reconstruct full page markdown from chunk content.
17735
+ */
17736
+ async getChunksForPage(url, scope) {
17737
+ const chunks = [];
17738
+ let cursor = "0";
17739
+ try {
17740
+ for (; ; ) {
17741
+ const result = await this.chunksNs.range({
17742
+ cursor,
17743
+ limit: 100,
17744
+ includeMetadata: true
17745
+ });
17746
+ for (const doc of result.vectors) {
17747
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
17748
+ chunks.push({
17749
+ chunkText: doc.metadata.chunkText ?? "",
17750
+ ordinal: doc.metadata.ordinal ?? 0,
17751
+ sectionTitle: doc.metadata.sectionTitle ?? "",
17752
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
17753
+ });
17754
+ }
17755
+ }
17756
+ if (!result.nextCursor || result.nextCursor === "0") break;
17757
+ cursor = result.nextCursor;
17758
+ }
17759
+ } catch {
17760
+ }
17761
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
17762
+ }
17710
17763
  async fetchPageWithVector(url, scope) {
17711
17764
  try {
17712
17765
  const results = await this.pagesNs.fetch([url], {
@@ -22138,7 +22191,6 @@ var IndexPipeline = class _IndexPipeline {
22138
22191
  keywords: r.keywords ?? [],
22139
22192
  summary: r.summary ?? "",
22140
22193
  tags: r.tags,
22141
- markdown: r.markdown,
22142
22194
  routeFile: r.routeFile,
22143
22195
  routeResolution: r.routeResolution,
22144
22196
  incomingLinks: r.incomingLinks,
@@ -22165,7 +22217,6 @@ var IndexPipeline = class _IndexPipeline {
22165
22217
  keywords: r.keywords ?? [],
22166
22218
  summary: r.summary ?? "",
22167
22219
  tags: r.tags,
22168
- markdown: r.markdown,
22169
22220
  routeFile: r.routeFile,
22170
22221
  routeResolution: r.routeResolution,
22171
22222
  incomingLinks: r.incomingLinks,
@@ -22249,6 +22300,7 @@ var IndexPipeline = class _IndexPipeline {
22249
22300
  let documentsUpserted = 0;
22250
22301
  if (!options.dryRun && changedChunks.length > 0) {
22251
22302
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
22303
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
22252
22304
  const docs = changedChunks.map((chunk) => {
22253
22305
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
22254
22306
  if (embeddingText.length > 2e3) {
@@ -22256,6 +22308,7 @@ var IndexPipeline = class _IndexPipeline {
22256
22308
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
22257
22309
  );
22258
22310
  }
22311
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
22259
22312
  return {
22260
22313
  id: chunk.chunkKey,
22261
22314
  data: embeddingText,
@@ -22266,7 +22319,7 @@ var IndexPipeline = class _IndexPipeline {
22266
22319
  sectionTitle: chunk.sectionTitle ?? "",
22267
22320
  headingPath: chunk.headingPath.join(" > "),
22268
22321
  snippet: chunk.snippet,
22269
- chunkText: embeddingText,
22322
+ chunkText: cappedText,
22270
22323
  tags: chunk.tags,
22271
22324
  ordinal: chunk.ordinal,
22272
22325
  contentHash: chunk.contentHash,
package/dist/sveltekit.js CHANGED
@@ -17293,6 +17293,26 @@ function joinUrl(baseUrl, route) {
17293
17293
  const routePart = ensureLeadingSlash(route);
17294
17294
  return `${base}${routePart}`;
17295
17295
  }
17296
/**
 * Rebuild a page's markdown from its stored chunks.
 *
 * A chunk's text may begin with a title prefix: either
 * "<pageTitle> \u2014 <sectionTitle>" or just "<pageTitle>", followed by a
 * blank line. When such a prefix is present it is stripped; the remainder is
 * trimmed and the pieces are joined with blank lines, in the order given.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks - Chunks in output order.
 * @param {string} pageTitle - Title used to recognise the prefix.
 * @returns {string} The joined markdown, or "" when there is nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!chunks?.length) return "";
  // Loop-invariant: the title-only prefix is identical for every chunk.
  const titlePrefix = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // A chunk without stored text contributes nothing instead of throwing.
    let body = String(chunk?.chunkText ?? "");
    const sectionTitle = chunk?.sectionTitle;
    const sectionPrefix = `${pageTitle} \u2014 ${sectionTitle}\n\n`;
    if (sectionTitle && body.startsWith(sectionPrefix)) {
      body = body.slice(sectionPrefix.length);
    } else if (body.startsWith(titlePrefix)) {
      body = body.slice(titlePrefix.length);
    }
    body = body.trim();
    // Empty pieces would otherwise leave runs of blank lines in the result.
    if (body) parts.push(body);
  }
  return parts.join("\n\n");
}
17296
17316
  var UpstashSearchStore = class {
17297
17317
  index;
17298
17318
  pagesNs;
@@ -17672,10 +17692,12 @@ var UpstashSearchStore = class {
17672
17692
  });
17673
17693
  const doc = results[0];
17674
17694
  if (!doc || !doc.metadata) return null;
17695
+ const chunks = await this.getChunksForPage(url, scope);
17696
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
17675
17697
  return {
17676
17698
  url: doc.metadata.url,
17677
17699
  title: doc.metadata.title,
17678
- markdown: doc.metadata.markdown,
17700
+ markdown,
17679
17701
  projectId: doc.metadata.projectId,
17680
17702
  scopeName: doc.metadata.scopeName,
17681
17703
  routeFile: doc.metadata.routeFile,
@@ -17695,6 +17717,37 @@ var UpstashSearchStore = class {
17695
17717
  return null;
17696
17718
  }
17697
17719
  }
17720
+ /**
17721
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
17722
+ * Used to reconstruct full page markdown from chunk content.
17723
+ */
17724
+ async getChunksForPage(url, scope) {
17725
+ const chunks = [];
17726
+ let cursor = "0";
17727
+ try {
17728
+ for (; ; ) {
17729
+ const result = await this.chunksNs.range({
17730
+ cursor,
17731
+ limit: 100,
17732
+ includeMetadata: true
17733
+ });
17734
+ for (const doc of result.vectors) {
17735
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
17736
+ chunks.push({
17737
+ chunkText: doc.metadata.chunkText ?? "",
17738
+ ordinal: doc.metadata.ordinal ?? 0,
17739
+ sectionTitle: doc.metadata.sectionTitle ?? "",
17740
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
17741
+ });
17742
+ }
17743
+ }
17744
+ if (!result.nextCursor || result.nextCursor === "0") break;
17745
+ cursor = result.nextCursor;
17746
+ }
17747
+ } catch {
17748
+ }
17749
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
17750
+ }
17698
17751
  async fetchPageWithVector(url, scope) {
17699
17752
  try {
17700
17753
  const results = await this.pagesNs.fetch([url], {
@@ -22126,7 +22179,6 @@ var IndexPipeline = class _IndexPipeline {
22126
22179
  keywords: r.keywords ?? [],
22127
22180
  summary: r.summary ?? "",
22128
22181
  tags: r.tags,
22129
- markdown: r.markdown,
22130
22182
  routeFile: r.routeFile,
22131
22183
  routeResolution: r.routeResolution,
22132
22184
  incomingLinks: r.incomingLinks,
@@ -22153,7 +22205,6 @@ var IndexPipeline = class _IndexPipeline {
22153
22205
  keywords: r.keywords ?? [],
22154
22206
  summary: r.summary ?? "",
22155
22207
  tags: r.tags,
22156
- markdown: r.markdown,
22157
22208
  routeFile: r.routeFile,
22158
22209
  routeResolution: r.routeResolution,
22159
22210
  incomingLinks: r.incomingLinks,
@@ -22237,6 +22288,7 @@ var IndexPipeline = class _IndexPipeline {
22237
22288
  let documentsUpserted = 0;
22238
22289
  if (!options.dryRun && changedChunks.length > 0) {
22239
22290
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
22291
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
22240
22292
  const docs = changedChunks.map((chunk) => {
22241
22293
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
22242
22294
  if (embeddingText.length > 2e3) {
@@ -22244,6 +22296,7 @@ var IndexPipeline = class _IndexPipeline {
22244
22296
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
22245
22297
  );
22246
22298
  }
22299
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
22247
22300
  return {
22248
22301
  id: chunk.chunkKey,
22249
22302
  data: embeddingText,
@@ -22254,7 +22307,7 @@ var IndexPipeline = class _IndexPipeline {
22254
22307
  sectionTitle: chunk.sectionTitle ?? "",
22255
22308
  headingPath: chunk.headingPath.join(" > "),
22256
22309
  snippet: chunk.snippet,
22257
- chunkText: embeddingText,
22310
+ chunkText: cappedText,
22258
22311
  tags: chunk.tags,
22259
22312
  ordinal: chunk.ordinal,
22260
22313
  contentHash: chunk.contentHash,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "searchsocket",
3
- "version": "0.6.2",
3
+ "version": "0.6.3",
4
4
  "description": "Semantic site search and MCP retrieval for SvelteKit static sites",
5
5
  "license": "MIT",
6
6
  "author": "Greg Priday <greg@siteorigin.com>",