searchsocket 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/cli.ts
4
- import fs10 from "fs";
4
+ import fs8 from "fs";
5
5
  import fsp from "fs/promises";
6
- import path14 from "path";
6
+ import path12 from "path";
7
7
  import { execSync as execSync2 } from "child_process";
8
8
  import { config as dotenvConfig } from "dotenv";
9
9
  import chokidar from "chokidar";
@@ -12,7 +12,7 @@ import { Command } from "commander";
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.4.0",
15
+ version: "0.5.0",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -58,6 +58,11 @@ var package_default = {
58
58
  types: "./dist/client.d.ts",
59
59
  import: "./dist/client.js",
60
60
  require: "./dist/client.cjs"
61
+ },
62
+ "./scroll": {
63
+ types: "./dist/scroll.d.ts",
64
+ import: "./dist/scroll.js",
65
+ require: "./dist/scroll.cjs"
61
66
  }
62
67
  },
63
68
  scripts: {
@@ -65,15 +70,16 @@ var package_default = {
65
70
  clean: "rm -rf dist",
66
71
  typecheck: "tsc --noEmit",
67
72
  test: "vitest run",
68
- "test:watch": "vitest"
73
+ "test:watch": "vitest",
74
+ "test:quality": "SEARCHSOCKET_QUALITY_TESTS=1 vitest run tests/quality.test.ts"
69
75
  },
70
76
  engines: {
71
77
  node: ">=20"
72
78
  },
73
79
  packageManager: "pnpm@10.29.2",
74
80
  dependencies: {
75
- "@libsql/client": "^0.17.0",
76
81
  "@modelcontextprotocol/sdk": "^1.26.0",
82
+ "@upstash/search": "^0.1.7",
77
83
  cheerio: "^1.2.0",
78
84
  chokidar: "^5.0.0",
79
85
  commander: "^14.0.3",
@@ -91,6 +97,7 @@ var package_default = {
91
97
  "@types/express": "^5.0.6",
92
98
  "@types/node": "^25.2.2",
93
99
  "@types/turndown": "^5.0.6",
100
+ jsdom: "^28.1.0",
94
101
  tsup: "^8.5.1",
95
102
  typescript: "^5.9.3",
96
103
  vitest: "^4.0.18"
@@ -164,29 +171,18 @@ var searchSocketConfigSchema = z.object({
164
171
  prependTitle: z.boolean().optional(),
165
172
  pageSummaryChunk: z.boolean().optional()
166
173
  }).optional(),
167
- embeddings: z.object({
168
- provider: z.literal("jina").optional(),
169
- model: z.string().min(1).optional(),
170
- apiKey: z.string().min(1).optional(),
171
- apiKeyEnv: z.string().min(1).optional(),
172
- batchSize: z.number().int().positive().optional(),
173
- concurrency: z.number().int().positive().optional(),
174
- pricePer1kTokens: z.number().positive().optional()
175
- }).optional(),
176
- vector: z.object({
177
- dimension: z.number().int().positive().optional(),
178
- turso: z.object({
179
- url: z.string().url().optional(),
180
- authToken: z.string().min(1).optional(),
181
- urlEnv: z.string().optional(),
182
- authTokenEnv: z.string().optional(),
183
- localPath: z.string().optional()
184
- }).optional()
174
+ upstash: z.object({
175
+ url: z.string().url().optional(),
176
+ token: z.string().min(1).optional(),
177
+ urlEnv: z.string().min(1).optional(),
178
+ tokenEnv: z.string().min(1).optional()
185
179
  }).optional(),
186
- rerank: z.object({
187
- enabled: z.boolean().optional(),
188
- topN: z.number().int().positive().optional(),
189
- model: z.string().optional()
180
+ search: z.object({
181
+ semanticWeight: z.number().min(0).max(1).optional(),
182
+ inputEnrichment: z.boolean().optional(),
183
+ reranking: z.boolean().optional(),
184
+ dualSearch: z.boolean().optional(),
185
+ pageSearchWeight: z.number().min(0).max(1).optional()
190
186
  }).optional(),
191
187
  ranking: z.object({
192
188
  enableIncomingLinkBoost: z.boolean().optional(),
@@ -196,11 +192,12 @@ var searchSocketConfigSchema = z.object({
196
192
  aggregationDecay: z.number().min(0).max(1).optional(),
197
193
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
198
194
  minScore: z.number().min(0).max(1).optional(),
195
+ scoreGapThreshold: z.number().min(0).max(1).optional(),
199
196
  weights: z.object({
200
197
  incomingLinks: z.number().optional(),
201
198
  depth: z.number().optional(),
202
- rerank: z.number().optional(),
203
- aggregation: z.number().optional()
199
+ aggregation: z.number().optional(),
200
+ titleMatch: z.number().optional()
204
201
  }).optional()
205
202
  }).optional(),
206
203
  api: z.object({
@@ -222,8 +219,7 @@ var searchSocketConfigSchema = z.object({
222
219
  }).optional()
223
220
  }).optional(),
224
221
  state: z.object({
225
- dir: z.string().optional(),
226
- writeMirror: z.boolean().optional()
222
+ dir: z.string().optional()
227
223
  }).optional()
228
224
  });
229
225
 
@@ -277,24 +273,16 @@ function createDefaultConfig(projectId) {
277
273
  prependTitle: true,
278
274
  pageSummaryChunk: true
279
275
  },
280
- embeddings: {
281
- provider: "jina",
282
- model: "jina-embeddings-v5-text-small",
283
- apiKeyEnv: "JINA_API_KEY",
284
- batchSize: 64,
285
- concurrency: 4
276
+ upstash: {
277
+ urlEnv: "UPSTASH_SEARCH_REST_URL",
278
+ tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
286
279
  },
287
- vector: {
288
- turso: {
289
- urlEnv: "TURSO_DATABASE_URL",
290
- authTokenEnv: "TURSO_AUTH_TOKEN",
291
- localPath: ".searchsocket/vectors.db"
292
- }
293
- },
294
- rerank: {
295
- enabled: true,
296
- topN: 20,
297
- model: "jina-reranker-v3"
280
+ search: {
281
+ semanticWeight: 0.75,
282
+ inputEnrichment: true,
283
+ reranking: true,
284
+ dualSearch: true,
285
+ pageSearchWeight: 0.3
298
286
  },
299
287
  ranking: {
300
288
  enableIncomingLinkBoost: true,
@@ -303,12 +291,13 @@ function createDefaultConfig(projectId) {
303
291
  aggregationCap: 5,
304
292
  aggregationDecay: 0.5,
305
293
  minChunkScoreRatio: 0.5,
306
- minScore: 0,
294
+ minScore: 0.3,
295
+ scoreGapThreshold: 0.4,
307
296
  weights: {
308
297
  incomingLinks: 0.05,
309
298
  depth: 0.03,
310
- rerank: 1,
311
- aggregation: 0.1
299
+ aggregation: 0.1,
300
+ titleMatch: 0.15
312
301
  }
313
302
  },
314
303
  api: {
@@ -326,8 +315,7 @@ function createDefaultConfig(projectId) {
326
315
  }
327
316
  },
328
317
  state: {
329
- dir: ".searchsocket",
330
- writeMirror: false
318
+ dir: ".searchsocket"
331
319
  }
332
320
  };
333
321
  }
@@ -435,21 +423,13 @@ ${issues}`
435
423
  ...defaults.chunking,
436
424
  ...parsed.chunking
437
425
  },
438
- embeddings: {
439
- ...defaults.embeddings,
440
- ...parsed.embeddings
426
+ upstash: {
427
+ ...defaults.upstash,
428
+ ...parsed.upstash
441
429
  },
442
- vector: {
443
- ...defaults.vector,
444
- ...parsed.vector,
445
- turso: {
446
- ...defaults.vector.turso,
447
- ...parsed.vector?.turso
448
- }
449
- },
450
- rerank: {
451
- ...defaults.rerank,
452
- ...parsed.rerank
430
+ search: {
431
+ ...defaults.search,
432
+ ...parsed.search
453
433
  },
454
434
  ranking: {
455
435
  ...defaults.ranking,
@@ -541,7 +521,8 @@ function writeMinimalConfig(cwd) {
541
521
  return target;
542
522
  }
543
523
  const content = `export default {
544
- embeddings: { apiKeyEnv: "JINA_API_KEY" }
524
+ // Upstash Search credentials (set via env vars or directly here)
525
+ // upstash: { urlEnv: "UPSTASH_SEARCH_REST_URL", tokenEnv: "UPSTASH_SEARCH_REST_TOKEN" }
545
526
  };
546
527
  `;
547
528
  fs.writeFileSync(target, content, "utf8");
@@ -704,576 +685,246 @@ import fs2 from "fs";
704
685
  import path2 from "path";
705
686
  function ensureStateDirs(cwd, stateDir, scope) {
706
687
  const statePath = path2.resolve(cwd, stateDir);
707
- const pagesPath = path2.join(statePath, "pages", scope.scopeName);
708
- fs2.mkdirSync(pagesPath, { recursive: true });
709
- return { statePath, pagesPath };
710
- }
711
-
712
- // src/embeddings/jina.ts
713
- import pLimit from "p-limit";
714
- function sleep(ms) {
715
- return new Promise((resolve) => {
716
- setTimeout(resolve, ms);
717
- });
718
- }
719
- var JinaEmbeddingsProvider = class {
720
- apiKey;
721
- batchSize;
722
- concurrency;
723
- defaultTask;
724
- constructor(options) {
725
- if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
726
- throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
727
- }
728
- if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
729
- throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
730
- }
731
- this.apiKey = options.apiKey;
732
- this.batchSize = options.batchSize;
733
- this.concurrency = options.concurrency;
734
- this.defaultTask = options.task ?? "retrieval.passage";
735
- }
736
- estimateTokens(text) {
737
- const normalized = text.trim();
738
- if (!normalized) {
739
- return 0;
740
- }
741
- const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
742
- const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
743
- const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
744
- const charEstimate = Math.ceil(normalized.length / 4);
745
- const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
746
- return Math.max(1, Math.max(charEstimate, lexicalEstimate));
747
- }
748
- async embedTexts(texts, modelId, task) {
749
- if (texts.length === 0) {
750
- return [];
751
- }
752
- const batches = [];
753
- for (let i = 0; i < texts.length; i += this.batchSize) {
754
- batches.push({
755
- index: i,
756
- values: texts.slice(i, i + this.batchSize)
757
- });
758
- }
759
- const outputs = new Array(batches.length);
760
- const limit = pLimit(this.concurrency);
761
- await Promise.all(
762
- batches.map(
763
- (batch, position) => limit(async () => {
764
- outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
765
- })
766
- )
767
- );
768
- return outputs.flat();
769
- }
770
- async embedWithRetry(texts, modelId, task) {
771
- const maxAttempts = 5;
772
- let attempt = 0;
773
- while (attempt < maxAttempts) {
774
- attempt += 1;
775
- let response;
776
- try {
777
- response = await fetch("https://api.jina.ai/v1/embeddings", {
778
- method: "POST",
779
- headers: {
780
- "content-type": "application/json",
781
- authorization: `Bearer ${this.apiKey}`
782
- },
783
- body: JSON.stringify({
784
- model: modelId,
785
- input: texts,
786
- task
787
- })
788
- });
789
- } catch (error) {
790
- if (attempt >= maxAttempts) {
791
- throw error;
792
- }
793
- await sleep(Math.min(2 ** attempt * 300, 5e3));
794
- continue;
795
- }
796
- if (!response.ok) {
797
- const retryable = response.status === 429 || response.status >= 500;
798
- if (!retryable || attempt >= maxAttempts) {
799
- const errorBody = await response.text();
800
- throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
801
- }
802
- await sleep(Math.min(2 ** attempt * 300, 5e3));
803
- continue;
804
- }
805
- const payload = await response.json();
806
- if (!payload.data || !Array.isArray(payload.data)) {
807
- throw new Error("Invalid Jina embeddings response format");
808
- }
809
- return payload.data.map((entry) => entry.embedding);
810
- }
811
- throw new Error("Unreachable retry state");
812
- }
813
- };
814
-
815
- // src/embeddings/factory.ts
816
- function createEmbeddingsProvider(config) {
817
- if (config.embeddings.provider !== "jina") {
818
- throw new SearchSocketError(
819
- "CONFIG_MISSING",
820
- `Unsupported embeddings provider ${config.embeddings.provider}`
821
- );
822
- }
823
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
824
- if (!apiKey) {
825
- throw new SearchSocketError(
826
- "CONFIG_MISSING",
827
- `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
828
- );
829
- }
830
- return new JinaEmbeddingsProvider({
831
- apiKey,
832
- batchSize: config.embeddings.batchSize,
833
- concurrency: config.embeddings.concurrency
834
- });
688
+ fs2.mkdirSync(statePath, { recursive: true });
689
+ return { statePath };
835
690
  }
836
691
 
837
692
  // src/indexing/pipeline.ts
838
- import path12 from "path";
839
-
840
- // src/vector/factory.ts
841
- import fs3 from "fs";
842
- import path3 from "path";
693
+ import path10 from "path";
843
694
 
844
- // src/core/serverless.ts
845
- function isServerless() {
846
- return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
695
+ // src/vector/upstash.ts
696
+ function chunkIndexName(scope) {
697
+ return `${scope.projectId}--${scope.scopeName}`;
847
698
  }
848
-
849
- // src/vector/turso.ts
850
- var TursoVectorStore = class {
699
+ function pageIndexName(scope) {
700
+ return `${scope.projectId}--${scope.scopeName}--pages`;
701
+ }
702
+ var UpstashSearchStore = class {
851
703
  client;
852
- dimension;
853
- chunksReady = false;
854
- registryReady = false;
855
- pagesReady = false;
856
704
  constructor(opts) {
857
705
  this.client = opts.client;
858
- this.dimension = opts.dimension;
859
- }
860
- async ensureRegistry() {
861
- if (this.registryReady) return;
862
- await this.client.execute(`
863
- CREATE TABLE IF NOT EXISTS registry (
864
- scope_key TEXT PRIMARY KEY,
865
- project_id TEXT NOT NULL,
866
- scope_name TEXT NOT NULL,
867
- model_id TEXT NOT NULL,
868
- last_indexed_at TEXT NOT NULL,
869
- vector_count INTEGER,
870
- last_estimate_tokens INTEGER,
871
- last_estimate_cost_usd REAL,
872
- last_estimate_changed_chunks INTEGER
873
- )
874
- `);
875
- const estimateCols = [
876
- { name: "last_estimate_tokens", def: "INTEGER" },
877
- { name: "last_estimate_cost_usd", def: "REAL" },
878
- { name: "last_estimate_changed_chunks", def: "INTEGER" }
879
- ];
880
- for (const col of estimateCols) {
881
- try {
882
- await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
883
- } catch (error) {
884
- if (error instanceof Error && !error.message.includes("duplicate column")) {
885
- throw error;
886
- }
887
- }
888
- }
889
- this.registryReady = true;
890
- }
891
- async ensureChunks(dim) {
892
- if (this.chunksReady) return;
893
- const exists = await this.chunksTableExists();
894
- if (exists) {
895
- const currentDim = await this.getChunksDimension();
896
- if (currentDim !== null && currentDim !== dim) {
897
- await this.client.batch([
898
- "DROP INDEX IF EXISTS idx",
899
- "DROP TABLE IF EXISTS chunks"
900
- ]);
901
- }
902
- }
903
- await this.client.batch([
904
- `CREATE TABLE IF NOT EXISTS chunks (
905
- id TEXT PRIMARY KEY,
906
- project_id TEXT NOT NULL,
907
- scope_name TEXT NOT NULL,
908
- url TEXT NOT NULL,
909
- path TEXT NOT NULL,
910
- title TEXT NOT NULL,
911
- section_title TEXT NOT NULL DEFAULT '',
912
- heading_path TEXT NOT NULL DEFAULT '[]',
913
- snippet TEXT NOT NULL DEFAULT '',
914
- chunk_text TEXT NOT NULL DEFAULT '',
915
- ordinal INTEGER NOT NULL DEFAULT 0,
916
- content_hash TEXT NOT NULL DEFAULT '',
917
- model_id TEXT NOT NULL DEFAULT '',
918
- depth INTEGER NOT NULL DEFAULT 0,
919
- incoming_links INTEGER NOT NULL DEFAULT 0,
920
- route_file TEXT NOT NULL DEFAULT '',
921
- tags TEXT NOT NULL DEFAULT '[]',
922
- description TEXT NOT NULL DEFAULT '',
923
- keywords TEXT NOT NULL DEFAULT '[]',
924
- embedding F32_BLOB(${dim})
925
- )`,
926
- `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
927
- ]);
928
- this.chunksReady = true;
929
- }
930
- async ensurePages() {
931
- if (this.pagesReady) return;
932
- await this.client.execute(`
933
- CREATE TABLE IF NOT EXISTS pages (
934
- project_id TEXT NOT NULL,
935
- scope_name TEXT NOT NULL,
936
- url TEXT NOT NULL,
937
- title TEXT NOT NULL,
938
- markdown TEXT NOT NULL,
939
- route_file TEXT NOT NULL DEFAULT '',
940
- route_resolution TEXT NOT NULL DEFAULT 'exact',
941
- incoming_links INTEGER NOT NULL DEFAULT 0,
942
- outgoing_links INTEGER NOT NULL DEFAULT 0,
943
- depth INTEGER NOT NULL DEFAULT 0,
944
- tags TEXT NOT NULL DEFAULT '[]',
945
- indexed_at TEXT NOT NULL,
946
- PRIMARY KEY (project_id, scope_name, url)
947
- )
948
- `);
949
- this.pagesReady = true;
950
706
  }
951
- async chunksTableExists() {
952
- try {
953
- await this.client.execute("SELECT 1 FROM chunks LIMIT 0");
954
- return true;
955
- } catch (error) {
956
- if (error instanceof Error && error.message.includes("no such table")) {
957
- return false;
958
- }
959
- throw error;
960
- }
707
+ chunkIndex(scope) {
708
+ return this.client.index(chunkIndexName(scope));
961
709
  }
962
- /**
963
- * Read the current F32_BLOB dimension from the chunks table schema.
964
- * Returns null if the table doesn't exist or the dimension can't be parsed.
965
- */
966
- async getChunksDimension() {
967
- try {
968
- const rs = await this.client.execute(
969
- "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
970
- );
971
- if (rs.rows.length === 0) return null;
972
- const sql = rs.rows[0].sql;
973
- const match = sql.match(/F32_BLOB\((\d+)\)/i);
974
- return match ? parseInt(match[1], 10) : null;
975
- } catch {
976
- return null;
977
- }
710
+ pageIndex(scope) {
711
+ return this.client.index(pageIndexName(scope));
978
712
  }
979
- /**
980
- * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
981
- * Used by `clean --remote` for a full reset.
982
- */
983
- async dropAllTables() {
984
- await this.client.batch([
985
- "DROP INDEX IF EXISTS idx",
986
- "DROP TABLE IF EXISTS chunks",
987
- "DROP TABLE IF EXISTS registry",
988
- "DROP TABLE IF EXISTS pages"
989
- ]);
990
- this.chunksReady = false;
991
- this.registryReady = false;
992
- this.pagesReady = false;
993
- }
994
- async upsert(records, _scope) {
995
- if (records.length === 0) return;
996
- const dim = this.dimension ?? records[0].vector.length;
997
- await this.ensureChunks(dim);
713
+ async upsertChunks(chunks, scope) {
714
+ if (chunks.length === 0) return;
715
+ const index = this.chunkIndex(scope);
998
716
  const BATCH_SIZE = 100;
999
- for (let i = 0; i < records.length; i += BATCH_SIZE) {
1000
- const batch = records.slice(i, i + BATCH_SIZE);
1001
- const stmts = batch.map((r) => ({
1002
- sql: `INSERT OR REPLACE INTO chunks
1003
- (id, project_id, scope_name, url, path, title, section_title,
1004
- heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
1005
- incoming_links, route_file, tags, description, keywords, embedding)
1006
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
1007
- args: [
1008
- r.id,
1009
- r.metadata.projectId,
1010
- r.metadata.scopeName,
1011
- r.metadata.url,
1012
- r.metadata.path,
1013
- r.metadata.title,
1014
- r.metadata.sectionTitle,
1015
- JSON.stringify(r.metadata.headingPath),
1016
- r.metadata.snippet,
1017
- r.metadata.chunkText,
1018
- r.metadata.ordinal,
1019
- r.metadata.contentHash,
1020
- r.metadata.modelId,
1021
- r.metadata.depth,
1022
- r.metadata.incomingLinks,
1023
- r.metadata.routeFile,
1024
- JSON.stringify(r.metadata.tags),
1025
- r.metadata.description ?? "",
1026
- JSON.stringify(r.metadata.keywords ?? []),
1027
- JSON.stringify(r.vector)
1028
- ]
1029
- }));
1030
- await this.client.batch(stmts);
1031
- }
1032
- }
1033
- async query(queryVector, opts, scope) {
1034
- const dim = this.dimension ?? queryVector.length;
1035
- await this.ensureChunks(dim);
1036
- const queryJson = JSON.stringify(queryVector);
1037
- const rs = await this.client.execute({
1038
- sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
1039
- c.section_title, c.heading_path, c.snippet, c.chunk_text,
1040
- c.ordinal, c.content_hash,
1041
- c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
1042
- c.description, c.keywords,
1043
- vector_distance_cos(c.embedding, vector(?)) AS distance
1044
- FROM vector_top_k('idx', vector(?), ?) AS v
1045
- JOIN chunks AS c ON c.rowid = v.id`,
1046
- args: [queryJson, queryJson, opts.topK]
717
+ for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
718
+ const batch = chunks.slice(i, i + BATCH_SIZE);
719
+ await index.upsert(batch);
720
+ }
721
+ }
722
+ async search(query, opts, scope) {
723
+ const index = this.chunkIndex(scope);
724
+ const results = await index.search({
725
+ query,
726
+ limit: opts.limit,
727
+ semanticWeight: opts.semanticWeight,
728
+ inputEnrichment: opts.inputEnrichment,
729
+ reranking: opts.reranking,
730
+ filter: opts.filter
1047
731
  });
1048
- let hits = [];
1049
- for (const row of rs.rows) {
1050
- const projectId = row.project_id;
1051
- const scopeName = row.scope_name;
1052
- if (projectId !== scope.projectId || scopeName !== scope.scopeName) {
1053
- continue;
1054
- }
1055
- const rowPath = row.path;
1056
- if (opts.pathPrefix) {
1057
- const rawPrefix = opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}`;
1058
- const prefix = rawPrefix.endsWith("/") ? rawPrefix : `${rawPrefix}/`;
1059
- const normalizedPath = rowPath.replace(/\/$/, "");
1060
- const normalizedPrefix = rawPrefix.replace(/\/$/, "");
1061
- if (normalizedPath !== normalizedPrefix && !rowPath.startsWith(prefix)) {
1062
- continue;
1063
- }
1064
- }
1065
- const tags = JSON.parse(row.tags || "[]");
1066
- if (opts.tags && opts.tags.length > 0) {
1067
- if (!opts.tags.every((t) => tags.includes(t))) {
1068
- continue;
1069
- }
732
+ return results.map((doc) => ({
733
+ id: doc.id,
734
+ score: doc.score,
735
+ metadata: {
736
+ projectId: doc.metadata?.projectId ?? "",
737
+ scopeName: doc.metadata?.scopeName ?? "",
738
+ url: doc.content.url,
739
+ path: doc.metadata?.path ?? "",
740
+ title: doc.content.title,
741
+ sectionTitle: doc.content.sectionTitle,
742
+ headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
743
+ snippet: doc.metadata?.snippet ?? "",
744
+ chunkText: doc.content.text,
745
+ ordinal: doc.metadata?.ordinal ?? 0,
746
+ contentHash: doc.metadata?.contentHash ?? "",
747
+ depth: doc.metadata?.depth ?? 0,
748
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
749
+ routeFile: doc.metadata?.routeFile ?? "",
750
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
751
+ description: doc.metadata?.description || void 0,
752
+ keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
1070
753
  }
1071
- const distance = row.distance;
1072
- const score = 1 - distance;
1073
- const description = row.description || void 0;
1074
- const keywords = (() => {
1075
- const raw = row.keywords || "[]";
1076
- const parsed = JSON.parse(raw);
1077
- return parsed.length > 0 ? parsed : void 0;
1078
- })();
1079
- hits.push({
1080
- id: row.id,
1081
- score,
1082
- metadata: {
1083
- projectId,
1084
- scopeName,
1085
- url: row.url,
1086
- path: rowPath,
1087
- title: row.title,
1088
- sectionTitle: row.section_title,
1089
- headingPath: JSON.parse(row.heading_path || "[]"),
1090
- snippet: row.snippet,
1091
- chunkText: row.chunk_text || "",
1092
- ordinal: row.ordinal || 0,
1093
- contentHash: row.content_hash,
1094
- modelId: row.model_id,
1095
- depth: row.depth,
1096
- incomingLinks: row.incoming_links,
1097
- routeFile: row.route_file,
1098
- tags,
1099
- description,
1100
- keywords
1101
- }
754
+ }));
755
+ }
756
+ async searchPages(query, opts, scope) {
757
+ const index = this.pageIndex(scope);
758
+ let results;
759
+ try {
760
+ results = await index.search({
761
+ query,
762
+ limit: opts.limit,
763
+ semanticWeight: opts.semanticWeight,
764
+ inputEnrichment: opts.inputEnrichment,
765
+ reranking: true,
766
+ filter: opts.filter
1102
767
  });
768
+ } catch {
769
+ return [];
1103
770
  }
1104
- hits.sort((a, b) => b.score - a.score);
1105
- return hits;
771
+ return results.map((doc) => ({
772
+ id: doc.id,
773
+ score: doc.score,
774
+ title: doc.content.title,
775
+ url: doc.content.url,
776
+ description: doc.content.description ?? "",
777
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
778
+ depth: doc.metadata?.depth ?? 0,
779
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
780
+ routeFile: doc.metadata?.routeFile ?? ""
781
+ }));
1106
782
  }
1107
783
  async deleteByIds(ids, scope) {
1108
784
  if (ids.length === 0) return;
785
+ const index = this.chunkIndex(scope);
1109
786
  const BATCH_SIZE = 500;
1110
787
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
1111
788
  const batch = ids.slice(i, i + BATCH_SIZE);
1112
- const placeholders = batch.map(() => "?").join(", ");
1113
- await this.client.execute({
1114
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
1115
- args: [scope.projectId, scope.scopeName, ...batch]
1116
- });
789
+ await index.delete(batch);
1117
790
  }
1118
791
  }
1119
792
  async deleteScope(scope) {
1120
- await this.ensureRegistry();
1121
793
  try {
1122
- await this.client.execute({
1123
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ?`,
1124
- args: [scope.projectId, scope.scopeName]
1125
- });
1126
- } catch (error) {
1127
- if (error instanceof Error && !error.message.includes("no such table")) {
1128
- throw error;
1129
- }
794
+ const chunkIdx = this.chunkIndex(scope);
795
+ await chunkIdx.deleteIndex();
796
+ } catch {
1130
797
  }
1131
798
  try {
1132
- await this.client.execute({
1133
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
1134
- args: [scope.projectId, scope.scopeName]
1135
- });
1136
- } catch (error) {
1137
- if (error instanceof Error && !error.message.includes("no such table")) {
1138
- throw error;
1139
- }
799
+ const pageIdx = this.pageIndex(scope);
800
+ await pageIdx.deleteIndex();
801
+ } catch {
1140
802
  }
1141
- await this.client.execute({
1142
- sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
1143
- args: [scope.projectId, scope.scopeName]
1144
- });
1145
803
  }
1146
- async listScopes(scopeProjectId) {
1147
- await this.ensureRegistry();
1148
- const rs = await this.client.execute({
1149
- sql: `SELECT project_id, scope_name, model_id, last_indexed_at, vector_count,
1150
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks
1151
- FROM registry WHERE project_id = ?`,
1152
- args: [scopeProjectId]
1153
- });
1154
- return rs.rows.map((row) => ({
1155
- projectId: row.project_id,
1156
- scopeName: row.scope_name,
1157
- modelId: row.model_id,
1158
- lastIndexedAt: row.last_indexed_at,
1159
- vectorCount: row.vector_count,
1160
- lastEstimateTokens: row.last_estimate_tokens,
1161
- lastEstimateCostUSD: row.last_estimate_cost_usd,
1162
- lastEstimateChangedChunks: row.last_estimate_changed_chunks
1163
- }));
1164
- }
1165
- async recordScope(info) {
1166
- await this.ensureRegistry();
1167
- const key = `${info.projectId}:${info.scopeName}`;
1168
- await this.client.execute({
1169
- sql: `INSERT OR REPLACE INTO registry
1170
- (scope_key, project_id, scope_name, model_id, last_indexed_at, vector_count,
1171
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks)
1172
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1173
- args: [
1174
- key,
1175
- info.projectId,
1176
- info.scopeName,
1177
- info.modelId,
1178
- info.lastIndexedAt,
1179
- info.vectorCount ?? null,
1180
- info.lastEstimateTokens ?? null,
1181
- info.lastEstimateCostUSD ?? null,
1182
- info.lastEstimateChangedChunks ?? null
1183
- ]
1184
- });
804
+ async listScopes(projectId) {
805
+ const allIndexes = await this.client.listIndexes();
806
+ const prefix = `${projectId}--`;
807
+ const scopeNames = /* @__PURE__ */ new Set();
808
+ for (const name of allIndexes) {
809
+ if (name.startsWith(prefix) && !name.endsWith("--pages")) {
810
+ const scopeName = name.slice(prefix.length);
811
+ scopeNames.add(scopeName);
812
+ }
813
+ }
814
+ const scopes = [];
815
+ for (const scopeName of scopeNames) {
816
+ const scope = {
817
+ projectId,
818
+ scopeName,
819
+ scopeId: `${projectId}:${scopeName}`
820
+ };
821
+ try {
822
+ const info = await this.chunkIndex(scope).info();
823
+ scopes.push({
824
+ projectId,
825
+ scopeName,
826
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
827
+ documentCount: info.documentCount
828
+ });
829
+ } catch {
830
+ scopes.push({
831
+ projectId,
832
+ scopeName,
833
+ lastIndexedAt: "unknown",
834
+ documentCount: 0
835
+ });
836
+ }
837
+ }
838
+ return scopes;
1185
839
  }
1186
840
  async getContentHashes(scope) {
1187
- const exists = await this.chunksTableExists();
1188
- if (!exists) return /* @__PURE__ */ new Map();
1189
- const rs = await this.client.execute({
1190
- sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
1191
- args: [scope.projectId, scope.scopeName]
1192
- });
1193
841
  const map = /* @__PURE__ */ new Map();
1194
- for (const row of rs.rows) {
1195
- map.set(row.id, row.content_hash);
842
+ const index = this.chunkIndex(scope);
843
+ let cursor = "0";
844
+ try {
845
+ for (; ; ) {
846
+ const result = await index.range({ cursor, limit: 100 });
847
+ for (const doc of result.documents) {
848
+ if (doc.metadata?.contentHash) {
849
+ map.set(doc.id, doc.metadata.contentHash);
850
+ }
851
+ }
852
+ if (!result.nextCursor || result.nextCursor === "0") break;
853
+ cursor = result.nextCursor;
854
+ }
855
+ } catch {
1196
856
  }
1197
857
  return map;
1198
858
  }
1199
859
  async upsertPages(pages, scope) {
1200
860
  if (pages.length === 0) return;
1201
- await this.ensurePages();
1202
- for (const page of pages) {
1203
- if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
1204
- throw new Error(
1205
- `Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
1206
- );
1207
- }
1208
- }
1209
- const BATCH_SIZE = 100;
861
+ const index = this.pageIndex(scope);
862
+ const BATCH_SIZE = 50;
1210
863
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
1211
864
  const batch = pages.slice(i, i + BATCH_SIZE);
1212
- const stmts = batch.map((p) => ({
1213
- sql: `INSERT OR REPLACE INTO pages
1214
- (project_id, scope_name, url, title, markdown, route_file,
1215
- route_resolution, incoming_links, outgoing_links, depth, tags, indexed_at)
1216
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1217
- args: [
1218
- p.projectId,
1219
- p.scopeName,
1220
- p.url,
1221
- p.title,
1222
- p.markdown,
1223
- p.routeFile,
1224
- p.routeResolution,
1225
- p.incomingLinks,
1226
- p.outgoingLinks,
1227
- p.depth,
1228
- JSON.stringify(p.tags),
1229
- p.indexedAt
1230
- ]
865
+ const docs = batch.map((p) => ({
866
+ id: p.url,
867
+ content: {
868
+ title: p.title,
869
+ url: p.url,
870
+ type: "page",
871
+ description: p.description ?? "",
872
+ keywords: (p.keywords ?? []).join(","),
873
+ summary: p.summary ?? "",
874
+ tags: p.tags.join(",")
875
+ },
876
+ metadata: {
877
+ markdown: p.markdown,
878
+ projectId: p.projectId,
879
+ scopeName: p.scopeName,
880
+ routeFile: p.routeFile,
881
+ routeResolution: p.routeResolution,
882
+ incomingLinks: p.incomingLinks,
883
+ outgoingLinks: p.outgoingLinks,
884
+ depth: p.depth,
885
+ indexedAt: p.indexedAt
886
+ }
1231
887
  }));
1232
- await this.client.batch(stmts);
888
+ await index.upsert(docs);
1233
889
  }
1234
890
  }
1235
891
  async getPage(url, scope) {
1236
- await this.ensurePages();
1237
- const rs = await this.client.execute({
1238
- sql: `SELECT * FROM pages WHERE project_id = ? AND scope_name = ? AND url = ?`,
1239
- args: [scope.projectId, scope.scopeName, url]
1240
- });
1241
- if (rs.rows.length === 0) return null;
1242
- const row = rs.rows[0];
1243
- return {
1244
- url: row.url,
1245
- title: row.title,
1246
- markdown: row.markdown,
1247
- projectId: row.project_id,
1248
- scopeName: row.scope_name,
1249
- routeFile: row.route_file,
1250
- routeResolution: row.route_resolution,
1251
- incomingLinks: row.incoming_links,
1252
- outgoingLinks: row.outgoing_links,
1253
- depth: row.depth,
1254
- tags: JSON.parse(row.tags || "[]"),
1255
- indexedAt: row.indexed_at
1256
- };
892
+ const index = this.pageIndex(scope);
893
+ try {
894
+ const results = await index.fetch([url]);
895
+ const doc = results[0];
896
+ if (!doc) return null;
897
+ return {
898
+ url: doc.content.url,
899
+ title: doc.content.title,
900
+ markdown: doc.metadata.markdown,
901
+ projectId: doc.metadata.projectId,
902
+ scopeName: doc.metadata.scopeName,
903
+ routeFile: doc.metadata.routeFile,
904
+ routeResolution: doc.metadata.routeResolution,
905
+ incomingLinks: doc.metadata.incomingLinks,
906
+ outgoingLinks: doc.metadata.outgoingLinks,
907
+ depth: doc.metadata.depth,
908
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
909
+ indexedAt: doc.metadata.indexedAt,
910
+ summary: doc.content.summary || void 0,
911
+ description: doc.content.description || void 0,
912
+ keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
913
+ };
914
+ } catch {
915
+ return null;
916
+ }
1257
917
  }
1258
918
  async deletePages(scope) {
1259
- await this.ensurePages();
1260
- await this.client.execute({
1261
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
1262
- args: [scope.projectId, scope.scopeName]
1263
- });
1264
- }
1265
- async getScopeModelId(scope) {
1266
- await this.ensureRegistry();
1267
- const rs = await this.client.execute({
1268
- sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
1269
- args: [scope.projectId, scope.scopeName]
1270
- });
1271
- if (rs.rows.length === 0) return null;
1272
- return rs.rows[0].model_id;
919
+ try {
920
+ const index = this.pageIndex(scope);
921
+ await index.reset();
922
+ } catch {
923
+ }
1273
924
  }
1274
925
  async health() {
1275
926
  try {
1276
- await this.client.execute("SELECT 1");
927
+ await this.client.info();
1277
928
  return { ok: true };
1278
929
  } catch (error) {
1279
930
  return {
@@ -1282,40 +933,34 @@ var TursoVectorStore = class {
1282
933
  };
1283
934
  }
1284
935
  }
936
+ async dropAllIndexes(projectId) {
937
+ const allIndexes = await this.client.listIndexes();
938
+ const prefix = `${projectId}--`;
939
+ for (const name of allIndexes) {
940
+ if (name.startsWith(prefix)) {
941
+ try {
942
+ const index = this.client.index(name);
943
+ await index.deleteIndex();
944
+ } catch {
945
+ }
946
+ }
947
+ }
948
+ }
1285
949
  };
1286
950
 
1287
951
  // src/vector/factory.ts
1288
- async function createVectorStore(config, cwd) {
1289
- const turso = config.vector.turso;
1290
- const remoteUrl = turso.url ?? process.env[turso.urlEnv];
1291
- if (remoteUrl) {
1292
- const { createClient: createClient2 } = await import("@libsql/client/http");
1293
- const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
1294
- const client2 = createClient2({
1295
- url: remoteUrl,
1296
- authToken
1297
- });
1298
- return new TursoVectorStore({
1299
- client: client2,
1300
- dimension: config.vector.dimension
1301
- });
1302
- }
1303
- if (isServerless()) {
952
+ async function createUpstashStore(config) {
953
+ const url = config.upstash.url ?? process.env[config.upstash.urlEnv];
954
+ const token = config.upstash.token ?? process.env[config.upstash.tokenEnv];
955
+ if (!url || !token) {
1304
956
  throw new SearchSocketError(
1305
957
  "VECTOR_BACKEND_UNAVAILABLE",
1306
- `No remote vector database URL found (checked vector.turso.url and env var "${turso.urlEnv}"). Local SQLite storage is not available in serverless environments. Set ${turso.urlEnv} or pass vector.turso.url directly.`
958
+ `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
1307
959
  );
1308
960
  }
1309
- const { createClient } = await import("@libsql/client");
1310
- const localPath = path3.resolve(cwd, turso.localPath);
1311
- fs3.mkdirSync(path3.dirname(localPath), { recursive: true });
1312
- const client = createClient({
1313
- url: `file:${localPath}`
1314
- });
1315
- return new TursoVectorStore({
1316
- client,
1317
- dimension: config.vector.dimension
1318
- });
961
+ const { Search } = await import("@upstash/search");
962
+ const client = new Search({ url, token });
963
+ return new UpstashSearchStore({ client });
1319
964
  }
1320
965
 
1321
966
  // src/utils/hash.ts
@@ -1328,7 +973,7 @@ function sha256(input) {
1328
973
  }
1329
974
 
1330
975
  // src/utils/path.ts
1331
- import path4 from "path";
976
+ import path3 from "path";
1332
977
  function normalizeUrlPath(rawPath) {
1333
978
  let out = rawPath.trim();
1334
979
  if (!out.startsWith("/")) {
@@ -1340,15 +985,8 @@ function normalizeUrlPath(rawPath) {
1340
985
  }
1341
986
  return out;
1342
987
  }
1343
- function urlPathToMirrorRelative(urlPath) {
1344
- const normalized = normalizeUrlPath(urlPath);
1345
- if (normalized === "/") {
1346
- return "index.md";
1347
- }
1348
- return `${normalized.slice(1)}.md`;
1349
- }
1350
988
  function staticHtmlFileToUrl(filePath, rootDir) {
1351
- const relative = path4.relative(rootDir, filePath).replace(/\\/g, "/");
989
+ const relative = path3.relative(rootDir, filePath).replace(/\\/g, "/");
1352
990
  if (relative === "index.html") {
1353
991
  return "/";
1354
992
  }
@@ -1621,7 +1259,7 @@ function buildEmbeddingText(chunk, prependTitle) {
1621
1259
 
1622
1260
  ${chunk.chunkText}`;
1623
1261
  }
1624
- function chunkMirrorPage(page, config, scope) {
1262
+ function chunkPage(page, config, scope) {
1625
1263
  const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
1626
1264
  const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
1627
1265
  const chunks = [];
@@ -1831,59 +1469,8 @@ function extractFromMarkdown(url, markdown, title) {
1831
1469
  };
1832
1470
  }
1833
1471
 
1834
- // src/indexing/mirror.ts
1835
- import fs4 from "fs/promises";
1836
- import path5 from "path";
1837
- function yamlString(value) {
1838
- return JSON.stringify(value);
1839
- }
1840
- function yamlArray(values) {
1841
- return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
1842
- }
1843
- function buildMirrorMarkdown(page) {
1844
- const frontmatterLines = [
1845
- "---",
1846
- `url: ${yamlString(page.url)}`,
1847
- `title: ${yamlString(page.title)}`,
1848
- `scope: ${yamlString(page.scope)}`,
1849
- `routeFile: ${yamlString(page.routeFile)}`,
1850
- `routeResolution: ${yamlString(page.routeResolution)}`,
1851
- `generatedAt: ${yamlString(page.generatedAt)}`,
1852
- `incomingLinks: ${page.incomingLinks}`,
1853
- `outgoingLinks: ${page.outgoingLinks}`,
1854
- `depth: ${page.depth}`,
1855
- `tags: ${yamlArray(page.tags)}`,
1856
- "---",
1857
- ""
1858
- ];
1859
- return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
1860
- }
1861
- function stripGeneratedAt(content) {
1862
- return content.replace(/^generatedAt: .*$/m, "");
1863
- }
1864
- async function writeMirrorPage(statePath, scope, page) {
1865
- const relative = urlPathToMirrorRelative(page.url);
1866
- const outputPath = path5.join(statePath, "pages", scope.scopeName, relative);
1867
- await fs4.mkdir(path5.dirname(outputPath), { recursive: true });
1868
- const newContent = buildMirrorMarkdown(page);
1869
- try {
1870
- const existing = await fs4.readFile(outputPath, "utf8");
1871
- if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
1872
- return outputPath;
1873
- }
1874
- } catch {
1875
- }
1876
- await fs4.writeFile(outputPath, newContent, "utf8");
1877
- return outputPath;
1878
- }
1879
- async function cleanMirrorForScope(statePath, scope) {
1880
- const target = path5.join(statePath, "pages", scope.scopeName);
1881
- await fs4.rm(target, { recursive: true, force: true });
1882
- await fs4.mkdir(target, { recursive: true });
1883
- }
1884
-
1885
1472
  // src/indexing/route-mapper.ts
1886
- import path6 from "path";
1473
+ import path4 from "path";
1887
1474
  import fg from "fast-glob";
1888
1475
  function segmentToRegex(segment) {
1889
1476
  if (segment.startsWith("(") && segment.endsWith(")")) {
@@ -1904,7 +1491,7 @@ function segmentToRegex(segment) {
1904
1491
  return { regex: `/${segment.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`, score: 10 };
1905
1492
  }
1906
1493
  function routeFileToPattern(routeFile, cwd) {
1907
- const relative = path6.relative(cwd, routeFile).replace(/\\/g, "/");
1494
+ const relative = path4.relative(cwd, routeFile).replace(/\\/g, "/");
1908
1495
  const withoutPrefix = relative.replace(/^src\/routes\/?/, "");
1909
1496
  const withoutPage = withoutPrefix.replace(/\/\+page\.[^/]+$/, "");
1910
1497
  const segments = withoutPage.split("/").filter(Boolean);
@@ -1959,11 +1546,11 @@ function mapUrlToRoute(urlPath, patterns) {
1959
1546
 
1960
1547
  // src/indexing/sources/build/index.ts
1961
1548
  import { load as cheerioLoad } from "cheerio";
1962
- import pLimit2 from "p-limit";
1549
+ import pLimit from "p-limit";
1963
1550
 
1964
1551
  // src/indexing/sources/build/manifest-parser.ts
1965
- import fs5 from "fs/promises";
1966
- import path7 from "path";
1552
+ import fs3 from "fs/promises";
1553
+ import path5 from "path";
1967
1554
 
1968
1555
  // src/utils/pattern.ts
1969
1556
  function matchUrlPattern(url, pattern) {
@@ -2007,10 +1594,10 @@ function routeIdToUrl(routeId) {
2007
1594
  return routeId.split("/").filter((seg) => !(seg.startsWith("(") && seg.endsWith(")"))).join("/") || "/";
2008
1595
  }
2009
1596
  async function parseManifest(cwd, outputDir) {
2010
- const manifestPath = path7.resolve(cwd, outputDir, "server", "manifest-full.js");
1597
+ const manifestPath = path5.resolve(cwd, outputDir, "server", "manifest-full.js");
2011
1598
  let content;
2012
1599
  try {
2013
- content = await fs5.readFile(manifestPath, "utf8");
1600
+ content = await fs3.readFile(manifestPath, "utf8");
2014
1601
  } catch {
2015
1602
  throw new SearchSocketError(
2016
1603
  "BUILD_MANIFEST_NOT_FOUND",
@@ -2074,8 +1661,8 @@ function isExcluded(url, patterns) {
2074
1661
 
2075
1662
  // src/indexing/sources/build/preview-server.ts
2076
1663
  import net from "net";
2077
- import path8 from "path";
2078
- import fs6 from "fs";
1664
+ import path6 from "path";
1665
+ import fs4 from "fs";
2079
1666
  import { spawn } from "child_process";
2080
1667
  function findFreePort() {
2081
1668
  return new Promise((resolve, reject) => {
@@ -2114,8 +1701,8 @@ async function waitForReady(url, timeout, child) {
2114
1701
  );
2115
1702
  }
2116
1703
  async function startPreviewServer(cwd, options, logger3) {
2117
- const viteBin = path8.join(cwd, "node_modules", ".bin", "vite");
2118
- if (!fs6.existsSync(viteBin)) {
1704
+ const viteBin = path6.join(cwd, "node_modules", ".bin", "vite");
1705
+ if (!fs4.existsSync(viteBin)) {
2119
1706
  throw new SearchSocketError(
2120
1707
  "BUILD_SERVER_FAILED",
2121
1708
  `vite binary not found at ${viteBin}. Ensure vite is installed.`
@@ -2189,7 +1776,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
2189
1776
  const visited = /* @__PURE__ */ new Set();
2190
1777
  const pages = [];
2191
1778
  const queue = [];
2192
- const limit = pLimit2(8);
1779
+ const limit = pLimit(8);
2193
1780
  for (const seed of seedUrls) {
2194
1781
  const normalized = normalizeUrlPath(seed);
2195
1782
  if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
@@ -2271,7 +1858,7 @@ async function loadBuildPages(cwd, config, maxPages) {
2271
1858
  const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
2272
1859
  const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
2273
1860
  try {
2274
- const concurrencyLimit = pLimit2(8);
1861
+ const concurrencyLimit = pLimit(8);
2275
1862
  const results = await Promise.allSettled(
2276
1863
  selected.map(
2277
1864
  (route) => concurrencyLimit(async () => {
@@ -2311,11 +1898,11 @@ async function loadBuildPages(cwd, config, maxPages) {
2311
1898
  }
2312
1899
 
2313
1900
  // src/indexing/sources/content-files.ts
2314
- import fs7 from "fs/promises";
2315
- import path9 from "path";
1901
+ import fs5 from "fs/promises";
1902
+ import path7 from "path";
2316
1903
  import fg2 from "fast-glob";
2317
1904
  function filePathToUrl(filePath, baseDir) {
2318
- const relative = path9.relative(baseDir, filePath).replace(/\\/g, "/");
1905
+ const relative = path7.relative(baseDir, filePath).replace(/\\/g, "/");
2319
1906
  const segments = relative.split("/").filter(Boolean);
2320
1907
  if (/(^|\/)\+page\.svelte$/.test(relative)) {
2321
1908
  const routeSegments = segments.slice();
@@ -2340,7 +1927,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
2340
1927
  if (!contentConfig) {
2341
1928
  throw new Error("content-files config is missing");
2342
1929
  }
2343
- const baseDir = path9.resolve(cwd, contentConfig.baseDir);
1930
+ const baseDir = path7.resolve(cwd, contentConfig.baseDir);
2344
1931
  const files = await fg2(contentConfig.globs, {
2345
1932
  cwd: baseDir,
2346
1933
  absolute: true,
@@ -2350,12 +1937,12 @@ async function loadContentFilesPages(cwd, config, maxPages) {
2350
1937
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
2351
1938
  const pages = [];
2352
1939
  for (const filePath of selected) {
2353
- const raw = await fs7.readFile(filePath, "utf8");
1940
+ const raw = await fs5.readFile(filePath, "utf8");
2354
1941
  const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
2355
1942
  pages.push({
2356
1943
  url: filePathToUrl(filePath, baseDir),
2357
1944
  markdown,
2358
- sourcePath: path9.relative(cwd, filePath).replace(/\\/g, "/"),
1945
+ sourcePath: path7.relative(cwd, filePath).replace(/\\/g, "/"),
2359
1946
  outgoingLinks: []
2360
1947
  });
2361
1948
  }
@@ -2365,7 +1952,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
2365
1952
  // src/indexing/sources/crawl.ts
2366
1953
  import { gunzipSync } from "zlib";
2367
1954
  import { load as cheerioLoad2 } from "cheerio";
2368
- import pLimit3 from "p-limit";
1955
+ import pLimit2 from "p-limit";
2369
1956
  var logger2 = new Logger();
2370
1957
  function extractLocs(xml) {
2371
1958
  const $ = cheerioLoad2(xml, { xmlMode: true });
@@ -2450,7 +2037,7 @@ async function loadCrawledPages(config, maxPages) {
2450
2037
  const routes = await resolveRoutes(config);
2451
2038
  const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
2452
2039
  const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
2453
- const concurrencyLimit = pLimit3(8);
2040
+ const concurrencyLimit = pLimit2(8);
2454
2041
  const results = await Promise.allSettled(
2455
2042
  selected.map(
2456
2043
  (route) => concurrencyLimit(async () => {
@@ -2483,11 +2070,11 @@ async function loadCrawledPages(config, maxPages) {
2483
2070
  }
2484
2071
 
2485
2072
  // src/indexing/sources/static-output.ts
2486
- import fs8 from "fs/promises";
2487
- import path10 from "path";
2073
+ import fs6 from "fs/promises";
2074
+ import path8 from "path";
2488
2075
  import fg3 from "fast-glob";
2489
2076
  async function loadStaticOutputPages(cwd, config, maxPages) {
2490
- const outputDir = path10.resolve(cwd, config.source.staticOutputDir);
2077
+ const outputDir = path8.resolve(cwd, config.source.staticOutputDir);
2491
2078
  const htmlFiles = await fg3(["**/*.html"], {
2492
2079
  cwd: outputDir,
2493
2080
  absolute: true
@@ -2496,11 +2083,11 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
2496
2083
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
2497
2084
  const pages = [];
2498
2085
  for (const filePath of selected) {
2499
- const html = await fs8.readFile(filePath, "utf8");
2086
+ const html = await fs6.readFile(filePath, "utf8");
2500
2087
  pages.push({
2501
2088
  url: staticHtmlFileToUrl(filePath, outputDir),
2502
2089
  html,
2503
- sourcePath: path10.relative(cwd, filePath).replace(/\\/g, "/"),
2090
+ sourcePath: path8.relative(cwd, filePath).replace(/\\/g, "/"),
2504
2091
  outgoingLinks: []
2505
2092
  });
2506
2093
  }
@@ -2508,8 +2095,8 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
2508
2095
  }
2509
2096
 
2510
2097
  // src/indexing/robots.ts
2511
- import fs9 from "fs/promises";
2512
- import path11 from "path";
2098
+ import fs7 from "fs/promises";
2099
+ import path9 from "path";
2513
2100
  function parseRobotsTxt(content, userAgent = "Searchsocket") {
2514
2101
  const lines = content.split(/\r?\n/);
2515
2102
  const agentGroups = /* @__PURE__ */ new Map();
@@ -2563,7 +2150,7 @@ function isBlockedByRobots(urlPath, rules) {
2563
2150
  }
2564
2151
  async function loadRobotsTxtFromDir(dir) {
2565
2152
  try {
2566
- const content = await fs9.readFile(path11.join(dir, "robots.txt"), "utf8");
2153
+ const content = await fs7.readFile(path9.join(dir, "robots.txt"), "utf8");
2567
2154
  return parseRobotsTxt(content);
2568
2155
  } catch {
2569
2156
  return null;
@@ -2588,7 +2175,12 @@ function nonNegativeOrZero(value) {
2588
2175
  }
2589
2176
  return Math.max(0, value);
2590
2177
  }
2591
- function rankHits(hits, config) {
2178
+ function normalizeForTitleMatch(text) {
2179
+ return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
2180
+ }
2181
+ function rankHits(hits, config, query) {
2182
+ const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
2183
+ const titleMatchWeight = config.ranking.weights.titleMatch;
2592
2184
  return hits.map((hit) => {
2593
2185
  let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
2594
2186
  if (config.ranking.enableIncomingLinkBoost) {
@@ -2599,6 +2191,12 @@ function rankHits(hits, config) {
2599
2191
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
2600
2192
  score += depthBoost * config.ranking.weights.depth;
2601
2193
  }
2194
+ if (normalizedQuery && titleMatchWeight > 0) {
2195
+ const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
2196
+ if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
2197
+ score += titleMatchWeight;
2198
+ }
2199
+ }
2602
2200
  return {
2603
2201
  hit,
2604
2202
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
@@ -2608,6 +2206,30 @@ function rankHits(hits, config) {
2608
2206
  return Number.isNaN(delta) ? 0 : delta;
2609
2207
  });
2610
2208
  }
2209
+ function trimByScoreGap(results, config) {
2210
+ if (results.length === 0) return results;
2211
+ const threshold = config.ranking.scoreGapThreshold;
2212
+ const minScore = config.ranking.minScore;
2213
+ if (minScore > 0 && results.length > 0) {
2214
+ const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
2215
+ const mid = Math.floor(sortedScores.length / 2);
2216
+ const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
2217
+ if (median < minScore) return [];
2218
+ }
2219
+ if (threshold > 0 && results.length > 1) {
2220
+ for (let i = 1; i < results.length; i++) {
2221
+ const prev = results[i - 1].pageScore;
2222
+ const current = results[i].pageScore;
2223
+ if (prev > 0) {
2224
+ const gap = (prev - current) / prev;
2225
+ if (gap >= threshold) {
2226
+ return results.slice(0, i);
2227
+ }
2228
+ }
2229
+ }
2230
+ }
2231
+ return results;
2232
+ }
2611
2233
  function findPageWeight(url, pageWeights) {
2612
2234
  let bestPattern = "";
2613
2235
  let bestWeight = 1;
@@ -2662,6 +2284,61 @@ function aggregateByPage(ranked, config) {
2662
2284
  return Number.isNaN(delta) ? 0 : delta;
2663
2285
  });
2664
2286
  }
2287
+ function mergePageAndChunkResults(pageHits, rankedChunks, config) {
2288
+ if (pageHits.length === 0) return rankedChunks;
2289
+ const w = config.search.pageSearchWeight;
2290
+ const pageScoreMap = /* @__PURE__ */ new Map();
2291
+ for (const ph of pageHits) {
2292
+ pageScoreMap.set(ph.url, ph);
2293
+ }
2294
+ const pagesWithChunks = /* @__PURE__ */ new Set();
2295
+ const merged = rankedChunks.map((ranked) => {
2296
+ const url = ranked.hit.metadata.url;
2297
+ const pageHit = pageScoreMap.get(url);
2298
+ if (pageHit) {
2299
+ pagesWithChunks.add(url);
2300
+ const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
2301
+ return {
2302
+ hit: ranked.hit,
2303
+ finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
2304
+ };
2305
+ }
2306
+ return ranked;
2307
+ });
2308
+ for (const [url, pageHit] of pageScoreMap) {
2309
+ if (pagesWithChunks.has(url)) continue;
2310
+ const syntheticScore = pageHit.score * w;
2311
+ const syntheticHit = {
2312
+ id: `page:${url}`,
2313
+ score: pageHit.score,
2314
+ metadata: {
2315
+ projectId: "",
2316
+ scopeName: "",
2317
+ url: pageHit.url,
2318
+ path: pageHit.url,
2319
+ title: pageHit.title,
2320
+ sectionTitle: "",
2321
+ headingPath: [],
2322
+ snippet: pageHit.description || pageHit.title,
2323
+ chunkText: pageHit.description || pageHit.title,
2324
+ ordinal: 0,
2325
+ contentHash: "",
2326
+ depth: pageHit.depth,
2327
+ incomingLinks: pageHit.incomingLinks,
2328
+ routeFile: pageHit.routeFile,
2329
+ tags: pageHit.tags
2330
+ }
2331
+ };
2332
+ merged.push({
2333
+ hit: syntheticHit,
2334
+ finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
2335
+ });
2336
+ }
2337
+ return merged.sort((a, b) => {
2338
+ const delta = b.finalScore - a.finalScore;
2339
+ return Number.isNaN(delta) ? 0 : delta;
2340
+ });
2341
+ }
2665
2342
 
2666
2343
  // src/utils/time.ts
2667
2344
  function nowIso() {
@@ -2672,34 +2349,41 @@ function hrTimeMs(start) {
2672
2349
  }
2673
2350
 
2674
2351
  // src/indexing/pipeline.ts
2675
- var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
2676
- "jina-embeddings-v3": 2e-5,
2677
- "jina-embeddings-v5-text-small": 5e-5
2678
- };
2679
- var DEFAULT_EMBEDDING_PRICE_PER_1K = 5e-5;
2352
+ function buildPageSummary(page, maxChars = 3500) {
2353
+ const parts = [page.title];
2354
+ if (page.description) {
2355
+ parts.push(page.description);
2356
+ }
2357
+ if (page.keywords && page.keywords.length > 0) {
2358
+ parts.push(page.keywords.join(", "));
2359
+ }
2360
+ const plainBody = page.markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/^#{1,6}\s+/gm, "").replace(/[>*_|~\-]/g, " ").replace(/\s+/g, " ").trim();
2361
+ if (plainBody) {
2362
+ parts.push(plainBody);
2363
+ }
2364
+ const joined = parts.join("\n\n");
2365
+ if (joined.length <= maxChars) return joined;
2366
+ return joined.slice(0, maxChars).trim();
2367
+ }
2680
2368
  var IndexPipeline = class _IndexPipeline {
2681
2369
  cwd;
2682
2370
  config;
2683
- embeddings;
2684
- vectorStore;
2371
+ store;
2685
2372
  logger;
2686
2373
  constructor(options) {
2687
2374
  this.cwd = options.cwd;
2688
2375
  this.config = options.config;
2689
- this.embeddings = options.embeddings;
2690
- this.vectorStore = options.vectorStore;
2376
+ this.store = options.store;
2691
2377
  this.logger = options.logger;
2692
2378
  }
2693
2379
  static async create(options = {}) {
2694
- const cwd = path12.resolve(options.cwd ?? process.cwd());
2380
+ const cwd = path10.resolve(options.cwd ?? process.cwd());
2695
2381
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
2696
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
2697
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
2382
+ const store = options.store ?? await createUpstashStore(config);
2698
2383
  return new _IndexPipeline({
2699
2384
  cwd,
2700
2385
  config,
2701
- embeddings,
2702
- vectorStore,
2386
+ store,
2703
2387
  logger: options.logger ?? new Logger()
2704
2388
  });
2705
2389
  }
@@ -2719,25 +2403,17 @@ var IndexPipeline = class _IndexPipeline {
2719
2403
  stageTimingsMs[name] = Math.round(hrTimeMs(start));
2720
2404
  };
2721
2405
  const scope = resolveScope(this.config, options.scopeOverride);
2722
- const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
2406
+ ensureStateDirs(this.cwd, this.config.state.dir, scope);
2723
2407
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
2724
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
2408
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
2725
2409
  if (options.force) {
2726
2410
  this.logger.info("Force mode enabled \u2014 full rebuild");
2727
- await cleanMirrorForScope(statePath, scope);
2728
2411
  }
2729
2412
  if (options.dryRun) {
2730
2413
  this.logger.info("Dry run \u2014 no writes will be performed");
2731
2414
  }
2732
2415
  const manifestStart = stageStart();
2733
- const existingHashes = await this.vectorStore.getContentHashes(scope);
2734
- const existingModelId = await this.vectorStore.getScopeModelId(scope);
2735
- if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
2736
- throw new SearchSocketError(
2737
- "EMBEDDING_MODEL_MISMATCH",
2738
- `Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
2739
- );
2740
- }
2416
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
2741
2417
  stageEnd("manifest", manifestStart);
2742
2418
  this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
2743
2419
  const sourceStart = stageStart();
@@ -2775,11 +2451,11 @@ var IndexPipeline = class _IndexPipeline {
2775
2451
  let robotsRules = null;
2776
2452
  if (sourceMode === "static-output") {
2777
2453
  robotsRules = await loadRobotsTxtFromDir(
2778
- path12.resolve(this.cwd, this.config.source.staticOutputDir)
2454
+ path10.resolve(this.cwd, this.config.source.staticOutputDir)
2779
2455
  );
2780
2456
  } else if (sourceMode === "build" && this.config.source.build) {
2781
2457
  robotsRules = await loadRobotsTxtFromDir(
2782
- path12.resolve(this.cwd, this.config.source.build.outputDir)
2458
+ path10.resolve(this.cwd, this.config.source.build.outputDir)
2783
2459
  );
2784
2460
  } else if (sourceMode === "crawl" && this.config.source.crawl) {
2785
2461
  robotsRules = await fetchRobotsTxt(this.config.source.crawl.baseUrl);
@@ -2866,9 +2542,9 @@ var IndexPipeline = class _IndexPipeline {
2866
2542
  }
2867
2543
  stageEnd("links", linkStart);
2868
2544
  this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
2869
- const mirrorStart = stageStart();
2870
- this.logger.info("Writing mirror pages...");
2871
- const mirrorPages = [];
2545
+ const pagesStart = stageStart();
2546
+ this.logger.info("Building indexed pages...");
2547
+ const pages = [];
2872
2548
  let routeExact = 0;
2873
2549
  let routeBestEffort = 0;
2874
2550
  const precomputedRoutes = /* @__PURE__ */ new Map();
@@ -2897,7 +2573,7 @@ var IndexPipeline = class _IndexPipeline {
2897
2573
  } else {
2898
2574
  routeExact += 1;
2899
2575
  }
2900
- const mirror = {
2576
+ const indexedPage = {
2901
2577
  url: page.url,
2902
2578
  title: page.title,
2903
2579
  scope: scope.scopeName,
@@ -2912,35 +2588,38 @@ var IndexPipeline = class _IndexPipeline {
2912
2588
  description: page.description,
2913
2589
  keywords: page.keywords
2914
2590
  };
2915
- mirrorPages.push(mirror);
2916
- if (this.config.state.writeMirror) {
2917
- await writeMirrorPage(statePath, scope, mirror);
2918
- }
2919
- this.logger.event("markdown_written", { url: page.url });
2591
+ pages.push(indexedPage);
2592
+ this.logger.event("page_indexed", { url: page.url });
2920
2593
  }
2921
2594
  if (!options.dryRun) {
2922
- const pageRecords = mirrorPages.map((mp) => ({
2923
- url: mp.url,
2924
- title: mp.title,
2925
- markdown: mp.markdown,
2926
- projectId: scope.projectId,
2927
- scopeName: scope.scopeName,
2928
- routeFile: mp.routeFile,
2929
- routeResolution: mp.routeResolution,
2930
- incomingLinks: mp.incomingLinks,
2931
- outgoingLinks: mp.outgoingLinks,
2932
- depth: mp.depth,
2933
- tags: mp.tags,
2934
- indexedAt: mp.generatedAt
2935
- }));
2936
- await this.vectorStore.deletePages(scope);
2937
- await this.vectorStore.upsertPages(pageRecords, scope);
2595
+ const pageRecords = pages.map((p) => {
2596
+ const summary = buildPageSummary(p);
2597
+ return {
2598
+ url: p.url,
2599
+ title: p.title,
2600
+ markdown: p.markdown,
2601
+ projectId: scope.projectId,
2602
+ scopeName: scope.scopeName,
2603
+ routeFile: p.routeFile,
2604
+ routeResolution: p.routeResolution,
2605
+ incomingLinks: p.incomingLinks,
2606
+ outgoingLinks: p.outgoingLinks,
2607
+ depth: p.depth,
2608
+ tags: p.tags,
2609
+ indexedAt: p.generatedAt,
2610
+ summary,
2611
+ description: p.description,
2612
+ keywords: p.keywords
2613
+ };
2614
+ });
2615
+ await this.store.deletePages(scope);
2616
+ await this.store.upsertPages(pageRecords, scope);
2938
2617
  }
2939
- stageEnd("mirror", mirrorStart);
2940
- this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
2618
+ stageEnd("pages", pagesStart);
2619
+ this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
2941
2620
  const chunkStart = stageStart();
2942
2621
  this.logger.info("Chunking pages...");
2943
- let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
2622
+ let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
2944
2623
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
2945
2624
  if (typeof maxChunks === "number") {
2946
2625
  chunks = chunks.slice(0, maxChunks);
@@ -2972,125 +2651,61 @@ var IndexPipeline = class _IndexPipeline {
2972
2651
  });
2973
2652
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
2974
2653
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
2975
- const embedStart = stageStart();
2976
- const chunkTokenEstimates = /* @__PURE__ */ new Map();
2977
- for (const chunk of changedChunks) {
2978
- chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
2979
- }
2980
- const estimatedTokens = changedChunks.reduce(
2981
- (sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
2982
- 0
2983
- );
2984
- const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
2985
- const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
2986
- let newEmbeddings = 0;
2987
- const vectorsByChunk = /* @__PURE__ */ new Map();
2654
+ const upsertStart = stageStart();
2655
+ let documentsUpserted = 0;
2988
2656
  if (!options.dryRun && changedChunks.length > 0) {
2989
- this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
2990
- const embeddings = await this.embeddings.embedTexts(
2991
- changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
2992
- this.config.embeddings.model,
2993
- "retrieval.passage"
2994
- );
2995
- if (embeddings.length !== changedChunks.length) {
2996
- throw new SearchSocketError(
2997
- "VECTOR_BACKEND_UNAVAILABLE",
2998
- `Embedding provider returned ${embeddings.length} vectors for ${changedChunks.length} chunks.`
2999
- );
3000
- }
3001
- for (let i = 0; i < changedChunks.length; i += 1) {
3002
- const chunk = changedChunks[i];
3003
- const embedding = embeddings[i];
3004
- if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
3005
- throw new SearchSocketError(
3006
- "VECTOR_BACKEND_UNAVAILABLE",
3007
- `Embedding provider returned an invalid vector for chunk index ${i}.`
3008
- );
3009
- }
3010
- vectorsByChunk.set(chunk.chunkKey, embedding);
3011
- newEmbeddings += 1;
3012
- this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
3013
- }
3014
- }
3015
- stageEnd("embedding", embedStart);
3016
- if (changedChunks.length > 0) {
3017
- this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
3018
- } else {
3019
- this.logger.info("No chunks to embed \u2014 all up to date");
3020
- }
3021
- const syncStart = stageStart();
3022
- if (!options.dryRun) {
3023
- this.logger.info("Syncing vectors...");
3024
- const upserts = [];
3025
- for (const chunk of changedChunks) {
3026
- const vector = vectorsByChunk.get(chunk.chunkKey);
3027
- if (!vector) {
3028
- continue;
3029
- }
3030
- upserts.push({
2657
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
2658
+ const UPSTASH_CONTENT_LIMIT = 4096;
2659
+ const FIELD_OVERHEAD = 200;
2660
+ const MAX_TEXT_CHARS = UPSTASH_CONTENT_LIMIT - FIELD_OVERHEAD;
2661
+ const docs = changedChunks.map((chunk) => {
2662
+ const title = chunk.title;
2663
+ const sectionTitle = chunk.sectionTitle ?? "";
2664
+ const url = chunk.url;
2665
+ const tags = chunk.tags.join(",");
2666
+ const headingPath = chunk.headingPath.join(" > ");
2667
+ const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
2668
+ const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
2669
+ const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
2670
+ return {
3031
2671
  id: chunk.chunkKey,
3032
- vector,
2672
+ content: { title, sectionTitle, text, url, tags, headingPath },
3033
2673
  metadata: {
3034
2674
  projectId: scope.projectId,
3035
2675
  scopeName: scope.scopeName,
3036
- url: chunk.url,
3037
2676
  path: chunk.path,
3038
- title: chunk.title,
3039
- sectionTitle: chunk.sectionTitle ?? "",
3040
- headingPath: chunk.headingPath,
3041
2677
  snippet: chunk.snippet,
3042
- chunkText: chunk.chunkText.slice(0, 4e3),
3043
2678
  ordinal: chunk.ordinal,
3044
2679
  contentHash: chunk.contentHash,
3045
- modelId: this.config.embeddings.model,
3046
2680
  depth: chunk.depth,
3047
2681
  incomingLinks: chunk.incomingLinks,
3048
2682
  routeFile: chunk.routeFile,
3049
- tags: chunk.tags,
3050
- description: chunk.description,
3051
- keywords: chunk.keywords
2683
+ description: chunk.description ?? "",
2684
+ keywords: (chunk.keywords ?? []).join(",")
3052
2685
  }
3053
- });
3054
- }
3055
- if (upserts.length > 0) {
3056
- await this.vectorStore.upsert(upserts, scope);
3057
- this.logger.event("upserted", { count: upserts.length });
3058
- }
3059
- if (deletes.length > 0) {
3060
- await this.vectorStore.deleteByIds(deletes, scope);
3061
- this.logger.event("deleted", { count: deletes.length });
3062
- }
3063
- }
3064
- stageEnd("sync", syncStart);
3065
- this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
3066
- const finalizeStart = stageStart();
3067
- if (!options.dryRun) {
3068
- const scopeInfo = {
3069
- projectId: scope.projectId,
3070
- scopeName: scope.scopeName,
3071
- modelId: this.config.embeddings.model,
3072
- lastIndexedAt: nowIso(),
3073
- vectorCount: chunks.length,
3074
- lastEstimateTokens: estimatedTokens,
3075
- lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
3076
- lastEstimateChangedChunks: changedChunks.length
3077
- };
3078
- await this.vectorStore.recordScope(scopeInfo);
3079
- this.logger.event("registry_updated", {
3080
- scope: scope.scopeName,
3081
- vectorCount: chunks.length
2686
+ };
3082
2687
  });
2688
+ await this.store.upsertChunks(docs, scope);
2689
+ documentsUpserted = docs.length;
2690
+ this.logger.event("upserted", { count: docs.length });
2691
+ }
2692
+ if (!options.dryRun && deletes.length > 0) {
2693
+ await this.store.deleteByIds(deletes, scope);
2694
+ this.logger.event("deleted", { count: deletes.length });
2695
+ }
2696
+ stageEnd("upsert", upsertStart);
2697
+ if (changedChunks.length > 0) {
2698
+ this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
2699
+ } else {
2700
+ this.logger.info("No chunks to upsert \u2014 all up to date");
3083
2701
  }
3084
- stageEnd("finalize", finalizeStart);
3085
2702
  this.logger.info("Done.");
3086
2703
  return {
3087
- pagesProcessed: mirrorPages.length,
2704
+ pagesProcessed: pages.length,
3088
2705
  chunksTotal: chunks.length,
3089
2706
  chunksChanged: changedChunks.length,
3090
- newEmbeddings,
2707
+ documentsUpserted,
3091
2708
  deletes: deletes.length,
3092
- estimatedTokens,
3093
- estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
3094
2709
  routeExact,
3095
2710
  routeBestEffort,
3096
2711
  stageTimingsMs
@@ -3106,142 +2721,33 @@ import { createMcpExpressApp } from "@modelcontextprotocol/sdk/server/express.js
3106
2721
  import { z as z3 } from "zod";
3107
2722
 
3108
2723
  // src/search/engine.ts
3109
- import path13 from "path";
2724
+ import path11 from "path";
3110
2725
  import { z as z2 } from "zod";
3111
-
3112
- // src/rerank/jina.ts
3113
- function sleep2(ms) {
3114
- return new Promise((resolve) => {
3115
- setTimeout(resolve, ms);
3116
- });
3117
- }
3118
- var JinaReranker = class {
3119
- apiKey;
3120
- model;
3121
- maxRetries;
3122
- constructor(options) {
3123
- this.apiKey = options.apiKey;
3124
- this.model = options.model;
3125
- this.maxRetries = options.maxRetries ?? 2;
3126
- }
3127
- async rerank(query, candidates, topN) {
3128
- if (candidates.length === 0) {
3129
- return [];
3130
- }
3131
- const body = {
3132
- model: this.model,
3133
- query,
3134
- documents: candidates.map((candidate) => candidate.text),
3135
- top_n: topN ?? candidates.length,
3136
- return_documents: false
3137
- };
3138
- let attempt = 0;
3139
- while (attempt <= this.maxRetries) {
3140
- attempt += 1;
3141
- let response;
3142
- try {
3143
- response = await fetch("https://api.jina.ai/v1/rerank", {
3144
- method: "POST",
3145
- headers: {
3146
- "content-type": "application/json",
3147
- authorization: `Bearer ${this.apiKey}`
3148
- },
3149
- body: JSON.stringify(body)
3150
- });
3151
- } catch (error) {
3152
- if (attempt <= this.maxRetries) {
3153
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
3154
- continue;
3155
- }
3156
- throw error;
3157
- }
3158
- if (!response.ok) {
3159
- const retryable = response.status === 429 || response.status >= 500;
3160
- if (retryable && attempt <= this.maxRetries) {
3161
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
3162
- continue;
3163
- }
3164
- const errorBody = await response.text();
3165
- throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
3166
- }
3167
- const payload = await response.json();
3168
- const rawResults = payload.results ?? payload.data ?? [];
3169
- if (!Array.isArray(rawResults)) {
3170
- throw new Error("Invalid Jina rerank response format");
3171
- }
3172
- return rawResults.flatMap((item) => {
3173
- const index = item.index;
3174
- if (typeof index !== "number" || index < 0 || index >= candidates.length) {
3175
- return [];
3176
- }
3177
- const candidate = candidates[index];
3178
- if (!candidate) {
3179
- return [];
3180
- }
3181
- const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
3182
- return [
3183
- {
3184
- id: candidate.id,
3185
- score
3186
- }
3187
- ];
3188
- }).sort((a, b) => b.score - a.score);
3189
- }
3190
- throw new Error("Jina rerank request failed after retries");
3191
- }
3192
- };
3193
-
3194
- // src/rerank/factory.ts
3195
- function createReranker(config) {
3196
- if (!config.rerank.enabled) {
3197
- return null;
3198
- }
3199
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
3200
- if (!apiKey) {
3201
- return null;
3202
- }
3203
- return new JinaReranker({
3204
- apiKey,
3205
- model: config.rerank.model
3206
- });
3207
- }
3208
-
3209
- // src/search/engine.ts
3210
2726
  var requestSchema = z2.object({
3211
2727
  q: z2.string().trim().min(1),
3212
2728
  topK: z2.number().int().positive().max(100).optional(),
3213
2729
  scope: z2.string().optional(),
3214
2730
  pathPrefix: z2.string().optional(),
3215
2731
  tags: z2.array(z2.string()).optional(),
3216
- rerank: z2.boolean().optional(),
3217
- groupBy: z2.enum(["page", "chunk"]).optional(),
3218
- stream: z2.boolean().optional()
2732
+ groupBy: z2.enum(["page", "chunk"]).optional()
3219
2733
  });
3220
2734
  var SearchEngine = class _SearchEngine {
3221
2735
  cwd;
3222
2736
  config;
3223
- embeddings;
3224
- vectorStore;
3225
- reranker;
2737
+ store;
3226
2738
  constructor(options) {
3227
2739
  this.cwd = options.cwd;
3228
2740
  this.config = options.config;
3229
- this.embeddings = options.embeddings;
3230
- this.vectorStore = options.vectorStore;
3231
- this.reranker = options.reranker;
2741
+ this.store = options.store;
3232
2742
  }
3233
2743
  static async create(options = {}) {
3234
- const cwd = path13.resolve(options.cwd ?? process.cwd());
2744
+ const cwd = path11.resolve(options.cwd ?? process.cwd());
3235
2745
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
3236
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
3237
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
3238
- const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
2746
+ const store = options.store ?? await createUpstashStore(config);
3239
2747
  return new _SearchEngine({
3240
2748
  cwd,
3241
2749
  config,
3242
- embeddings,
3243
- vectorStore,
3244
- reranker
2750
+ store
3245
2751
  });
3246
2752
  }
3247
2753
  getConfig() {
@@ -3255,142 +2761,90 @@ var SearchEngine = class _SearchEngine {
3255
2761
  const input = parsed.data;
3256
2762
  const totalStart = process.hrtime.bigint();
3257
2763
  const resolvedScope = resolveScope(this.config, input.scope);
3258
- await this.assertModelCompatibility(resolvedScope);
3259
2764
  const topK = input.topK ?? 10;
3260
- const wantsRerank = Boolean(input.rerank);
3261
2765
  const groupByPage = (input.groupBy ?? "page") === "page";
3262
2766
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
3263
- const embedStart = process.hrtime.bigint();
3264
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
3265
- const queryVector = queryEmbeddings[0];
3266
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
3267
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
3268
- }
3269
- const embedMs = hrTimeMs(embedStart);
3270
- const vectorStart = process.hrtime.bigint();
3271
- const hits = await this.vectorStore.query(
3272
- queryVector,
3273
- {
3274
- topK: candidateK,
3275
- pathPrefix: input.pathPrefix,
3276
- tags: input.tags
3277
- },
3278
- resolvedScope
3279
- );
3280
- const vectorMs = hrTimeMs(vectorStart);
3281
- const ranked = rankHits(hits, this.config);
3282
- let usedRerank = false;
3283
- let rerankMs = 0;
3284
- let ordered = ranked;
3285
- if (wantsRerank) {
3286
- const rerankStart = process.hrtime.bigint();
3287
- ordered = await this.rerankHits(input.q, ranked, topK);
3288
- rerankMs = hrTimeMs(rerankStart);
3289
- usedRerank = true;
3290
- }
3291
- const results = this.buildResults(ordered, topK, groupByPage);
2767
+ const filterParts = [];
2768
+ if (input.pathPrefix) {
2769
+ const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
2770
+ filterParts.push(`url GLOB '${prefix}*'`);
2771
+ }
2772
+ if (input.tags && input.tags.length > 0) {
2773
+ for (const tag of input.tags) {
2774
+ filterParts.push(`tags GLOB '*${tag}*'`);
2775
+ }
2776
+ }
2777
+ const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
2778
+ const useDualSearch = this.config.search.dualSearch && groupByPage;
2779
+ const searchStart = process.hrtime.bigint();
2780
+ let ranked;
2781
+ if (useDualSearch) {
2782
+ const chunkLimit = Math.max(topK * 10, 100);
2783
+ const pageLimit = 20;
2784
+ const [pageHits, chunkHits] = await Promise.all([
2785
+ this.store.searchPages(
2786
+ input.q,
2787
+ {
2788
+ limit: pageLimit,
2789
+ semanticWeight: this.config.search.semanticWeight,
2790
+ inputEnrichment: this.config.search.inputEnrichment,
2791
+ filter
2792
+ },
2793
+ resolvedScope
2794
+ ),
2795
+ this.store.search(
2796
+ input.q,
2797
+ {
2798
+ limit: chunkLimit,
2799
+ semanticWeight: this.config.search.semanticWeight,
2800
+ inputEnrichment: this.config.search.inputEnrichment,
2801
+ reranking: false,
2802
+ filter
2803
+ },
2804
+ resolvedScope
2805
+ )
2806
+ ]);
2807
+ const rankedChunks = rankHits(chunkHits, this.config, input.q);
2808
+ ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
2809
+ } else {
2810
+ const hits = await this.store.search(
2811
+ input.q,
2812
+ {
2813
+ limit: candidateK,
2814
+ semanticWeight: this.config.search.semanticWeight,
2815
+ inputEnrichment: this.config.search.inputEnrichment,
2816
+ reranking: this.config.search.reranking,
2817
+ filter
2818
+ },
2819
+ resolvedScope
2820
+ );
2821
+ ranked = rankHits(hits, this.config, input.q);
2822
+ }
2823
+ const searchMs = hrTimeMs(searchStart);
2824
+ const results = this.buildResults(ranked, topK, groupByPage, input.q);
3292
2825
  return {
3293
2826
  q: input.q,
3294
2827
  scope: resolvedScope.scopeName,
3295
2828
  results,
3296
2829
  meta: {
3297
2830
  timingsMs: {
3298
- embed: Math.round(embedMs),
3299
- vector: Math.round(vectorMs),
3300
- rerank: Math.round(rerankMs),
2831
+ search: Math.round(searchMs),
3301
2832
  total: Math.round(hrTimeMs(totalStart))
3302
- },
3303
- usedRerank,
3304
- modelId: this.config.embeddings.model
3305
- }
3306
- };
3307
- }
3308
- async *searchStreaming(request) {
3309
- const parsed = requestSchema.safeParse(request);
3310
- if (!parsed.success) {
3311
- throw new SearchSocketError("INVALID_REQUEST", parsed.error.issues[0]?.message ?? "Invalid request", 400);
3312
- }
3313
- const input = parsed.data;
3314
- const wantsRerank = Boolean(input.rerank);
3315
- if (!wantsRerank) {
3316
- const response = await this.search(request);
3317
- yield { phase: "initial", data: response };
3318
- return;
3319
- }
3320
- const totalStart = process.hrtime.bigint();
3321
- const resolvedScope = resolveScope(this.config, input.scope);
3322
- await this.assertModelCompatibility(resolvedScope);
3323
- const topK = input.topK ?? 10;
3324
- const groupByPage = (input.groupBy ?? "page") === "page";
3325
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
3326
- const embedStart = process.hrtime.bigint();
3327
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
3328
- const queryVector = queryEmbeddings[0];
3329
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
3330
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
3331
- }
3332
- const embedMs = hrTimeMs(embedStart);
3333
- const vectorStart = process.hrtime.bigint();
3334
- const hits = await this.vectorStore.query(
3335
- queryVector,
3336
- {
3337
- topK: candidateK,
3338
- pathPrefix: input.pathPrefix,
3339
- tags: input.tags
3340
- },
3341
- resolvedScope
3342
- );
3343
- const vectorMs = hrTimeMs(vectorStart);
3344
- const ranked = rankHits(hits, this.config);
3345
- const initialResults = this.buildResults(ranked, topK, groupByPage);
3346
- yield {
3347
- phase: "initial",
3348
- data: {
3349
- q: input.q,
3350
- scope: resolvedScope.scopeName,
3351
- results: initialResults,
3352
- meta: {
3353
- timingsMs: {
3354
- embed: Math.round(embedMs),
3355
- vector: Math.round(vectorMs),
3356
- rerank: 0,
3357
- total: Math.round(hrTimeMs(totalStart))
3358
- },
3359
- usedRerank: false,
3360
- modelId: this.config.embeddings.model
3361
- }
3362
- }
3363
- };
3364
- const rerankStart = process.hrtime.bigint();
3365
- const reranked = await this.rerankHits(input.q, ranked, topK);
3366
- const rerankMs = hrTimeMs(rerankStart);
3367
- const rerankedResults = this.buildResults(reranked, topK, groupByPage);
3368
- yield {
3369
- phase: "reranked",
3370
- data: {
3371
- q: input.q,
3372
- scope: resolvedScope.scopeName,
3373
- results: rerankedResults,
3374
- meta: {
3375
- timingsMs: {
3376
- embed: Math.round(embedMs),
3377
- vector: Math.round(vectorMs),
3378
- rerank: Math.round(rerankMs),
3379
- total: Math.round(hrTimeMs(totalStart))
3380
- },
3381
- usedRerank: true,
3382
- modelId: this.config.embeddings.model
3383
2833
  }
3384
2834
  }
3385
2835
  };
3386
2836
  }
3387
- buildResults(ordered, topK, groupByPage) {
3388
- const minScore = this.config.ranking.minScore;
2837
+ ensureSnippet(hit) {
2838
+ const snippet = hit.hit.metadata.snippet;
2839
+ if (snippet && snippet.length >= 30) return snippet;
2840
+ const chunkText = hit.hit.metadata.chunkText;
2841
+ if (chunkText) return toSnippet(chunkText);
2842
+ return snippet || "";
2843
+ }
2844
+ buildResults(ordered, topK, groupByPage, _query) {
3389
2845
  if (groupByPage) {
3390
2846
  let pages = aggregateByPage(ordered, this.config);
3391
- if (minScore > 0) {
3392
- pages = pages.filter((p) => p.pageScore >= minScore);
3393
- }
2847
+ pages = trimByScoreGap(pages, this.config);
3394
2848
  const minRatio = this.config.ranking.minChunkScoreRatio;
3395
2849
  return pages.slice(0, topK).map((page) => {
3396
2850
  const bestScore = page.bestChunk.finalScore;
@@ -3400,12 +2854,12 @@ var SearchEngine = class _SearchEngine {
3400
2854
  url: page.url,
3401
2855
  title: page.title,
3402
2856
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
3403
- snippet: page.bestChunk.hit.metadata.snippet,
2857
+ snippet: this.ensureSnippet(page.bestChunk),
3404
2858
  score: Number(page.pageScore.toFixed(6)),
3405
2859
  routeFile: page.routeFile,
3406
2860
  chunks: meaningful.length > 1 ? meaningful.map((c) => ({
3407
2861
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
3408
- snippet: c.hit.metadata.snippet,
2862
+ snippet: this.ensureSnippet(c),
3409
2863
  headingPath: c.hit.metadata.headingPath,
3410
2864
  score: Number(c.finalScore.toFixed(6))
3411
2865
  })) : void 0
@@ -3413,6 +2867,7 @@ var SearchEngine = class _SearchEngine {
3413
2867
  });
3414
2868
  } else {
3415
2869
  let filtered = ordered;
2870
+ const minScore = this.config.ranking.minScore;
3416
2871
  if (minScore > 0) {
3417
2872
  filtered = ordered.filter((entry) => entry.finalScore >= minScore);
3418
2873
  }
@@ -3420,7 +2875,7 @@ var SearchEngine = class _SearchEngine {
3420
2875
  url: hit.metadata.url,
3421
2876
  title: hit.metadata.title,
3422
2877
  sectionTitle: hit.metadata.sectionTitle || void 0,
3423
- snippet: hit.metadata.snippet,
2878
+ snippet: this.ensureSnippet({ hit, finalScore }),
3424
2879
  score: Number(finalScore.toFixed(6)),
3425
2880
  routeFile: hit.metadata.routeFile
3426
2881
  }));
@@ -3429,7 +2884,7 @@ var SearchEngine = class _SearchEngine {
3429
2884
  async getPage(pathOrUrl, scope) {
3430
2885
  const resolvedScope = resolveScope(this.config, scope);
3431
2886
  const urlPath = this.resolveInputPath(pathOrUrl);
3432
- const page = await this.vectorStore.getPage(urlPath, resolvedScope);
2887
+ const page = await this.store.getPage(urlPath, resolvedScope);
3433
2888
  if (!page) {
3434
2889
  throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
3435
2890
  }
@@ -3450,7 +2905,7 @@ var SearchEngine = class _SearchEngine {
3450
2905
  };
3451
2906
  }
3452
2907
  async health() {
3453
- return this.vectorStore.health();
2908
+ return this.store.health();
3454
2909
  }
3455
2910
  resolveInputPath(pathOrUrl) {
3456
2911
  try {
@@ -3462,94 +2917,10 @@ var SearchEngine = class _SearchEngine {
3462
2917
  const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
3463
2918
  return normalizeUrlPath(withoutQueryOrHash);
3464
2919
  }
3465
- async assertModelCompatibility(scope) {
3466
- const modelId = await this.vectorStore.getScopeModelId(scope);
3467
- if (modelId && modelId !== this.config.embeddings.model) {
3468
- throw new SearchSocketError(
3469
- "EMBEDDING_MODEL_MISMATCH",
3470
- `Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
3471
- );
3472
- }
3473
- }
3474
- async rerankHits(query, ranked, topK) {
3475
- if (!this.config.rerank.enabled) {
3476
- throw new SearchSocketError(
3477
- "INVALID_REQUEST",
3478
- "rerank=true requested but rerank.enabled is not set to true.",
3479
- 400
3480
- );
3481
- }
3482
- if (!this.reranker) {
3483
- throw new SearchSocketError(
3484
- "CONFIG_MISSING",
3485
- `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
3486
- 400
3487
- );
3488
- }
3489
- const pageGroups = /* @__PURE__ */ new Map();
3490
- for (const entry of ranked) {
3491
- const url = entry.hit.metadata.url;
3492
- const group = pageGroups.get(url);
3493
- if (group) group.push(entry);
3494
- else pageGroups.set(url, [entry]);
3495
- }
3496
- const MAX_CHUNKS_PER_PAGE = 5;
3497
- const MIN_CHUNKS_PER_PAGE = 1;
3498
- const MIN_CHUNK_SCORE_RATIO = 0.5;
3499
- const MAX_DOC_CHARS = 2e3;
3500
- const pageCandidates = [];
3501
- for (const [url, chunks] of pageGroups) {
3502
- const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
3503
- const bestScore = byScore[0].finalScore;
3504
- const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
3505
- const selected = byScore.filter(
3506
- (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
3507
- ).slice(0, MAX_CHUNKS_PER_PAGE);
3508
- selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
3509
- const first = selected[0].hit.metadata;
3510
- const parts = [first.title];
3511
- if (first.description) {
3512
- parts.push(first.description);
3513
- }
3514
- if (first.keywords && first.keywords.length > 0) {
3515
- parts.push(first.keywords.join(", "));
3516
- }
3517
- const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
3518
- parts.push(body);
3519
- let text = parts.join("\n\n");
3520
- if (text.length > MAX_DOC_CHARS) {
3521
- text = text.slice(0, MAX_DOC_CHARS);
3522
- }
3523
- pageCandidates.push({ id: url, text });
3524
- }
3525
- const maxCandidates = Math.max(topK, this.config.rerank.topN);
3526
- const cappedCandidates = pageCandidates.slice(0, maxCandidates);
3527
- const reranked = await this.reranker.rerank(
3528
- query,
3529
- cappedCandidates,
3530
- maxCandidates
3531
- );
3532
- const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
3533
- return ranked.map((entry) => {
3534
- const pageScore = scoreByUrl.get(entry.hit.metadata.url);
3535
- const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
3536
- if (pageScore === void 0 || !Number.isFinite(pageScore)) {
3537
- return { ...entry, finalScore: base };
3538
- }
3539
- const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
3540
- return {
3541
- ...entry,
3542
- finalScore: Number.isFinite(combined) ? combined : base
3543
- };
3544
- }).sort((a, b) => {
3545
- const delta = b.finalScore - a.finalScore;
3546
- return Number.isNaN(delta) ? 0 : delta;
3547
- });
3548
- }
3549
2920
  };
3550
2921
 
3551
2922
  // src/mcp/server.ts
3552
- function createServer(engine, config) {
2923
+ function createServer(engine) {
3553
2924
  const server = new McpServer({
3554
2925
  name: "searchsocket-mcp",
3555
2926
  version: "0.1.0"
@@ -3557,15 +2928,14 @@ function createServer(engine, config) {
3557
2928
  server.registerTool(
3558
2929
  "search",
3559
2930
  {
3560
- description: "Semantic site search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and rerank. Enable rerank for better relevance on natural-language queries.",
2931
+ description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
3561
2932
  inputSchema: {
3562
2933
  query: z3.string().min(1),
3563
2934
  scope: z3.string().optional(),
3564
2935
  topK: z3.number().int().positive().max(100).optional(),
3565
2936
  pathPrefix: z3.string().optional(),
3566
2937
  tags: z3.array(z3.string()).optional(),
3567
- groupBy: z3.enum(["page", "chunk"]).optional(),
3568
- rerank: z3.boolean().optional().describe("Enable reranking for better relevance (uses Jina Reranker). Defaults to true when rerank is enabled in config.")
2938
+ groupBy: z3.enum(["page", "chunk"]).optional()
3569
2939
  }
3570
2940
  },
3571
2941
  async (input) => {
@@ -3575,8 +2945,7 @@ function createServer(engine, config) {
3575
2945
  scope: input.scope,
3576
2946
  pathPrefix: input.pathPrefix,
3577
2947
  tags: input.tags,
3578
- groupBy: input.groupBy,
3579
- rerank: input.rerank ?? config.rerank.enabled
2948
+ groupBy: input.groupBy
3580
2949
  });
3581
2950
  return {
3582
2951
  content: [
@@ -3704,10 +3073,10 @@ async function runMcpServer(options = {}) {
3704
3073
  config
3705
3074
  });
3706
3075
  if (resolvedTransport === "http") {
3707
- await startHttpServer(() => createServer(engine, config), config, options);
3076
+ await startHttpServer(() => createServer(engine), config, options);
3708
3077
  return;
3709
3078
  }
3710
- const server = createServer(engine, config);
3079
+ const server = createServer(engine);
3711
3080
  const stdioTransport = new StdioServerTransport();
3712
3081
  await server.connect(stdioTransport);
3713
3082
  }
@@ -3746,9 +3115,6 @@ function parseDurationMs(value) {
3746
3115
  throw new SearchSocketError("INVALID_REQUEST", `Unsupported duration unit: ${unit}`, 400);
3747
3116
  }
3748
3117
  }
3749
- function formatUsd(value) {
3750
- return `$${value.toFixed(6)}`;
3751
- }
3752
3118
  function printIndexSummary(stats) {
3753
3119
  process.stdout.write(`pages processed: ${stats.pagesProcessed}
3754
3120
  `);
@@ -3756,13 +3122,9 @@ function printIndexSummary(stats) {
3756
3122
  `);
3757
3123
  process.stdout.write(`chunks changed: ${stats.chunksChanged}
3758
3124
  `);
3759
- process.stdout.write(`embeddings created: ${stats.newEmbeddings}
3125
+ process.stdout.write(`documents upserted: ${stats.documentsUpserted}
3760
3126
  `);
3761
3127
  process.stdout.write(`deletes: ${stats.deletes}
3762
- `);
3763
- process.stdout.write(`estimated tokens: ${stats.estimatedTokens}
3764
- `);
3765
- process.stdout.write(`estimated cost (USD): ${formatUsd(stats.estimatedCostUSD)}
3766
3128
  `);
3767
3129
  process.stdout.write(`route mapping: ${stats.routeExact} exact, ${stats.routeBestEffort} best-effort
3768
3130
  `);
@@ -3776,7 +3138,7 @@ function collectWatchPaths(config, cwd) {
3776
3138
  const paths = ["src/routes/**"];
3777
3139
  if (config.source.mode === "content-files" && config.source.contentFiles) {
3778
3140
  for (const pattern of config.source.contentFiles.globs) {
3779
- paths.push(path14.join(config.source.contentFiles.baseDir, pattern));
3141
+ paths.push(path12.join(config.source.contentFiles.baseDir, pattern));
3780
3142
  }
3781
3143
  }
3782
3144
  if (config.source.mode === "static-output") {
@@ -3789,25 +3151,22 @@ function collectWatchPaths(config, cwd) {
3789
3151
  paths.push("searchsocket.config.ts");
3790
3152
  paths.push(config.source.build.outputDir);
3791
3153
  }
3792
- return paths.map((value) => path14.resolve(cwd, value));
3154
+ return paths.map((value) => path12.resolve(cwd, value));
3793
3155
  }
3794
3156
  function ensureStateDir(cwd) {
3795
- const target = path14.join(cwd, ".searchsocket");
3796
- fs10.mkdirSync(target, { recursive: true });
3157
+ const target = path12.join(cwd, ".searchsocket");
3158
+ fs8.mkdirSync(target, { recursive: true });
3797
3159
  return target;
3798
3160
  }
3799
3161
  function ensureGitignore(cwd) {
3800
- const gitignorePath = path14.join(cwd, ".gitignore");
3162
+ const gitignorePath = path12.join(cwd, ".gitignore");
3801
3163
  const entries = [
3802
- ".searchsocket/vectors.db",
3803
- ".searchsocket/vectors.db-shm",
3804
- ".searchsocket/vectors.db-wal",
3805
3164
  ".searchsocket/manifest.json",
3806
3165
  ".searchsocket/registry.json"
3807
3166
  ];
3808
3167
  let content = "";
3809
- if (fs10.existsSync(gitignorePath)) {
3810
- content = fs10.readFileSync(gitignorePath, "utf8");
3168
+ if (fs8.existsSync(gitignorePath)) {
3169
+ content = fs8.readFileSync(gitignorePath, "utf8");
3811
3170
  }
3812
3171
  const lines = content.split("\n");
3813
3172
  const missing = entries.filter((entry) => !lines.some((line) => line.trim() === entry));
@@ -3818,10 +3177,10 @@ function ensureGitignore(cwd) {
3818
3177
  # SearchSocket local state
3819
3178
  ${missing.join("\n")}
3820
3179
  `;
3821
- fs10.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
3180
+ fs8.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
3822
3181
  }
3823
3182
  function readScopesFromFile(filePath) {
3824
- const raw = fs10.readFileSync(filePath, "utf8");
3183
+ const raw = fs8.readFileSync(filePath, "utf8");
3825
3184
  return new Set(
3826
3185
  raw.split(/\r?\n/).map((line) => line.trim()).filter(Boolean)
3827
3186
  );
@@ -3845,8 +3204,8 @@ function readRemoteGitBranches(cwd) {
3845
3204
  }
3846
3205
  }
3847
3206
  async function loadResolvedConfigForDev(cwd, configPath) {
3848
- const resolvedConfigPath = path14.resolve(cwd, configPath ?? "searchsocket.config.ts");
3849
- if (fs10.existsSync(resolvedConfigPath)) {
3207
+ const resolvedConfigPath = path12.resolve(cwd, configPath ?? "searchsocket.config.ts");
3208
+ if (fs8.existsSync(resolvedConfigPath)) {
3850
3209
  return loadConfig({ cwd, configPath });
3851
3210
  }
3852
3211
  return mergeConfig(cwd, {});
@@ -3893,7 +3252,7 @@ var program = new Command();
3893
3252
  program.name("searchsocket").description("Semantic site search and MCP retrieval for SvelteKit").version(package_default.version).option("-C, --cwd <path>", "working directory", process.cwd()).option("--config <path>", "config path (defaults to searchsocket.config.ts)");
3894
3253
  program.command("init").description("Create searchsocket.config.ts and .searchsocket state directory").action(async (_opts, command) => {
3895
3254
  const root = getRootOptions(command).cwd ?? process.cwd();
3896
- const cwd = path14.resolve(root);
3255
+ const cwd = path12.resolve(root);
3897
3256
  const configPath = writeMinimalConfig(cwd);
3898
3257
  const stateDir = ensureStateDir(cwd);
3899
3258
  ensureGitignore(cwd);
@@ -3911,9 +3270,9 @@ program.command("init").description("Create searchsocket.config.ts and .searchso
3911
3270
  process.stdout.write("// searchsocketVitePlugin({ enabled: true, changedOnly: true })\n");
3912
3271
  process.stdout.write("// or env-driven: SEARCHSOCKET_AUTO_INDEX=1 pnpm build\n");
3913
3272
  });
3914
- program.command("index").description("Index site content into markdown mirror + vector store").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full mirror rebuild and re-upsert", false).option("--dry-run", "compute plan and cost, no API writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--quiet", "suppress all output except errors and warnings", false).option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
3273
+ program.command("index").description("Index site content into Upstash Search").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full rebuild", false).option("--dry-run", "compute plan, no writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--quiet", "suppress all output except errors and warnings", false).option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
3915
3274
  const rootOpts = getRootOptions(command);
3916
- const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3275
+ const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
3917
3276
  await runIndexCommand({
3918
3277
  cwd,
3919
3278
  configPath: rootOpts?.config,
@@ -3929,16 +3288,16 @@ program.command("index").description("Index site content into markdown mirror +
3929
3288
  json: opts.json
3930
3289
  });
3931
3290
  });
3932
- program.command("status").description("Show scope, indexing state, backend health, and recent cost estimate").option("--scope <name>", "scope override").action(async (opts, command) => {
3291
+ program.command("status").description("Show scope, indexing state, and backend health").option("--scope <name>", "scope override").action(async (opts, command) => {
3933
3292
  const rootOpts = getRootOptions(command);
3934
- const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3293
+ const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
3935
3294
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3936
3295
  const scope = resolveScope(config, opts.scope);
3937
- let vectorStore;
3296
+ let store;
3938
3297
  let health = { ok: false, details: "not checked" };
3939
3298
  try {
3940
- vectorStore = await createVectorStore(config, cwd);
3941
- health = await vectorStore.health();
3299
+ store = await createUpstashStore(config);
3300
+ health = await store.health();
3942
3301
  } catch (error) {
3943
3302
  health = {
3944
3303
  ok: false,
@@ -3946,24 +3305,22 @@ program.command("status").description("Show scope, indexing state, backend healt
3946
3305
  };
3947
3306
  process.stdout.write(`project: ${config.project.id}
3948
3307
  `);
3949
- process.stdout.write(`vector health: error (${health.details})
3308
+ process.stdout.write(`backend health: error (${health.details})
3950
3309
  `);
3951
3310
  process.exitCode = 1;
3952
3311
  return;
3953
3312
  }
3954
3313
  let scopeRegistry = [];
3955
3314
  let scopeInfo;
3956
- let hashes = /* @__PURE__ */ new Map();
3957
3315
  try {
3958
- scopeRegistry = await vectorStore.listScopes(config.project.id);
3316
+ scopeRegistry = await store.listScopes(config.project.id);
3959
3317
  scopeInfo = scopeRegistry.find((entry) => entry.scopeName === scope.scopeName);
3960
- hashes = await vectorStore.getContentHashes(scope);
3961
3318
  } catch (error) {
3962
3319
  process.stdout.write(`project: ${config.project.id}
3963
3320
  `);
3964
3321
  process.stdout.write(`resolved scope: ${scope.scopeName}
3965
3322
  `);
3966
- process.stdout.write(`vector health: error (${error instanceof Error ? error.message : "unknown error"})
3323
+ process.stdout.write(`backend health: error (${error instanceof Error ? error.message : "unknown error"})
3967
3324
  `);
3968
3325
  process.exitCode = 1;
3969
3326
  return;
@@ -3972,25 +3329,15 @@ program.command("status").description("Show scope, indexing state, backend healt
3972
3329
  `);
3973
3330
  process.stdout.write(`resolved scope: ${scope.scopeName}
3974
3331
  `);
3975
- process.stdout.write(`embedding model: ${config.embeddings.model}
3976
- `);
3977
- const tursoUrl = process.env[config.vector.turso.urlEnv];
3978
- const vectorMode = tursoUrl ? `remote (${tursoUrl})` : `local (${config.vector.turso.localPath})`;
3979
- process.stdout.write(`vector backend: turso/libsql (${vectorMode})
3332
+ process.stdout.write(`backend: upstash-search
3980
3333
  `);
3981
- process.stdout.write(`vector health: ${health.ok ? "ok" : `error (${health.details ?? "n/a"})`}
3334
+ process.stdout.write(`backend health: ${health.ok ? "ok" : `error (${health.details ?? "n/a"})`}
3982
3335
  `);
3983
3336
  if (scopeInfo) {
3984
3337
  process.stdout.write(`last indexed (${scope.scopeName}): ${scopeInfo.lastIndexedAt ?? "never"}
3985
3338
  `);
3986
- process.stdout.write(`tracked chunks: ${hashes.size}
3987
- `);
3988
- if (scopeInfo.lastEstimateTokens != null) {
3989
- process.stdout.write(`last estimated tokens: ${scopeInfo.lastEstimateTokens}
3990
- `);
3991
- }
3992
- if (scopeInfo.lastEstimateCostUSD != null) {
3993
- process.stdout.write(`last estimated cost: ${formatUsd(scopeInfo.lastEstimateCostUSD)}
3339
+ if (scopeInfo.documentCount != null) {
3340
+ process.stdout.write(`documents: ${scopeInfo.documentCount}
3994
3341
  `);
3995
3342
  }
3996
3343
  } else {
@@ -4001,7 +3348,7 @@ program.command("status").description("Show scope, indexing state, backend healt
4001
3348
  process.stdout.write("\nregistry scopes:\n");
4002
3349
  for (const item of scopeRegistry) {
4003
3350
  process.stdout.write(
4004
- ` - ${item.scopeName} model=${item.modelId} lastIndexedAt=${item.lastIndexedAt} vectors=${item.vectorCount ?? "unknown"}
3351
+ ` - ${item.scopeName} lastIndexedAt=${item.lastIndexedAt} documents=${item.documentCount ?? "unknown"}
4005
3352
  `
4006
3353
  );
4007
3354
  }
@@ -4009,7 +3356,7 @@ program.command("status").description("Show scope, indexing state, backend healt
4009
3356
  });
4010
3357
  program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
4011
3358
  const rootOpts = getRootOptions(command);
4012
- const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3359
+ const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
4013
3360
  const config = await loadResolvedConfigForDev(cwd, rootOpts?.config);
4014
3361
  const watchPaths = collectWatchPaths(config, cwd);
4015
3362
  process.stdout.write("starting searchsocket dev watcher...\n");
@@ -4076,45 +3423,44 @@ ${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
4076
3423
  });
4077
3424
  });
4078
3425
  });
4079
- program.command("clean").description("Delete local state and optionally delete remote vectors for a scope").option("--scope <name>", "scope override").option("--remote", "delete remote scope vectors", false).action(async (opts, command) => {
3426
+ program.command("clean").description("Delete local state and optionally delete remote indexes for a scope").option("--scope <name>", "scope override").option("--remote", "delete remote scope indexes", false).action(async (opts, command) => {
4080
3427
  const rootOpts = getRootOptions(command);
4081
- const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3428
+ const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
4082
3429
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
4083
- const scope = resolveScope(config, opts.scope);
4084
- const statePath = path14.join(cwd, config.state.dir);
3430
+ const statePath = path12.join(cwd, config.state.dir);
4085
3431
  await fsp.rm(statePath, { recursive: true, force: true });
4086
3432
  process.stdout.write(`deleted local state directory: ${statePath}
4087
3433
  `);
4088
3434
  if (opts.remote) {
4089
- const vectorStore = await createVectorStore(config, cwd);
4090
- await vectorStore.dropAllTables();
4091
- process.stdout.write(`dropped all remote tables (chunks, registry, pages)
3435
+ const store = await createUpstashStore(config);
3436
+ await store.dropAllIndexes(config.project.id);
3437
+ process.stdout.write(`dropped all remote indexes for project ${config.project.id}
4092
3438
  `);
4093
3439
  }
4094
3440
  });
4095
3441
  program.command("prune").description("List/delete stale scopes (dry-run by default)").option("--apply", "apply deletions", false).option("--scopes-file <path>", "file containing active scopes").option("--older-than <duration>", "ttl cutoff like 30d").action(async (opts, command) => {
4096
3442
  const rootOpts = getRootOptions(command);
4097
- const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3443
+ const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
4098
3444
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
4099
3445
  const baseScope = resolveScope(config);
4100
- let vectorStore;
3446
+ let store;
4101
3447
  let scopes;
4102
3448
  try {
4103
- vectorStore = await createVectorStore(config, cwd);
4104
- scopes = await vectorStore.listScopes(config.project.id);
3449
+ store = await createUpstashStore(config);
3450
+ scopes = await store.listScopes(config.project.id);
4105
3451
  } catch (error) {
4106
3452
  process.stderr.write(
4107
- `error: failed to access Turso vector store: ${error instanceof Error ? error.message : String(error)}
3453
+ `error: failed to access Upstash Search: ${error instanceof Error ? error.message : String(error)}
4108
3454
  `
4109
3455
  );
4110
3456
  process.exitCode = 1;
4111
3457
  return;
4112
3458
  }
4113
- process.stdout.write(`using remote registry
3459
+ process.stdout.write(`using Upstash Search
4114
3460
  `);
4115
3461
  let keepScopes = /* @__PURE__ */ new Set();
4116
3462
  if (opts.scopesFile) {
4117
- keepScopes = readScopesFromFile(path14.resolve(cwd, opts.scopesFile));
3463
+ keepScopes = readScopesFromFile(path12.resolve(cwd, opts.scopesFile));
4118
3464
  } else {
4119
3465
  keepScopes = readRemoteGitBranches(cwd);
4120
3466
  }
@@ -4132,7 +3478,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
4132
3478
  staleByList = !keepScopes.has(entry.scopeName);
4133
3479
  }
4134
3480
  let staleByTtl = false;
4135
- if (olderThanMs) {
3481
+ if (olderThanMs && entry.lastIndexedAt !== "unknown") {
4136
3482
  staleByTtl = now - Date.parse(entry.lastIndexedAt) > olderThanMs;
4137
3483
  }
4138
3484
  if (keepScopes.size > 0 && olderThanMs) {
@@ -4168,7 +3514,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
4168
3514
  scopeId: `${config.project.id}:${entry.scopeName}`
4169
3515
  };
4170
3516
  try {
4171
- await vectorStore.deleteScope(scope);
3517
+ await store.deleteScope(scope);
4172
3518
  deleted += 1;
4173
3519
  } catch (error) {
4174
3520
  process.stdout.write(
@@ -4185,7 +3531,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
4185
3531
  });
4186
3532
  program.command("doctor").description("Validate config, env vars, provider connectivity, and local write access").action(async (_opts, command) => {
4187
3533
  const rootOpts = getRootOptions(command);
4188
- const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3534
+ const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
4189
3535
  const checks = [];
4190
3536
  let config = null;
4191
3537
  try {
@@ -4199,23 +3545,21 @@ program.command("doctor").description("Validate config, env vars, provider conne
4199
3545
  });
4200
3546
  }
4201
3547
  if (config) {
4202
- const embKey = process.env[config.embeddings.apiKeyEnv];
3548
+ const upstashUrl = config.upstash.url ?? process.env[config.upstash.urlEnv];
3549
+ const upstashToken = config.upstash.token ?? process.env[config.upstash.tokenEnv];
4203
3550
  checks.push({
4204
- name: `env ${config.embeddings.apiKeyEnv}`,
4205
- ok: Boolean(embKey),
4206
- details: embKey ? void 0 : "missing"
3551
+ name: `env ${config.upstash.urlEnv}`,
3552
+ ok: Boolean(upstashUrl),
3553
+ details: upstashUrl ? void 0 : "missing"
3554
+ });
3555
+ checks.push({
3556
+ name: `env ${config.upstash.tokenEnv}`,
3557
+ ok: Boolean(upstashToken),
3558
+ details: upstashToken ? void 0 : "missing"
4207
3559
  });
4208
- {
4209
- const tursoUrl = process.env[config.vector.turso.urlEnv];
4210
- checks.push({
4211
- name: "turso/libsql",
4212
- ok: true,
4213
- details: tursoUrl ? `remote: ${tursoUrl}` : `local file: ${config.vector.turso.localPath}`
4214
- });
4215
- }
4216
3560
  if (config.source.mode === "static-output") {
4217
- const outputDir = path14.resolve(cwd, config.source.staticOutputDir);
4218
- const exists = fs10.existsSync(outputDir);
3561
+ const outputDir = path12.resolve(cwd, config.source.staticOutputDir);
3562
+ const exists = fs8.existsSync(outputDir);
4219
3563
  checks.push({
4220
3564
  name: "source: static output dir",
4221
3565
  ok: exists,
@@ -4224,15 +3568,15 @@ program.command("doctor").description("Validate config, env vars, provider conne
4224
3568
  } else if (config.source.mode === "build") {
4225
3569
  const buildConfig = config.source.build;
4226
3570
  if (buildConfig) {
4227
- const manifestPath = path14.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
4228
- const manifestExists = fs10.existsSync(manifestPath);
3571
+ const manifestPath = path12.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
3572
+ const manifestExists = fs8.existsSync(manifestPath);
4229
3573
  checks.push({
4230
3574
  name: "source: build manifest",
4231
3575
  ok: manifestExists,
4232
3576
  details: manifestExists ? manifestPath : `${manifestPath} not found (run \`vite build\` first)`
4233
3577
  });
4234
- const viteBin = path14.resolve(cwd, "node_modules", ".bin", "vite");
4235
- const viteExists = fs10.existsSync(viteBin);
3578
+ const viteBin = path12.resolve(cwd, "node_modules", ".bin", "vite");
3579
+ const viteExists = fs8.existsSync(viteBin);
4236
3580
  checks.push({
4237
3581
  name: "source: vite binary",
4238
3582
  ok: viteExists,
@@ -4249,7 +3593,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
4249
3593
  const contentConfig = config.source.contentFiles;
4250
3594
  if (contentConfig) {
4251
3595
  const fg4 = await import("fast-glob");
4252
- const baseDir = path14.resolve(cwd, contentConfig.baseDir);
3596
+ const baseDir = path12.resolve(cwd, contentConfig.baseDir);
4253
3597
  const files = await fg4.default(contentConfig.globs, { cwd: baseDir, onlyFiles: true });
4254
3598
  checks.push({
4255
3599
  name: "source: content files",
@@ -4264,61 +3608,26 @@ program.command("doctor").description("Validate config, env vars, provider conne
4264
3608
  });
4265
3609
  }
4266
3610
  }
4267
- try {
4268
- const provider = createEmbeddingsProvider(config);
4269
- await provider.embedTexts(["searchsocket doctor ping"], config.embeddings.model);
4270
- checks.push({ name: "embedding provider connectivity", ok: true });
4271
- } catch (error) {
4272
- checks.push({
4273
- name: "embedding provider connectivity",
4274
- ok: false,
4275
- details: error instanceof Error ? error.message : "unknown error"
4276
- });
4277
- }
4278
3611
  let store = null;
4279
3612
  try {
4280
- store = await createVectorStore(config, cwd);
3613
+ store = await createUpstashStore(config);
4281
3614
  const health = await store.health();
4282
3615
  checks.push({
4283
- name: "vector backend connectivity",
3616
+ name: "upstash search connectivity",
4284
3617
  ok: health.ok,
4285
3618
  details: health.details
4286
3619
  });
4287
3620
  } catch (error) {
4288
3621
  checks.push({
4289
- name: "vector backend connectivity",
3622
+ name: "upstash search connectivity",
4290
3623
  ok: false,
4291
3624
  details: error instanceof Error ? error.message : "unknown error"
4292
3625
  });
4293
3626
  }
4294
- if (store) {
4295
- try {
4296
- const testScope = {
4297
- projectId: config.project.id,
4298
- scopeName: "_searchsocket_doctor_probe",
4299
- scopeId: `${config.project.id}:_searchsocket_doctor_probe`
4300
- };
4301
- await store.recordScope({
4302
- projectId: testScope.projectId,
4303
- scopeName: testScope.scopeName,
4304
- modelId: config.embeddings.model,
4305
- lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
4306
- vectorCount: 0
4307
- });
4308
- await store.deleteScope(testScope);
4309
- checks.push({ name: "vector backend write permission", ok: true });
4310
- } catch (error) {
4311
- checks.push({
4312
- name: "vector backend write permission",
4313
- ok: false,
4314
- details: error instanceof Error ? error.message : "write test failed"
4315
- });
4316
- }
4317
- }
4318
3627
  try {
4319
3628
  const scope = resolveScope(config);
4320
3629
  const { statePath } = ensureStateDirs(cwd, config.state.dir, scope);
4321
- const testPath = path14.join(statePath, ".write-test");
3630
+ const testPath = path12.join(statePath, ".write-test");
4322
3631
  await fsp.writeFile(testPath, "ok\n", "utf8");
4323
3632
  await fsp.rm(testPath, { force: true });
4324
3633
  checks.push({ name: "state directory writable", ok: true });
@@ -4347,7 +3656,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
4347
3656
  });
4348
3657
  program.command("mcp").description("Run SearchSocket MCP server").option("--transport <transport>", "stdio|http", "stdio").option("--port <n>", "HTTP port", "3338").option("--path <path>", "HTTP path", "/mcp").action(async (opts, command) => {
4349
3658
  const rootOpts = getRootOptions(command);
4350
- const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3659
+ const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
4351
3660
  await runMcpServer({
4352
3661
  cwd,
4353
3662
  configPath: rootOpts?.config,
@@ -4356,9 +3665,9 @@ program.command("mcp").description("Run SearchSocket MCP server").option("--tran
4356
3665
  httpPath: opts.path
4357
3666
  });
4358
3667
  });
4359
- program.command("search").description("Quick local CLI search against indexed vectors").requiredOption("--q <query>", "search query").option("--scope <name>", "scope override").option("--top-k <n>", "top K results", "10").option("--path-prefix <prefix>", "path prefix filter").option("--rerank", "enable configured reranker", false).action(async (opts, command) => {
3668
+ program.command("search").description("Quick CLI search against Upstash Search").requiredOption("--q <query>", "search query").option("--scope <name>", "scope override").option("--top-k <n>", "top K results", "10").option("--path-prefix <prefix>", "path prefix filter").action(async (opts, command) => {
4360
3669
  const rootOpts = getRootOptions(command);
4361
- const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3670
+ const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
4362
3671
  const engine = await SearchEngine.create({
4363
3672
  cwd,
4364
3673
  configPath: rootOpts?.config
@@ -4367,14 +3676,13 @@ program.command("search").description("Quick local CLI search against indexed ve
4367
3676
  q: opts.q,
4368
3677
  scope: opts.scope,
4369
3678
  topK: parsePositiveInt(opts.topK, "--top-k"),
4370
- pathPrefix: opts.pathPrefix,
4371
- rerank: opts.rerank
3679
+ pathPrefix: opts.pathPrefix
4372
3680
  });
4373
3681
  process.stdout.write(`${JSON.stringify(result, null, 2)}
4374
3682
  `);
4375
3683
  });
4376
3684
  async function main() {
4377
- dotenvConfig({ path: path14.resolve(process.cwd(), ".env") });
3685
+ dotenvConfig({ path: path12.resolve(process.cwd(), ".env") });
4378
3686
  await program.parseAsync(process.argv);
4379
3687
  }
4380
3688
  main().catch((error) => {