searchsocket 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -12,7 +12,7 @@ import { Command } from "commander";
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.2.1",
15
+ version: "0.3.0",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -82,7 +82,6 @@ var package_default = {
82
82
  "fast-glob": "^3.3.3",
83
83
  "gray-matter": "^4.0.3",
84
84
  jiti: "^2.6.1",
85
- openai: "^6.19.0",
86
85
  "p-limit": "^7.3.0",
87
86
  turndown: "^7.2.2",
88
87
  "turndown-plugin-gfm": "^1.0.2",
@@ -133,7 +132,11 @@ var searchSocketConfigSchema = z.object({
133
132
  outputDir: z.string().min(1).optional(),
134
133
  paramValues: z.record(z.string(), z.array(z.string())).optional(),
135
134
  exclude: z.array(z.string()).optional(),
136
- previewTimeout: z.number().int().positive().optional()
135
+ previewTimeout: z.number().int().positive().optional(),
136
+ discover: z.boolean().optional(),
137
+ seedUrls: z.array(z.string()).optional(),
138
+ maxPages: z.number().int().positive().optional(),
139
+ maxDepth: z.number().int().nonnegative().optional()
137
140
  }).optional()
138
141
  }).optional(),
139
142
  extract: z.object({
@@ -160,8 +163,9 @@ var searchSocketConfigSchema = z.object({
160
163
  pageSummaryChunk: z.boolean().optional()
161
164
  }).optional(),
162
165
  embeddings: z.object({
163
- provider: z.literal("openai").optional(),
166
+ provider: z.literal("jina").optional(),
164
167
  model: z.string().min(1).optional(),
168
+ apiKey: z.string().min(1).optional(),
165
169
  apiKeyEnv: z.string().min(1).optional(),
166
170
  batchSize: z.number().int().positive().optional(),
167
171
  concurrency: z.number().int().positive().optional(),
@@ -170,18 +174,17 @@ var searchSocketConfigSchema = z.object({
170
174
  vector: z.object({
171
175
  dimension: z.number().int().positive().optional(),
172
176
  turso: z.object({
177
+ url: z.string().url().optional(),
178
+ authToken: z.string().min(1).optional(),
173
179
  urlEnv: z.string().optional(),
174
180
  authTokenEnv: z.string().optional(),
175
181
  localPath: z.string().optional()
176
182
  }).optional()
177
183
  }).optional(),
178
184
  rerank: z.object({
179
- provider: z.enum(["none", "jina"]).optional(),
185
+ enabled: z.boolean().optional(),
180
186
  topN: z.number().int().positive().optional(),
181
- jina: z.object({
182
- apiKeyEnv: z.string().optional(),
183
- model: z.string().optional()
184
- }).optional()
187
+ model: z.string().optional()
185
188
  }).optional(),
186
189
  ranking: z.object({
187
190
  enableIncomingLinkBoost: z.boolean().optional(),
@@ -190,6 +193,7 @@ var searchSocketConfigSchema = z.object({
190
193
  aggregationCap: z.number().int().positive().optional(),
191
194
  aggregationDecay: z.number().min(0).max(1).optional(),
192
195
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
196
+ minScore: z.number().min(0).max(1).optional(),
193
197
  weights: z.object({
194
198
  incomingLinks: z.number().optional(),
195
199
  depth: z.number().optional(),
@@ -270,9 +274,9 @@ function createDefaultConfig(projectId) {
270
274
  pageSummaryChunk: true
271
275
  },
272
276
  embeddings: {
273
- provider: "openai",
274
- model: "text-embedding-3-small",
275
- apiKeyEnv: "OPENAI_API_KEY",
277
+ provider: "jina",
278
+ model: "jina-embeddings-v3",
279
+ apiKeyEnv: "JINA_API_KEY",
276
280
  batchSize: 64,
277
281
  concurrency: 4
278
282
  },
@@ -284,12 +288,9 @@ function createDefaultConfig(projectId) {
284
288
  }
285
289
  },
286
290
  rerank: {
287
- provider: "none",
291
+ enabled: false,
288
292
  topN: 20,
289
- jina: {
290
- apiKeyEnv: "JINA_API_KEY",
291
- model: "jina-reranker-v2-base-multilingual"
292
- }
293
+ model: "jina-reranker-v2-base-multilingual"
293
294
  },
294
295
  ranking: {
295
296
  enableIncomingLinkBoost: true,
@@ -298,6 +299,7 @@ function createDefaultConfig(projectId) {
298
299
  aggregationCap: 5,
299
300
  aggregationDecay: 0.5,
300
301
  minChunkScoreRatio: 0.5,
302
+ minScore: 0,
301
303
  weights: {
302
304
  incomingLinks: 0.05,
303
305
  depth: 0.03,
@@ -408,7 +410,11 @@ ${issues}`
408
410
  outputDir: parsed.source.build.outputDir ?? ".svelte-kit/output",
409
411
  paramValues: parsed.source.build.paramValues ?? {},
410
412
  exclude: parsed.source.build.exclude ?? [],
411
- previewTimeout: parsed.source.build.previewTimeout ?? 3e4
413
+ previewTimeout: parsed.source.build.previewTimeout ?? 3e4,
414
+ discover: parsed.source.build.discover ?? false,
415
+ seedUrls: parsed.source.build.seedUrls ?? ["/"],
416
+ maxPages: parsed.source.build.maxPages ?? 200,
417
+ maxDepth: parsed.source.build.maxDepth ?? 10
412
418
  } : void 0
413
419
  },
414
420
  extract: {
@@ -437,11 +443,7 @@ ${issues}`
437
443
  },
438
444
  rerank: {
439
445
  ...defaults.rerank,
440
- ...parsed.rerank,
441
- jina: {
442
- ...defaults.rerank.jina,
443
- ...parsed.rerank?.jina
444
- }
446
+ ...parsed.rerank
445
447
  },
446
448
  ranking: {
447
449
  ...defaults.ranking,
@@ -488,7 +490,11 @@ ${issues}`
488
490
  outputDir: ".svelte-kit/output",
489
491
  paramValues: {},
490
492
  exclude: [],
491
- previewTimeout: 3e4
493
+ previewTimeout: 3e4,
494
+ discover: false,
495
+ seedUrls: ["/"],
496
+ maxPages: 200,
497
+ maxDepth: 10
492
498
  };
493
499
  }
494
500
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
@@ -529,7 +535,7 @@ function writeMinimalConfig(cwd) {
529
535
  return target;
530
536
  }
531
537
  const content = `export default {
532
- embeddings: { apiKeyEnv: "OPENAI_API_KEY" }
538
+ embeddings: { apiKeyEnv: "JINA_API_KEY" }
533
539
  };
534
540
  `;
535
541
  fs.writeFileSync(target, content, "utf8");
@@ -540,14 +546,16 @@ function writeMinimalConfig(cwd) {
540
546
  var Logger = class {
541
547
  json;
542
548
  verbose;
549
+ quiet;
543
550
  stderrOnly;
544
551
  constructor(opts = {}) {
545
552
  this.json = opts.json ?? false;
546
553
  this.verbose = opts.verbose ?? false;
554
+ this.quiet = opts.quiet ?? false;
547
555
  this.stderrOnly = opts.stderrOnly ?? false;
548
556
  }
549
557
  info(message) {
550
- if (this.json) {
558
+ if (this.quiet || this.json) {
551
559
  return;
552
560
  }
553
561
  this.writeOut(`${message}
@@ -561,7 +569,7 @@ var Logger = class {
561
569
  this.logJson("debug", { message });
562
570
  return;
563
571
  }
564
- this.writeOut(`${message}
572
+ this.writeOut(` ${message}
565
573
  `);
566
574
  }
567
575
  warn(message) {
@@ -588,7 +596,7 @@ var Logger = class {
588
596
  this.logJson(event, data);
589
597
  return;
590
598
  }
591
- this.writeOut(`[${event}] ${data ? JSON.stringify(data) : ""}
599
+ this.writeOut(` [${event}] ${data ? JSON.stringify(data) : ""}
592
600
  `);
593
601
  }
594
602
  writeOut(text) {
@@ -695,18 +703,18 @@ function ensureStateDirs(cwd, stateDir, scope) {
695
703
  return { statePath, pagesPath };
696
704
  }
697
705
 
698
- // src/embeddings/openai.ts
699
- import OpenAI from "openai";
706
+ // src/embeddings/jina.ts
700
707
  import pLimit from "p-limit";
701
708
  function sleep(ms) {
702
709
  return new Promise((resolve) => {
703
710
  setTimeout(resolve, ms);
704
711
  });
705
712
  }
706
- var OpenAIEmbeddingsProvider = class {
707
- client;
713
+ var JinaEmbeddingsProvider = class {
714
+ apiKey;
708
715
  batchSize;
709
716
  concurrency;
717
+ defaultTask;
710
718
  constructor(options) {
711
719
  if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
712
720
  throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
@@ -714,11 +722,10 @@ var OpenAIEmbeddingsProvider = class {
714
722
  if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
715
723
  throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
716
724
  }
717
- this.client = new OpenAI({
718
- apiKey: options.apiKey
719
- });
725
+ this.apiKey = options.apiKey;
720
726
  this.batchSize = options.batchSize;
721
727
  this.concurrency = options.concurrency;
728
+ this.defaultTask = options.task ?? "retrieval.passage";
722
729
  }
723
730
  estimateTokens(text) {
724
731
  const normalized = text.trim();
@@ -732,7 +739,7 @@ var OpenAIEmbeddingsProvider = class {
732
739
  const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
733
740
  return Math.max(1, Math.max(charEstimate, lexicalEstimate));
734
741
  }
735
- async embedTexts(texts, modelId) {
742
+ async embedTexts(texts, modelId, task) {
736
743
  if (texts.length === 0) {
737
744
  return [];
738
745
  }
@@ -748,33 +755,52 @@ var OpenAIEmbeddingsProvider = class {
748
755
  await Promise.all(
749
756
  batches.map(
750
757
  (batch, position) => limit(async () => {
751
- outputs[position] = await this.embedWithRetry(batch.values, modelId);
758
+ outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
752
759
  })
753
760
  )
754
761
  );
755
762
  return outputs.flat();
756
763
  }
757
- async embedWithRetry(texts, modelId) {
764
+ async embedWithRetry(texts, modelId, task) {
758
765
  const maxAttempts = 5;
759
766
  let attempt = 0;
760
767
  while (attempt < maxAttempts) {
761
768
  attempt += 1;
769
+ let response;
762
770
  try {
763
- const response = await this.client.embeddings.create({
764
- model: modelId,
765
- input: texts,
766
- encoding_format: "float"
771
+ response = await fetch("https://api.jina.ai/v1/embeddings", {
772
+ method: "POST",
773
+ headers: {
774
+ "content-type": "application/json",
775
+ authorization: `Bearer ${this.apiKey}`
776
+ },
777
+ body: JSON.stringify({
778
+ model: modelId,
779
+ input: texts,
780
+ task
781
+ })
767
782
  });
768
- return response.data.map((entry) => entry.embedding);
769
783
  } catch (error) {
770
- const status = error.status;
771
- const retryable = status === 429 || typeof status === "number" && status >= 500;
772
- if (!retryable || attempt >= maxAttempts) {
784
+ if (attempt >= maxAttempts) {
773
785
  throw error;
774
786
  }
775
- const delay = Math.min(2 ** attempt * 300, 5e3);
776
- await sleep(delay);
787
+ await sleep(Math.min(2 ** attempt * 300, 5e3));
788
+ continue;
777
789
  }
790
+ if (!response.ok) {
791
+ const retryable = response.status === 429 || response.status >= 500;
792
+ if (!retryable || attempt >= maxAttempts) {
793
+ const errorBody = await response.text();
794
+ throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
795
+ }
796
+ await sleep(Math.min(2 ** attempt * 300, 5e3));
797
+ continue;
798
+ }
799
+ const payload = await response.json();
800
+ if (!payload.data || !Array.isArray(payload.data)) {
801
+ throw new Error("Invalid Jina embeddings response format");
802
+ }
803
+ return payload.data.map((entry) => entry.embedding);
778
804
  }
779
805
  throw new Error("Unreachable retry state");
780
806
  }
@@ -782,20 +808,20 @@ var OpenAIEmbeddingsProvider = class {
782
808
 
783
809
  // src/embeddings/factory.ts
784
810
  function createEmbeddingsProvider(config) {
785
- if (config.embeddings.provider !== "openai") {
811
+ if (config.embeddings.provider !== "jina") {
786
812
  throw new SearchSocketError(
787
813
  "CONFIG_MISSING",
788
814
  `Unsupported embeddings provider ${config.embeddings.provider}`
789
815
  );
790
816
  }
791
- const apiKey = process.env[config.embeddings.apiKeyEnv];
817
+ const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
792
818
  if (!apiKey) {
793
819
  throw new SearchSocketError(
794
820
  "CONFIG_MISSING",
795
- `Missing embeddings API key env var: ${config.embeddings.apiKeyEnv}`
821
+ `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
796
822
  );
797
823
  }
798
- return new OpenAIEmbeddingsProvider({
824
+ return new JinaEmbeddingsProvider({
799
825
  apiKey,
800
826
  batchSize: config.embeddings.batchSize,
801
827
  concurrency: config.embeddings.concurrency
@@ -809,6 +835,11 @@ import path11 from "path";
809
835
  import fs3 from "fs";
810
836
  import path3 from "path";
811
837
 
838
+ // src/core/serverless.ts
839
+ function isServerless() {
840
+ return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
841
+ }
842
+
812
843
  // src/vector/turso.ts
813
844
  var TursoVectorStore = class {
814
845
  client;
@@ -853,6 +884,16 @@ var TursoVectorStore = class {
853
884
  }
854
885
  async ensureChunks(dim) {
855
886
  if (this.chunksReady) return;
887
+ const exists = await this.chunksTableExists();
888
+ if (exists) {
889
+ const currentDim = await this.getChunksDimension();
890
+ if (currentDim !== null && currentDim !== dim) {
891
+ await this.client.batch([
892
+ "DROP INDEX IF EXISTS idx",
893
+ "DROP TABLE IF EXISTS chunks"
894
+ ]);
895
+ }
896
+ }
856
897
  await this.client.batch([
857
898
  `CREATE TABLE IF NOT EXISTS chunks (
858
899
  id TEXT PRIMARY KEY,
@@ -864,6 +905,8 @@ var TursoVectorStore = class {
864
905
  section_title TEXT NOT NULL DEFAULT '',
865
906
  heading_path TEXT NOT NULL DEFAULT '[]',
866
907
  snippet TEXT NOT NULL DEFAULT '',
908
+ chunk_text TEXT NOT NULL DEFAULT '',
909
+ ordinal INTEGER NOT NULL DEFAULT 0,
867
910
  content_hash TEXT NOT NULL DEFAULT '',
868
911
  model_id TEXT NOT NULL DEFAULT '',
869
912
  depth INTEGER NOT NULL DEFAULT 0,
@@ -874,6 +917,19 @@ var TursoVectorStore = class {
874
917
  )`,
875
918
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
876
919
  ]);
920
+ const chunkMigrationCols = [
921
+ { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
922
+ { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
923
+ ];
924
+ for (const col of chunkMigrationCols) {
925
+ try {
926
+ await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
927
+ } catch (error) {
928
+ if (error instanceof Error && !error.message.includes("duplicate column")) {
929
+ throw error;
930
+ }
931
+ }
932
+ }
877
933
  this.chunksReady = true;
878
934
  }
879
935
  async ensurePages() {
@@ -908,6 +964,38 @@ var TursoVectorStore = class {
908
964
  throw error;
909
965
  }
910
966
  }
967
+ /**
968
+ * Read the current F32_BLOB dimension from the chunks table schema.
969
+ * Returns null if the table doesn't exist or the dimension can't be parsed.
970
+ */
971
+ async getChunksDimension() {
972
+ try {
973
+ const rs = await this.client.execute(
974
+ "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
975
+ );
976
+ if (rs.rows.length === 0) return null;
977
+ const sql = rs.rows[0].sql;
978
+ const match = sql.match(/F32_BLOB\((\d+)\)/i);
979
+ return match ? parseInt(match[1], 10) : null;
980
+ } catch {
981
+ return null;
982
+ }
983
+ }
984
+ /**
985
+ * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
986
+ * Used by `clean --remote` for a full reset.
987
+ */
988
+ async dropAllTables() {
989
+ await this.client.batch([
990
+ "DROP INDEX IF EXISTS idx",
991
+ "DROP TABLE IF EXISTS chunks",
992
+ "DROP TABLE IF EXISTS registry",
993
+ "DROP TABLE IF EXISTS pages"
994
+ ]);
995
+ this.chunksReady = false;
996
+ this.registryReady = false;
997
+ this.pagesReady = false;
998
+ }
911
999
  async upsert(records, _scope) {
912
1000
  if (records.length === 0) return;
913
1001
  const dim = this.dimension ?? records[0].vector.length;
@@ -918,9 +1006,9 @@ var TursoVectorStore = class {
918
1006
  const stmts = batch.map((r) => ({
919
1007
  sql: `INSERT OR REPLACE INTO chunks
920
1008
  (id, project_id, scope_name, url, path, title, section_title,
921
- heading_path, snippet, content_hash, model_id, depth,
1009
+ heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
922
1010
  incoming_links, route_file, tags, embedding)
923
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
1011
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
924
1012
  args: [
925
1013
  r.id,
926
1014
  r.metadata.projectId,
@@ -931,6 +1019,8 @@ var TursoVectorStore = class {
931
1019
  r.metadata.sectionTitle,
932
1020
  JSON.stringify(r.metadata.headingPath),
933
1021
  r.metadata.snippet,
1022
+ r.metadata.chunkText,
1023
+ r.metadata.ordinal,
934
1024
  r.metadata.contentHash,
935
1025
  r.metadata.modelId,
936
1026
  r.metadata.depth,
@@ -949,7 +1039,8 @@ var TursoVectorStore = class {
949
1039
  const queryJson = JSON.stringify(queryVector);
950
1040
  const rs = await this.client.execute({
951
1041
  sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
952
- c.section_title, c.heading_path, c.snippet, c.content_hash,
1042
+ c.section_title, c.heading_path, c.snippet, c.chunk_text,
1043
+ c.ordinal, c.content_hash,
953
1044
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
954
1045
  vector_distance_cos(c.embedding, vector(?)) AS distance
955
1046
  FROM vector_top_k('idx', vector(?), ?) AS v
@@ -993,6 +1084,8 @@ var TursoVectorStore = class {
993
1084
  sectionTitle: row.section_title,
994
1085
  headingPath: JSON.parse(row.heading_path || "[]"),
995
1086
  snippet: row.snippet,
1087
+ chunkText: row.chunk_text || "",
1088
+ ordinal: row.ordinal || 0,
996
1089
  contentHash: row.content_hash,
997
1090
  modelId: row.model_id,
998
1091
  depth: row.depth,
@@ -1188,10 +1281,10 @@ var TursoVectorStore = class {
1188
1281
  // src/vector/factory.ts
1189
1282
  async function createVectorStore(config, cwd) {
1190
1283
  const turso = config.vector.turso;
1191
- const remoteUrl = process.env[turso.urlEnv];
1284
+ const remoteUrl = turso.url ?? process.env[turso.urlEnv];
1192
1285
  if (remoteUrl) {
1193
1286
  const { createClient: createClient2 } = await import("@libsql/client/http");
1194
- const authToken = process.env[turso.authTokenEnv];
1287
+ const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
1195
1288
  const client2 = createClient2({
1196
1289
  url: remoteUrl,
1197
1290
  authToken
@@ -1201,6 +1294,12 @@ async function createVectorStore(config, cwd) {
1201
1294
  dimension: config.vector.dimension
1202
1295
  });
1203
1296
  }
1297
+ if (isServerless()) {
1298
+ throw new SearchSocketError(
1299
+ "VECTOR_BACKEND_UNAVAILABLE",
1300
+ `No remote vector database URL found (checked vector.turso.url and env var "${turso.urlEnv}"). Local SQLite storage is not available in serverless environments. Set ${turso.urlEnv} or pass vector.turso.url directly.`
1301
+ );
1302
+ }
1204
1303
  const { createClient } = await import("@libsql/client");
1205
1304
  const localPath = path3.resolve(cwd, turso.localPath);
1206
1305
  fs3.mkdirSync(path3.dirname(localPath), { recursive: true });
@@ -1828,6 +1927,7 @@ function mapUrlToRoute(urlPath, patterns) {
1828
1927
  }
1829
1928
 
1830
1929
  // src/indexing/sources/build/index.ts
1930
+ import { load as cheerioLoad } from "cheerio";
1831
1931
  import pLimit2 from "p-limit";
1832
1932
 
1833
1933
  // src/indexing/sources/build/manifest-parser.ts
@@ -2004,11 +2104,108 @@ async function startPreviewServer(cwd, options, logger3) {
2004
2104
 
2005
2105
  // src/indexing/sources/build/index.ts
2006
2106
  var logger = new Logger();
2107
+ function extractLinksFromHtml(html, pageUrl, baseOrigin) {
2108
+ const $ = cheerioLoad(html);
2109
+ const links = [];
2110
+ $("a[href]").each((_i, el) => {
2111
+ const href = $(el).attr("href");
2112
+ if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:") || href.startsWith("javascript:")) {
2113
+ return;
2114
+ }
2115
+ try {
2116
+ const resolved = new URL(href, `${baseOrigin}${pageUrl}`);
2117
+ if (resolved.origin !== baseOrigin) return;
2118
+ if (!["http:", "https:"].includes(resolved.protocol)) return;
2119
+ links.push(normalizeUrlPath(resolved.pathname));
2120
+ } catch {
2121
+ }
2122
+ });
2123
+ return [...new Set(links)];
2124
+ }
2125
+ async function discoverPages(server, buildConfig, pipelineMaxPages) {
2126
+ const { seedUrls, maxDepth, exclude } = buildConfig;
2127
+ const baseOrigin = new URL(server.baseUrl).origin;
2128
+ let effectiveMax = buildConfig.maxPages;
2129
+ if (typeof pipelineMaxPages === "number") {
2130
+ const floored = Math.max(0, Math.floor(pipelineMaxPages));
2131
+ effectiveMax = Math.min(effectiveMax, floored);
2132
+ }
2133
+ if (effectiveMax === 0) return [];
2134
+ const visited = /* @__PURE__ */ new Set();
2135
+ const pages = [];
2136
+ const queue = [];
2137
+ const limit = pLimit2(8);
2138
+ for (const seed of seedUrls) {
2139
+ const normalized = normalizeUrlPath(seed);
2140
+ if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
2141
+ visited.add(normalized);
2142
+ queue.push({ url: normalized, depth: 0 });
2143
+ }
2144
+ }
2145
+ while (queue.length > 0 && pages.length < effectiveMax) {
2146
+ const remaining = effectiveMax - pages.length;
2147
+ const batch = queue.splice(0, remaining);
2148
+ const results = await Promise.allSettled(
2149
+ batch.map(
2150
+ (item) => limit(async () => {
2151
+ const fullUrl = joinUrl(server.baseUrl, item.url);
2152
+ const response = await fetch(fullUrl);
2153
+ if (!response.ok) {
2154
+ logger.warn(`Skipping ${item.url}: ${response.status} ${response.statusText}`);
2155
+ return null;
2156
+ }
2157
+ const contentType = response.headers.get("content-type") ?? "";
2158
+ if (!contentType.includes("text/html")) {
2159
+ return null;
2160
+ }
2161
+ const html = await response.text();
2162
+ if (item.depth < maxDepth) {
2163
+ const links = extractLinksFromHtml(html, item.url, baseOrigin);
2164
+ for (const link of links) {
2165
+ if (!visited.has(link) && !isExcluded(link, exclude)) {
2166
+ visited.add(link);
2167
+ queue.push({ url: link, depth: item.depth + 1 });
2168
+ }
2169
+ }
2170
+ }
2171
+ return {
2172
+ url: item.url,
2173
+ html,
2174
+ sourcePath: fullUrl,
2175
+ outgoingLinks: []
2176
+ };
2177
+ })
2178
+ )
2179
+ );
2180
+ for (const result of results) {
2181
+ if (result.status === "fulfilled" && result.value) {
2182
+ pages.push(result.value);
2183
+ }
2184
+ }
2185
+ }
2186
+ if (pages.length >= effectiveMax && queue.length > 0) {
2187
+ logger.warn(`Discovery crawl reached maxPages limit (${effectiveMax}), ${queue.length} URLs not visited.`);
2188
+ }
2189
+ logger.event("build_discover_complete", {
2190
+ pagesFound: pages.length,
2191
+ urlsVisited: visited.size,
2192
+ urlsSkipped: queue.length
2193
+ });
2194
+ return pages;
2195
+ }
2007
2196
  async function loadBuildPages(cwd, config, maxPages) {
2008
2197
  const buildConfig = config.source.build;
2009
2198
  if (!buildConfig) {
2010
2199
  throw new Error("build source config is missing");
2011
2200
  }
2201
+ if (buildConfig.discover) {
2202
+ const server2 = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
2203
+ try {
2204
+ return await discoverPages(server2, buildConfig, maxPages);
2205
+ } finally {
2206
+ await server2.shutdown();
2207
+ }
2208
+ }
2012
2209
  const routes = await parseManifest(cwd, buildConfig.outputDir);
2013
2210
  const expanded = expandRoutes(routes, buildConfig.paramValues, buildConfig.exclude, logger);
2014
2211
  logger.event("build_routes_discovered", {
@@ -2112,11 +2309,11 @@ async function loadContentFilesPages(cwd, config, maxPages) {
2112
2309
 
2113
2310
  // src/indexing/sources/crawl.ts
2114
2311
  import { gunzipSync } from "zlib";
2115
- import { load as cheerioLoad } from "cheerio";
2312
+ import { load as cheerioLoad2 } from "cheerio";
2116
2313
  import pLimit3 from "p-limit";
2117
2314
  var logger2 = new Logger();
2118
2315
  function extractLocs(xml) {
2119
- const $ = cheerioLoad(xml, { xmlMode: true });
2316
+ const $ = cheerioLoad2(xml, { xmlMode: true });
2120
2317
  const locs = [];
2121
2318
  $("loc").each((_i, el) => {
2122
2319
  const text = $(el).text().trim();
@@ -2127,7 +2324,7 @@ function extractLocs(xml) {
2127
2324
  return locs;
2128
2325
  }
2129
2326
  function isSitemapIndex(xml) {
2130
- const $ = cheerioLoad(xml, { xmlMode: true });
2327
+ const $ = cheerioLoad2(xml, { xmlMode: true });
2131
2328
  return $("sitemapindex").length > 0;
2132
2329
  }
2133
2330
  async function fetchSitemapXml(url) {
@@ -2265,9 +2462,7 @@ function hrTimeMs(start) {
2265
2462
 
2266
2463
  // src/indexing/pipeline.ts
2267
2464
  var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
2268
- "text-embedding-3-small": 2e-5,
2269
- "text-embedding-3-large": 13e-5,
2270
- "text-embedding-ada-002": 1e-4
2465
+ "jina-embeddings-v3": 2e-5
2271
2466
  };
2272
2467
  var DEFAULT_EMBEDDING_PRICE_PER_1K = 2e-5;
2273
2468
  var IndexPipeline = class _IndexPipeline {
@@ -2313,9 +2508,15 @@ var IndexPipeline = class _IndexPipeline {
2313
2508
  };
2314
2509
  const scope = resolveScope(this.config, options.scopeOverride);
2315
2510
  const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
2511
+ const sourceMode = options.sourceOverride ?? this.config.source.mode;
2512
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
2316
2513
  if (options.force) {
2514
+ this.logger.info("Force mode enabled \u2014 full rebuild");
2317
2515
  await cleanMirrorForScope(statePath, scope);
2318
2516
  }
2517
+ if (options.dryRun) {
2518
+ this.logger.info("Dry run \u2014 no writes will be performed");
2519
+ }
2319
2520
  const manifestStart = stageStart();
2320
2521
  const existingHashes = await this.vectorStore.getContentHashes(scope);
2321
2522
  const existingModelId = await this.vectorStore.getScopeModelId(scope);
@@ -2326,8 +2527,9 @@ var IndexPipeline = class _IndexPipeline {
2326
2527
  );
2327
2528
  }
2328
2529
  stageEnd("manifest", manifestStart);
2530
+ this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
2329
2531
  const sourceStart = stageStart();
2330
- const sourceMode = options.sourceOverride ?? this.config.source.mode;
2532
+ this.logger.info(`Loading pages (source: ${sourceMode})...`);
2331
2533
  let sourcePages;
2332
2534
  if (sourceMode === "static-output") {
2333
2535
  sourcePages = await loadStaticOutputPages(this.cwd, this.config, options.maxPages);
@@ -2339,10 +2541,13 @@ var IndexPipeline = class _IndexPipeline {
2339
2541
  sourcePages = await loadContentFilesPages(this.cwd, this.config, options.maxPages);
2340
2542
  }
2341
2543
  stageEnd("source", sourceStart);
2544
+ this.logger.info(`Loaded ${sourcePages.length} page${sourcePages.length === 1 ? "" : "s"} (${stageTimingsMs["source"]}ms)`);
2342
2545
  const routeStart = stageStart();
2343
2546
  const routePatterns = await buildRoutePatterns(this.cwd);
2344
2547
  stageEnd("route_map", routeStart);
2548
+ this.logger.debug(`Route mapping: ${routePatterns.length} pattern${routePatterns.length === 1 ? "" : "s"} discovered (${stageTimingsMs["route_map"]}ms)`);
2345
2549
  const extractStart = stageStart();
2550
+ this.logger.info("Extracting content...");
2346
2551
  const extractedPages = [];
2347
2552
  for (const sourcePage of sourcePages) {
2348
2553
  const extracted = sourcePage.html ? extractFromHtml(sourcePage.url, sourcePage.html, this.config) : extractFromMarkdown(sourcePage.url, sourcePage.markdown ?? "", sourcePage.title);
@@ -2371,6 +2576,8 @@ var IndexPipeline = class _IndexPipeline {
2371
2576
  uniquePages.push(page);
2372
2577
  }
2373
2578
  stageEnd("extract", extractStart);
2579
+ const skippedPages = sourcePages.length - uniquePages.length;
2580
+ this.logger.info(`Extracted ${uniquePages.length} page${uniquePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
2374
2581
  const linkStart = stageStart();
2375
2582
  const pageSet = new Set(uniquePages.map((page) => normalizeUrlPath(page.url)));
2376
2583
  const incomingLinkCount = /* @__PURE__ */ new Map();
@@ -2386,7 +2593,9 @@ var IndexPipeline = class _IndexPipeline {
2386
2593
  }
2387
2594
  }
2388
2595
  stageEnd("links", linkStart);
2596
+ this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
2389
2597
  const mirrorStart = stageStart();
2598
+ this.logger.info("Writing mirror pages...");
2390
2599
  const mirrorPages = [];
2391
2600
  let routeExact = 0;
2392
2601
  let routeBestEffort = 0;
@@ -2456,7 +2665,9 @@ var IndexPipeline = class _IndexPipeline {
2456
2665
  await this.vectorStore.upsertPages(pageRecords, scope);
2457
2666
  }
2458
2667
  stageEnd("mirror", mirrorStart);
2668
+ this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
2459
2669
  const chunkStart = stageStart();
2670
+ this.logger.info("Chunking pages...");
2460
2671
  let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
2461
2672
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
2462
2673
  if (typeof maxChunks === "number") {
@@ -2469,6 +2680,7 @@ var IndexPipeline = class _IndexPipeline {
2469
2680
  });
2470
2681
  }
2471
2682
  stageEnd("chunk", chunkStart);
2683
+ this.logger.info(`Chunked into ${chunks.length} chunk${chunks.length === 1 ? "" : "s"} (${stageTimingsMs["chunk"]}ms)`);
2472
2684
  const currentChunkMap = /* @__PURE__ */ new Map();
2473
2685
  for (const chunk of chunks) {
2474
2686
  currentChunkMap.set(chunk.chunkKey, chunk);
@@ -2487,6 +2699,7 @@ var IndexPipeline = class _IndexPipeline {
2487
2699
  return existingHash !== chunk.contentHash;
2488
2700
  });
2489
2701
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
2702
+ this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
2490
2703
  const embedStart = stageStart();
2491
2704
  const chunkTokenEstimates = /* @__PURE__ */ new Map();
2492
2705
  for (const chunk of changedChunks) {
@@ -2501,9 +2714,11 @@ var IndexPipeline = class _IndexPipeline {
2501
2714
  let newEmbeddings = 0;
2502
2715
  const vectorsByChunk = /* @__PURE__ */ new Map();
2503
2716
  if (!options.dryRun && changedChunks.length > 0) {
2717
+ this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
2504
2718
  const embeddings = await this.embeddings.embedTexts(
2505
2719
  changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
2506
- this.config.embeddings.model
2720
+ this.config.embeddings.model,
2721
+ "retrieval.passage"
2507
2722
  );
2508
2723
  if (embeddings.length !== changedChunks.length) {
2509
2724
  throw new SearchSocketError(
@@ -2526,8 +2741,14 @@ var IndexPipeline = class _IndexPipeline {
2526
2741
  }
2527
2742
  }
2528
2743
  stageEnd("embedding", embedStart);
2744
+ if (changedChunks.length > 0) {
2745
+ this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
2746
+ } else {
2747
+ this.logger.info("No chunks to embed \u2014 all up to date");
2748
+ }
2529
2749
  const syncStart = stageStart();
2530
2750
  if (!options.dryRun) {
2751
+ this.logger.info("Syncing vectors...");
2531
2752
  const upserts = [];
2532
2753
  for (const chunk of changedChunks) {
2533
2754
  const vector = vectorsByChunk.get(chunk.chunkKey);
@@ -2546,6 +2767,8 @@ var IndexPipeline = class _IndexPipeline {
2546
2767
  sectionTitle: chunk.sectionTitle ?? "",
2547
2768
  headingPath: chunk.headingPath,
2548
2769
  snippet: chunk.snippet,
2770
+ chunkText: chunk.chunkText.slice(0, 4e3),
2771
+ ordinal: chunk.ordinal,
2549
2772
  contentHash: chunk.contentHash,
2550
2773
  modelId: this.config.embeddings.model,
2551
2774
  depth: chunk.depth,
@@ -2565,6 +2788,7 @@ var IndexPipeline = class _IndexPipeline {
2565
2788
  }
2566
2789
  }
2567
2790
  stageEnd("sync", syncStart);
2791
+ this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
2568
2792
  const finalizeStart = stageStart();
2569
2793
  if (!options.dryRun) {
2570
2794
  const scopeInfo = {
@@ -2584,6 +2808,7 @@ var IndexPipeline = class _IndexPipeline {
2584
2808
  });
2585
2809
  }
2586
2810
  stageEnd("finalize", finalizeStart);
2811
+ this.logger.info("Done.");
2587
2812
  return {
2588
2813
  pagesProcessed: mirrorPages.length,
2589
2814
  chunksTotal: chunks.length,
@@ -2693,20 +2918,17 @@ var JinaReranker = class {
2693
2918
 
2694
2919
  // src/rerank/factory.ts
2695
2920
  function createReranker(config) {
2696
- if (config.rerank.provider === "none") {
2921
+ if (!config.rerank.enabled) {
2697
2922
  return null;
2698
2923
  }
2699
- if (config.rerank.provider === "jina") {
2700
- const apiKey = process.env[config.rerank.jina.apiKeyEnv];
2701
- if (!apiKey) {
2702
- return null;
2703
- }
2704
- return new JinaReranker({
2705
- apiKey,
2706
- model: config.rerank.jina.model
2707
- });
2924
+ const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
2925
+ if (!apiKey) {
2926
+ return null;
2708
2927
  }
2709
- return null;
2928
+ return new JinaReranker({
2929
+ apiKey,
2930
+ model: config.rerank.model
2931
+ });
2710
2932
  }
2711
2933
 
2712
2934
  // src/search/ranking.ts
@@ -2854,7 +3076,7 @@ var SearchEngine = class _SearchEngine {
2854
3076
  const groupByPage = (input.groupBy ?? "page") === "page";
2855
3077
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
2856
3078
  const embedStart = process.hrtime.bigint();
2857
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model);
3079
+ const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
2858
3080
  const queryVector = queryEmbeddings[0];
2859
3081
  if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
2860
3082
  throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
@@ -2882,13 +3104,17 @@ var SearchEngine = class _SearchEngine {
2882
3104
  usedRerank = true;
2883
3105
  }
2884
3106
  let results;
3107
+ const minScore = this.config.ranking.minScore;
2885
3108
  if (groupByPage) {
2886
- const pages = aggregateByPage(ordered, this.config);
3109
+ let pages = aggregateByPage(ordered, this.config);
3110
+ if (minScore > 0) {
3111
+ pages = pages.filter((p) => p.pageScore >= minScore);
3112
+ }
2887
3113
  const minRatio = this.config.ranking.minChunkScoreRatio;
2888
3114
  results = pages.slice(0, topK).map((page) => {
2889
3115
  const bestScore = page.bestChunk.finalScore;
2890
- const minScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
2891
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore).slice(0, 5);
3116
+ const minScore2 = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
3117
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore2).slice(0, 5);
2892
3118
  return {
2893
3119
  url: page.url,
2894
3120
  title: page.title,
@@ -2905,6 +3131,9 @@ var SearchEngine = class _SearchEngine {
2905
3131
  };
2906
3132
  });
2907
3133
  } else {
3134
+ if (minScore > 0) {
3135
+ ordered = ordered.filter((entry) => entry.finalScore >= minScore);
3136
+ }
2908
3137
  results = ordered.slice(0, topK).map(({ hit, finalScore }) => ({
2909
3138
  url: hit.metadata.url,
2910
3139
  title: hit.metadata.title,
@@ -2976,43 +3205,54 @@ var SearchEngine = class _SearchEngine {
2976
3205
  }
2977
3206
  }
2978
3207
  async rerankHits(query, ranked, topK) {
2979
- if (this.config.rerank.provider !== "jina") {
3208
+ if (!this.config.rerank.enabled) {
2980
3209
  throw new SearchSocketError(
2981
3210
  "INVALID_REQUEST",
2982
- "rerank=true requested but rerank.provider is not configured as 'jina'.",
3211
+ "rerank=true requested but rerank.enabled is not set to true.",
2983
3212
  400
2984
3213
  );
2985
3214
  }
2986
3215
  if (!this.reranker) {
2987
3216
  throw new SearchSocketError(
2988
3217
  "CONFIG_MISSING",
2989
- `rerank=true requested but ${this.config.rerank.jina.apiKeyEnv} is not set.`,
3218
+ `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
2990
3219
  400
2991
3220
  );
2992
3221
  }
2993
- const candidates = ranked.map(({ hit }) => ({
2994
- id: hit.id,
2995
- text: [hit.metadata.title, hit.metadata.sectionTitle, hit.metadata.snippet].filter(Boolean).join("\n")
2996
- }));
3222
+ const pageGroups = /* @__PURE__ */ new Map();
3223
+ for (const entry of ranked) {
3224
+ const url = entry.hit.metadata.url;
3225
+ const group = pageGroups.get(url);
3226
+ if (group) group.push(entry);
3227
+ else pageGroups.set(url, [entry]);
3228
+ }
3229
+ const pageCandidates = [];
3230
+ for (const [url, chunks] of pageGroups) {
3231
+ const sorted = [...chunks].sort(
3232
+ (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
3233
+ );
3234
+ const title = sorted[0].hit.metadata.title;
3235
+ const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
3236
+ pageCandidates.push({ id: url, text: `${title}
3237
+
3238
+ ${body}` });
3239
+ }
2997
3240
  const reranked = await this.reranker.rerank(
2998
3241
  query,
2999
- candidates,
3242
+ pageCandidates,
3000
3243
  Math.max(topK, this.config.rerank.topN)
3001
3244
  );
3002
- const rerankScoreById = new Map(reranked.map((entry) => [entry.id, entry.score]));
3245
+ const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
3003
3246
  return ranked.map((entry) => {
3004
- const rerankScore = rerankScoreById.get(entry.hit.id);
3005
- const safeBaseScore = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
3006
- if (rerankScore === void 0 || !Number.isFinite(rerankScore)) {
3007
- return {
3008
- ...entry,
3009
- finalScore: safeBaseScore
3010
- };
3247
+ const pageScore = scoreByUrl.get(entry.hit.metadata.url);
3248
+ const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
3249
+ if (pageScore === void 0 || !Number.isFinite(pageScore)) {
3250
+ return { ...entry, finalScore: base };
3011
3251
  }
3012
- const combinedScore = rerankScore * this.config.ranking.weights.rerank + safeBaseScore * 1e-3;
3252
+ const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
3013
3253
  return {
3014
3254
  ...entry,
3015
- finalScore: Number.isFinite(combinedScore) ? combinedScore : safeBaseScore
3255
+ finalScore: Number.isFinite(combined) ? combined : base
3016
3256
  };
3017
3257
  }).sort((a, b) => {
3018
3258
  const delta = b.finalScore - a.finalScore;
@@ -3332,6 +3572,7 @@ function getRootOptions(command) {
3332
3572
  }
3333
3573
  async function runIndexCommand(opts) {
3334
3574
  const logger3 = new Logger({
3575
+ quiet: opts.quiet,
3335
3576
  verbose: opts.verbose,
3336
3577
  json: opts.json
3337
3578
  });
@@ -3355,7 +3596,9 @@ async function runIndexCommand(opts) {
3355
3596
  `);
3356
3597
  return;
3357
3598
  }
3358
- printIndexSummary(stats);
3599
+ if (!opts.quiet) {
3600
+ printIndexSummary(stats);
3601
+ }
3359
3602
  }
3360
3603
  var program = new Command();
3361
3604
  program.name("searchsocket").description("Semantic site search and MCP retrieval for SvelteKit").version(package_default.version).option("-C, --cwd <path>", "working directory", process.cwd()).option("--config <path>", "config path (defaults to searchsocket.config.ts)");
@@ -3379,7 +3622,7 @@ program.command("init").description("Create searchsocket.config.ts and .searchso
3379
3622
  process.stdout.write("// searchsocketVitePlugin({ enabled: true, changedOnly: true })\n");
3380
3623
  process.stdout.write("// or env-driven: SEARCHSOCKET_AUTO_INDEX=1 pnpm build\n");
3381
3624
  });
3382
- program.command("index").description("Index site content into markdown mirror + vector store").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full mirror rebuild and re-upsert", false).option("--dry-run", "compute plan and cost, no API writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
3625
+ program.command("index").description("Index site content into markdown mirror + vector store").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full mirror rebuild and re-upsert", false).option("--dry-run", "compute plan and cost, no API writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--quiet", "suppress all output except errors and warnings", false).option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
3383
3626
  const rootOpts = getRootOptions(command);
3384
3627
  const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
3385
3628
  await runIndexCommand({
@@ -3392,6 +3635,7 @@ program.command("index").description("Index site content into markdown mirror +
3392
3635
  source: opts.source,
3393
3636
  maxPages: opts.maxPages ? parsePositiveInt(opts.maxPages, "--max-pages") : void 0,
3394
3637
  maxChunks: opts.maxChunks ? parsePositiveInt(opts.maxChunks, "--max-chunks") : void 0,
3638
+ quiet: opts.quiet,
3395
3639
  verbose: opts.verbose,
3396
3640
  json: opts.json
3397
3641
  });
@@ -3554,8 +3798,8 @@ program.command("clean").description("Delete local state and optionally delete r
3554
3798
  `);
3555
3799
  if (opts.remote) {
3556
3800
  const vectorStore = await createVectorStore(config, cwd);
3557
- await vectorStore.deleteScope(scope);
3558
- process.stdout.write(`deleted remote vectors for scope ${scope.scopeName}
3801
+ await vectorStore.dropAllTables();
3802
+ process.stdout.write(`dropped all remote tables (chunks, registry, pages)
3559
3803
  `);
3560
3804
  }
3561
3805
  });
@@ -3680,14 +3924,6 @@ program.command("doctor").description("Validate config, env vars, provider conne
3680
3924
  details: tursoUrl ? `remote: ${tursoUrl}` : `local file: ${config.vector.turso.localPath}`
3681
3925
  });
3682
3926
  }
3683
- if (config.rerank.provider === "jina") {
3684
- const jinaKey = process.env[config.rerank.jina.apiKeyEnv];
3685
- checks.push({
3686
- name: `env ${config.rerank.jina.apiKeyEnv}`,
3687
- ok: Boolean(jinaKey),
3688
- details: jinaKey ? void 0 : "missing"
3689
- });
3690
- }
3691
3927
  if (config.source.mode === "static-output") {
3692
3928
  const outputDir = path13.resolve(cwd, config.source.staticOutputDir);
3693
3929
  const exists = fs9.existsSync(outputDir);