searchsocket 0.2.0 → 0.3.0

This diff shows the content changes between package versions publicly released to one of the supported registries. The information is provided for informational purposes only and reflects the differences between the versions as they appear in their respective public registries.
@@ -4,8 +4,7 @@ var fs = require('fs');
4
4
  var path = require('path');
5
5
  var jiti = require('jiti');
6
6
  var zod = require('zod');
7
- var OpenAI = require('openai');
8
- var pLimit = require('p-limit');
7
+ var pLimit2 = require('p-limit');
9
8
  var child_process = require('child_process');
10
9
  var crypto = require('crypto');
11
10
  var cheerio = require('cheerio');
@@ -19,8 +18,7 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
19
18
 
20
19
  var fs__default = /*#__PURE__*/_interopDefault(fs);
21
20
  var path__default = /*#__PURE__*/_interopDefault(path);
22
- var OpenAI__default = /*#__PURE__*/_interopDefault(OpenAI);
23
- var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
21
+ var pLimit2__default = /*#__PURE__*/_interopDefault(pLimit2);
24
22
  var matter__default = /*#__PURE__*/_interopDefault(matter);
25
23
  var fs4__default = /*#__PURE__*/_interopDefault(fs4);
26
24
  var fg__default = /*#__PURE__*/_interopDefault(fg);
@@ -16629,7 +16627,11 @@ var searchSocketConfigSchema = zod.z.object({
16629
16627
  outputDir: zod.z.string().min(1).optional(),
16630
16628
  paramValues: zod.z.record(zod.z.string(), zod.z.array(zod.z.string())).optional(),
16631
16629
  exclude: zod.z.array(zod.z.string()).optional(),
16632
- previewTimeout: zod.z.number().int().positive().optional()
16630
+ previewTimeout: zod.z.number().int().positive().optional(),
16631
+ discover: zod.z.boolean().optional(),
16632
+ seedUrls: zod.z.array(zod.z.string()).optional(),
16633
+ maxPages: zod.z.number().int().positive().optional(),
16634
+ maxDepth: zod.z.number().int().nonnegative().optional()
16633
16635
  }).optional()
16634
16636
  }).optional(),
16635
16637
  extract: zod.z.object({
@@ -16656,8 +16658,9 @@ var searchSocketConfigSchema = zod.z.object({
16656
16658
  pageSummaryChunk: zod.z.boolean().optional()
16657
16659
  }).optional(),
16658
16660
  embeddings: zod.z.object({
16659
- provider: zod.z.literal("openai").optional(),
16661
+ provider: zod.z.literal("jina").optional(),
16660
16662
  model: zod.z.string().min(1).optional(),
16663
+ apiKey: zod.z.string().min(1).optional(),
16661
16664
  apiKeyEnv: zod.z.string().min(1).optional(),
16662
16665
  batchSize: zod.z.number().int().positive().optional(),
16663
16666
  concurrency: zod.z.number().int().positive().optional(),
@@ -16666,18 +16669,17 @@ var searchSocketConfigSchema = zod.z.object({
16666
16669
  vector: zod.z.object({
16667
16670
  dimension: zod.z.number().int().positive().optional(),
16668
16671
  turso: zod.z.object({
16672
+ url: zod.z.string().url().optional(),
16673
+ authToken: zod.z.string().min(1).optional(),
16669
16674
  urlEnv: zod.z.string().optional(),
16670
16675
  authTokenEnv: zod.z.string().optional(),
16671
16676
  localPath: zod.z.string().optional()
16672
16677
  }).optional()
16673
16678
  }).optional(),
16674
16679
  rerank: zod.z.object({
16675
- provider: zod.z.enum(["none", "jina"]).optional(),
16680
+ enabled: zod.z.boolean().optional(),
16676
16681
  topN: zod.z.number().int().positive().optional(),
16677
- jina: zod.z.object({
16678
- apiKeyEnv: zod.z.string().optional(),
16679
- model: zod.z.string().optional()
16680
- }).optional()
16682
+ model: zod.z.string().optional()
16681
16683
  }).optional(),
16682
16684
  ranking: zod.z.object({
16683
16685
  enableIncomingLinkBoost: zod.z.boolean().optional(),
@@ -16686,6 +16688,7 @@ var searchSocketConfigSchema = zod.z.object({
16686
16688
  aggregationCap: zod.z.number().int().positive().optional(),
16687
16689
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16688
16690
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
16691
+ minScore: zod.z.number().min(0).max(1).optional(),
16689
16692
  weights: zod.z.object({
16690
16693
  incomingLinks: zod.z.number().optional(),
16691
16694
  depth: zod.z.number().optional(),
@@ -16766,9 +16769,9 @@ function createDefaultConfig(projectId) {
16766
16769
  pageSummaryChunk: true
16767
16770
  },
16768
16771
  embeddings: {
16769
- provider: "openai",
16770
- model: "text-embedding-3-small",
16771
- apiKeyEnv: "OPENAI_API_KEY",
16772
+ provider: "jina",
16773
+ model: "jina-embeddings-v3",
16774
+ apiKeyEnv: "JINA_API_KEY",
16772
16775
  batchSize: 64,
16773
16776
  concurrency: 4
16774
16777
  },
@@ -16780,12 +16783,9 @@ function createDefaultConfig(projectId) {
16780
16783
  }
16781
16784
  },
16782
16785
  rerank: {
16783
- provider: "none",
16786
+ enabled: false,
16784
16787
  topN: 20,
16785
- jina: {
16786
- apiKeyEnv: "JINA_API_KEY",
16787
- model: "jina-reranker-v2-base-multilingual"
16788
- }
16788
+ model: "jina-reranker-v2-base-multilingual"
16789
16789
  },
16790
16790
  ranking: {
16791
16791
  enableIncomingLinkBoost: true,
@@ -16794,6 +16794,7 @@ function createDefaultConfig(projectId) {
16794
16794
  aggregationCap: 5,
16795
16795
  aggregationDecay: 0.5,
16796
16796
  minChunkScoreRatio: 0.5,
16797
+ minScore: 0,
16797
16798
  weights: {
16798
16799
  incomingLinks: 0.05,
16799
16800
  depth: 0.03,
@@ -16920,7 +16921,11 @@ ${issues}`
16920
16921
  outputDir: parsed.source.build.outputDir ?? ".svelte-kit/output",
16921
16922
  paramValues: parsed.source.build.paramValues ?? {},
16922
16923
  exclude: parsed.source.build.exclude ?? [],
16923
- previewTimeout: parsed.source.build.previewTimeout ?? 3e4
16924
+ previewTimeout: parsed.source.build.previewTimeout ?? 3e4,
16925
+ discover: parsed.source.build.discover ?? false,
16926
+ seedUrls: parsed.source.build.seedUrls ?? ["/"],
16927
+ maxPages: parsed.source.build.maxPages ?? 200,
16928
+ maxDepth: parsed.source.build.maxDepth ?? 10
16924
16929
  } : void 0
16925
16930
  },
16926
16931
  extract: {
@@ -16949,11 +16954,7 @@ ${issues}`
16949
16954
  },
16950
16955
  rerank: {
16951
16956
  ...defaults.rerank,
16952
- ...parsed.rerank,
16953
- jina: {
16954
- ...defaults.rerank.jina,
16955
- ...parsed.rerank?.jina
16956
- }
16957
+ ...parsed.rerank
16957
16958
  },
16958
16959
  ranking: {
16959
16960
  ...defaults.ranking,
@@ -17000,7 +17001,11 @@ ${issues}`
17000
17001
  outputDir: ".svelte-kit/output",
17001
17002
  paramValues: {},
17002
17003
  exclude: [],
17003
- previewTimeout: 3e4
17004
+ previewTimeout: 3e4,
17005
+ discover: false,
17006
+ seedUrls: ["/"],
17007
+ maxPages: 200,
17008
+ maxDepth: 10
17004
17009
  };
17005
17010
  }
17006
17011
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
@@ -17035,15 +17040,21 @@ async function loadConfig(options = {}) {
17035
17040
  const raw = loaded.default ?? loaded;
17036
17041
  return mergeConfig(cwd, raw);
17037
17042
  }
17043
+
17044
+ // src/core/serverless.ts
17045
+ function isServerless() {
17046
+ return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
17047
+ }
17038
17048
  function sleep(ms) {
17039
17049
  return new Promise((resolve) => {
17040
17050
  setTimeout(resolve, ms);
17041
17051
  });
17042
17052
  }
17043
- var OpenAIEmbeddingsProvider = class {
17044
- client;
17053
+ var JinaEmbeddingsProvider = class {
17054
+ apiKey;
17045
17055
  batchSize;
17046
17056
  concurrency;
17057
+ defaultTask;
17047
17058
  constructor(options) {
17048
17059
  if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
17049
17060
  throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
@@ -17051,11 +17062,10 @@ var OpenAIEmbeddingsProvider = class {
17051
17062
  if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
17052
17063
  throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
17053
17064
  }
17054
- this.client = new OpenAI__default.default({
17055
- apiKey: options.apiKey
17056
- });
17065
+ this.apiKey = options.apiKey;
17057
17066
  this.batchSize = options.batchSize;
17058
17067
  this.concurrency = options.concurrency;
17068
+ this.defaultTask = options.task ?? "retrieval.passage";
17059
17069
  }
17060
17070
  estimateTokens(text) {
17061
17071
  const normalized = text.trim();
@@ -17069,7 +17079,7 @@ var OpenAIEmbeddingsProvider = class {
17069
17079
  const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
17070
17080
  return Math.max(1, Math.max(charEstimate, lexicalEstimate));
17071
17081
  }
17072
- async embedTexts(texts, modelId) {
17082
+ async embedTexts(texts, modelId, task) {
17073
17083
  if (texts.length === 0) {
17074
17084
  return [];
17075
17085
  }
@@ -17081,37 +17091,56 @@ var OpenAIEmbeddingsProvider = class {
17081
17091
  });
17082
17092
  }
17083
17093
  const outputs = new Array(batches.length);
17084
- const limit = pLimit__default.default(this.concurrency);
17094
+ const limit = pLimit2__default.default(this.concurrency);
17085
17095
  await Promise.all(
17086
17096
  batches.map(
17087
17097
  (batch, position) => limit(async () => {
17088
- outputs[position] = await this.embedWithRetry(batch.values, modelId);
17098
+ outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
17089
17099
  })
17090
17100
  )
17091
17101
  );
17092
17102
  return outputs.flat();
17093
17103
  }
17094
- async embedWithRetry(texts, modelId) {
17104
+ async embedWithRetry(texts, modelId, task) {
17095
17105
  const maxAttempts = 5;
17096
17106
  let attempt = 0;
17097
17107
  while (attempt < maxAttempts) {
17098
17108
  attempt += 1;
17109
+ let response;
17099
17110
  try {
17100
- const response = await this.client.embeddings.create({
17101
- model: modelId,
17102
- input: texts,
17103
- encoding_format: "float"
17111
+ response = await fetch("https://api.jina.ai/v1/embeddings", {
17112
+ method: "POST",
17113
+ headers: {
17114
+ "content-type": "application/json",
17115
+ authorization: `Bearer ${this.apiKey}`
17116
+ },
17117
+ body: JSON.stringify({
17118
+ model: modelId,
17119
+ input: texts,
17120
+ task
17121
+ })
17104
17122
  });
17105
- return response.data.map((entry) => entry.embedding);
17106
17123
  } catch (error) {
17107
- const status = error.status;
17108
- const retryable = status === 429 || typeof status === "number" && status >= 500;
17109
- if (!retryable || attempt >= maxAttempts) {
17124
+ if (attempt >= maxAttempts) {
17110
17125
  throw error;
17111
17126
  }
17112
- const delay = Math.min(2 ** attempt * 300, 5e3);
17113
- await sleep(delay);
17127
+ await sleep(Math.min(2 ** attempt * 300, 5e3));
17128
+ continue;
17129
+ }
17130
+ if (!response.ok) {
17131
+ const retryable = response.status === 429 || response.status >= 500;
17132
+ if (!retryable || attempt >= maxAttempts) {
17133
+ const errorBody = await response.text();
17134
+ throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
17135
+ }
17136
+ await sleep(Math.min(2 ** attempt * 300, 5e3));
17137
+ continue;
17114
17138
  }
17139
+ const payload = await response.json();
17140
+ if (!payload.data || !Array.isArray(payload.data)) {
17141
+ throw new Error("Invalid Jina embeddings response format");
17142
+ }
17143
+ return payload.data.map((entry) => entry.embedding);
17115
17144
  }
17116
17145
  throw new Error("Unreachable retry state");
17117
17146
  }
@@ -17119,20 +17148,20 @@ var OpenAIEmbeddingsProvider = class {
17119
17148
 
17120
17149
  // src/embeddings/factory.ts
17121
17150
  function createEmbeddingsProvider(config) {
17122
- if (config.embeddings.provider !== "openai") {
17151
+ if (config.embeddings.provider !== "jina") {
17123
17152
  throw new SearchSocketError(
17124
17153
  "CONFIG_MISSING",
17125
17154
  `Unsupported embeddings provider ${config.embeddings.provider}`
17126
17155
  );
17127
17156
  }
17128
- const apiKey = process.env[config.embeddings.apiKeyEnv];
17157
+ const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17129
17158
  if (!apiKey) {
17130
17159
  throw new SearchSocketError(
17131
17160
  "CONFIG_MISSING",
17132
- `Missing embeddings API key env var: ${config.embeddings.apiKeyEnv}`
17161
+ `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
17133
17162
  );
17134
17163
  }
17135
- return new OpenAIEmbeddingsProvider({
17164
+ return new JinaEmbeddingsProvider({
17136
17165
  apiKey,
17137
17166
  batchSize: config.embeddings.batchSize,
17138
17167
  concurrency: config.embeddings.concurrency
@@ -17295,20 +17324,17 @@ var JinaReranker = class {
17295
17324
 
17296
17325
  // src/rerank/factory.ts
17297
17326
  function createReranker(config) {
17298
- if (config.rerank.provider === "none") {
17327
+ if (!config.rerank.enabled) {
17299
17328
  return null;
17300
17329
  }
17301
- if (config.rerank.provider === "jina") {
17302
- const apiKey = process.env[config.rerank.jina.apiKeyEnv];
17303
- if (!apiKey) {
17304
- return null;
17305
- }
17306
- return new JinaReranker({
17307
- apiKey,
17308
- model: config.rerank.jina.model
17309
- });
17330
+ const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17331
+ if (!apiKey) {
17332
+ return null;
17310
17333
  }
17311
- return null;
17334
+ return new JinaReranker({
17335
+ apiKey,
17336
+ model: config.rerank.model
17337
+ });
17312
17338
  }
17313
17339
 
17314
17340
  // src/utils/time.ts
@@ -17413,6 +17439,16 @@ var TursoVectorStore = class {
17413
17439
  }
17414
17440
  async ensureChunks(dim) {
17415
17441
  if (this.chunksReady) return;
17442
+ const exists = await this.chunksTableExists();
17443
+ if (exists) {
17444
+ const currentDim = await this.getChunksDimension();
17445
+ if (currentDim !== null && currentDim !== dim) {
17446
+ await this.client.batch([
17447
+ "DROP INDEX IF EXISTS idx",
17448
+ "DROP TABLE IF EXISTS chunks"
17449
+ ]);
17450
+ }
17451
+ }
17416
17452
  await this.client.batch([
17417
17453
  `CREATE TABLE IF NOT EXISTS chunks (
17418
17454
  id TEXT PRIMARY KEY,
@@ -17424,6 +17460,8 @@ var TursoVectorStore = class {
17424
17460
  section_title TEXT NOT NULL DEFAULT '',
17425
17461
  heading_path TEXT NOT NULL DEFAULT '[]',
17426
17462
  snippet TEXT NOT NULL DEFAULT '',
17463
+ chunk_text TEXT NOT NULL DEFAULT '',
17464
+ ordinal INTEGER NOT NULL DEFAULT 0,
17427
17465
  content_hash TEXT NOT NULL DEFAULT '',
17428
17466
  model_id TEXT NOT NULL DEFAULT '',
17429
17467
  depth INTEGER NOT NULL DEFAULT 0,
@@ -17434,6 +17472,19 @@ var TursoVectorStore = class {
17434
17472
  )`,
17435
17473
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17436
17474
  ]);
17475
+ const chunkMigrationCols = [
17476
+ { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17477
+ { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17478
+ ];
17479
+ for (const col of chunkMigrationCols) {
17480
+ try {
17481
+ await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17482
+ } catch (error) {
17483
+ if (error instanceof Error && !error.message.includes("duplicate column")) {
17484
+ throw error;
17485
+ }
17486
+ }
17487
+ }
17437
17488
  this.chunksReady = true;
17438
17489
  }
17439
17490
  async ensurePages() {
@@ -17468,6 +17519,38 @@ var TursoVectorStore = class {
17468
17519
  throw error;
17469
17520
  }
17470
17521
  }
17522
+ /**
17523
+ * Read the current F32_BLOB dimension from the chunks table schema.
17524
+ * Returns null if the table doesn't exist or the dimension can't be parsed.
17525
+ */
17526
+ async getChunksDimension() {
17527
+ try {
17528
+ const rs = await this.client.execute(
17529
+ "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
17530
+ );
17531
+ if (rs.rows.length === 0) return null;
17532
+ const sql = rs.rows[0].sql;
17533
+ const match = sql.match(/F32_BLOB\((\d+)\)/i);
17534
+ return match ? parseInt(match[1], 10) : null;
17535
+ } catch {
17536
+ return null;
17537
+ }
17538
+ }
17539
+ /**
17540
+ * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
17541
+ * Used by `clean --remote` for a full reset.
17542
+ */
17543
+ async dropAllTables() {
17544
+ await this.client.batch([
17545
+ "DROP INDEX IF EXISTS idx",
17546
+ "DROP TABLE IF EXISTS chunks",
17547
+ "DROP TABLE IF EXISTS registry",
17548
+ "DROP TABLE IF EXISTS pages"
17549
+ ]);
17550
+ this.chunksReady = false;
17551
+ this.registryReady = false;
17552
+ this.pagesReady = false;
17553
+ }
17471
17554
  async upsert(records, _scope) {
17472
17555
  if (records.length === 0) return;
17473
17556
  const dim = this.dimension ?? records[0].vector.length;
@@ -17478,9 +17561,9 @@ var TursoVectorStore = class {
17478
17561
  const stmts = batch.map((r) => ({
17479
17562
  sql: `INSERT OR REPLACE INTO chunks
17480
17563
  (id, project_id, scope_name, url, path, title, section_title,
17481
- heading_path, snippet, content_hash, model_id, depth,
17564
+ heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17482
17565
  incoming_links, route_file, tags, embedding)
17483
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17566
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17484
17567
  args: [
17485
17568
  r.id,
17486
17569
  r.metadata.projectId,
@@ -17491,6 +17574,8 @@ var TursoVectorStore = class {
17491
17574
  r.metadata.sectionTitle,
17492
17575
  JSON.stringify(r.metadata.headingPath),
17493
17576
  r.metadata.snippet,
17577
+ r.metadata.chunkText,
17578
+ r.metadata.ordinal,
17494
17579
  r.metadata.contentHash,
17495
17580
  r.metadata.modelId,
17496
17581
  r.metadata.depth,
@@ -17509,7 +17594,8 @@ var TursoVectorStore = class {
17509
17594
  const queryJson = JSON.stringify(queryVector);
17510
17595
  const rs = await this.client.execute({
17511
17596
  sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
17512
- c.section_title, c.heading_path, c.snippet, c.content_hash,
17597
+ c.section_title, c.heading_path, c.snippet, c.chunk_text,
17598
+ c.ordinal, c.content_hash,
17513
17599
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17514
17600
  vector_distance_cos(c.embedding, vector(?)) AS distance
17515
17601
  FROM vector_top_k('idx', vector(?), ?) AS v
@@ -17553,6 +17639,8 @@ var TursoVectorStore = class {
17553
17639
  sectionTitle: row.section_title,
17554
17640
  headingPath: JSON.parse(row.heading_path || "[]"),
17555
17641
  snippet: row.snippet,
17642
+ chunkText: row.chunk_text || "",
17643
+ ordinal: row.ordinal || 0,
17556
17644
  contentHash: row.content_hash,
17557
17645
  modelId: row.model_id,
17558
17646
  depth: row.depth,
@@ -17748,10 +17836,10 @@ var TursoVectorStore = class {
17748
17836
  // src/vector/factory.ts
17749
17837
  async function createVectorStore(config, cwd) {
17750
17838
  const turso = config.vector.turso;
17751
- const remoteUrl = process.env[turso.urlEnv];
17839
+ const remoteUrl = turso.url ?? process.env[turso.urlEnv];
17752
17840
  if (remoteUrl) {
17753
17841
  const { createClient: createClient2 } = await import('@libsql/client/http');
17754
- const authToken = process.env[turso.authTokenEnv];
17842
+ const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
17755
17843
  const client2 = createClient2({
17756
17844
  url: remoteUrl,
17757
17845
  authToken
@@ -17761,6 +17849,12 @@ async function createVectorStore(config, cwd) {
17761
17849
  dimension: config.vector.dimension
17762
17850
  });
17763
17851
  }
17852
+ if (isServerless()) {
17853
+ throw new SearchSocketError(
17854
+ "VECTOR_BACKEND_UNAVAILABLE",
17855
+ `No remote vector database URL found (checked vector.turso.url and env var "${turso.urlEnv}"). Local SQLite storage is not available in serverless environments. Set ${turso.urlEnv} or pass vector.turso.url directly.`
17856
+ );
17857
+ }
17764
17858
  const { createClient } = await import('@libsql/client');
17765
17859
  const localPath = path__default.default.resolve(cwd, turso.localPath);
17766
17860
  fs__default.default.mkdirSync(path__default.default.dirname(localPath), { recursive: true });
@@ -17918,7 +18012,7 @@ var SearchEngine = class _SearchEngine {
17918
18012
  const groupByPage = (input.groupBy ?? "page") === "page";
17919
18013
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
17920
18014
  const embedStart = process.hrtime.bigint();
17921
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model);
18015
+ const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
17922
18016
  const queryVector = queryEmbeddings[0];
17923
18017
  if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
17924
18018
  throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
@@ -17946,13 +18040,17 @@ var SearchEngine = class _SearchEngine {
17946
18040
  usedRerank = true;
17947
18041
  }
17948
18042
  let results;
18043
+ const minScore = this.config.ranking.minScore;
17949
18044
  if (groupByPage) {
17950
- const pages = aggregateByPage(ordered, this.config);
18045
+ let pages = aggregateByPage(ordered, this.config);
18046
+ if (minScore > 0) {
18047
+ pages = pages.filter((p) => p.pageScore >= minScore);
18048
+ }
17951
18049
  const minRatio = this.config.ranking.minChunkScoreRatio;
17952
18050
  results = pages.slice(0, topK).map((page) => {
17953
18051
  const bestScore = page.bestChunk.finalScore;
17954
- const minScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
17955
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore).slice(0, 5);
18052
+ const minScore2 = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
18053
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore2).slice(0, 5);
17956
18054
  return {
17957
18055
  url: page.url,
17958
18056
  title: page.title,
@@ -17969,6 +18067,9 @@ var SearchEngine = class _SearchEngine {
17969
18067
  };
17970
18068
  });
17971
18069
  } else {
18070
+ if (minScore > 0) {
18071
+ ordered = ordered.filter((entry) => entry.finalScore >= minScore);
18072
+ }
17972
18073
  results = ordered.slice(0, topK).map(({ hit, finalScore }) => ({
17973
18074
  url: hit.metadata.url,
17974
18075
  title: hit.metadata.title,
@@ -18040,43 +18141,54 @@ var SearchEngine = class _SearchEngine {
18040
18141
  }
18041
18142
  }
18042
18143
  async rerankHits(query, ranked, topK) {
18043
- if (this.config.rerank.provider !== "jina") {
18144
+ if (!this.config.rerank.enabled) {
18044
18145
  throw new SearchSocketError(
18045
18146
  "INVALID_REQUEST",
18046
- "rerank=true requested but rerank.provider is not configured as 'jina'.",
18147
+ "rerank=true requested but rerank.enabled is not set to true.",
18047
18148
  400
18048
18149
  );
18049
18150
  }
18050
18151
  if (!this.reranker) {
18051
18152
  throw new SearchSocketError(
18052
18153
  "CONFIG_MISSING",
18053
- `rerank=true requested but ${this.config.rerank.jina.apiKeyEnv} is not set.`,
18154
+ `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
18054
18155
  400
18055
18156
  );
18056
18157
  }
18057
- const candidates = ranked.map(({ hit }) => ({
18058
- id: hit.id,
18059
- text: [hit.metadata.title, hit.metadata.sectionTitle, hit.metadata.snippet].filter(Boolean).join("\n")
18060
- }));
18158
+ const pageGroups = /* @__PURE__ */ new Map();
18159
+ for (const entry of ranked) {
18160
+ const url = entry.hit.metadata.url;
18161
+ const group = pageGroups.get(url);
18162
+ if (group) group.push(entry);
18163
+ else pageGroups.set(url, [entry]);
18164
+ }
18165
+ const pageCandidates = [];
18166
+ for (const [url, chunks] of pageGroups) {
18167
+ const sorted = [...chunks].sort(
18168
+ (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
18169
+ );
18170
+ const title = sorted[0].hit.metadata.title;
18171
+ const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18172
+ pageCandidates.push({ id: url, text: `${title}
18173
+
18174
+ ${body}` });
18175
+ }
18061
18176
  const reranked = await this.reranker.rerank(
18062
18177
  query,
18063
- candidates,
18178
+ pageCandidates,
18064
18179
  Math.max(topK, this.config.rerank.topN)
18065
18180
  );
18066
- const rerankScoreById = new Map(reranked.map((entry) => [entry.id, entry.score]));
18181
+ const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
18067
18182
  return ranked.map((entry) => {
18068
- const rerankScore = rerankScoreById.get(entry.hit.id);
18069
- const safeBaseScore = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
18070
- if (rerankScore === void 0 || !Number.isFinite(rerankScore)) {
18071
- return {
18072
- ...entry,
18073
- finalScore: safeBaseScore
18074
- };
18183
+ const pageScore = scoreByUrl.get(entry.hit.metadata.url);
18184
+ const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
18185
+ if (pageScore === void 0 || !Number.isFinite(pageScore)) {
18186
+ return { ...entry, finalScore: base };
18075
18187
  }
18076
- const combinedScore = rerankScore * this.config.ranking.weights.rerank + safeBaseScore * 1e-3;
18188
+ const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
18077
18189
  return {
18078
18190
  ...entry,
18079
- finalScore: Number.isFinite(combinedScore) ? combinedScore : safeBaseScore
18191
+ finalScore: Number.isFinite(combined) ? combined : base
18080
18192
  };
18081
18193
  }).sort((a, b) => {
18082
18194
  const delta = b.finalScore - a.finalScore;
@@ -18116,13 +18228,21 @@ function searchsocketHandle(options = {}) {
18116
18228
  let rateLimiter = null;
18117
18229
  const getConfig = async () => {
18118
18230
  if (!configPromise) {
18119
- const configP = options.config ? Promise.resolve(options.config) : loadConfig({
18120
- cwd: options.cwd,
18121
- configPath: options.configPath
18122
- });
18231
+ let configP;
18232
+ if (options.config) {
18233
+ configP = Promise.resolve(options.config);
18234
+ } else if (options.rawConfig) {
18235
+ const cwd = options.cwd ?? process.cwd();
18236
+ configP = Promise.resolve(mergeConfig(cwd, options.rawConfig));
18237
+ } else {
18238
+ configP = loadConfig({
18239
+ cwd: options.cwd,
18240
+ configPath: options.configPath
18241
+ });
18242
+ }
18123
18243
  configPromise = configP.then((config) => {
18124
18244
  apiPath = apiPath ?? config.api.path;
18125
- if (config.api.rateLimit) {
18245
+ if (config.api.rateLimit && !isServerless()) {
18126
18246
  rateLimiter = new InMemoryRateLimiter(config.api.rateLimit.windowMs, config.api.rateLimit.max);
18127
18247
  }
18128
18248
  return config;
@@ -18132,10 +18252,9 @@ function searchsocketHandle(options = {}) {
18132
18252
  };
18133
18253
  const getEngine = async () => {
18134
18254
  if (!enginePromise) {
18135
- const config = options.config;
18255
+ const config = await getConfig();
18136
18256
  enginePromise = SearchEngine.create({
18137
18257
  cwd: options.cwd,
18138
- configPath: options.configPath,
18139
18258
  config
18140
18259
  });
18141
18260
  }
@@ -19670,14 +19789,16 @@ function mapUrlToRoute(urlPath, patterns) {
19670
19789
  var Logger = class {
19671
19790
  json;
19672
19791
  verbose;
19792
+ quiet;
19673
19793
  stderrOnly;
19674
19794
  constructor(opts = {}) {
19675
19795
  this.json = opts.json ?? false;
19676
19796
  this.verbose = opts.verbose ?? false;
19797
+ this.quiet = opts.quiet ?? false;
19677
19798
  this.stderrOnly = opts.stderrOnly ?? false;
19678
19799
  }
19679
19800
  info(message) {
19680
- if (this.json) {
19801
+ if (this.quiet || this.json) {
19681
19802
  return;
19682
19803
  }
19683
19804
  this.writeOut(`${message}
@@ -19691,7 +19812,7 @@ var Logger = class {
19691
19812
  this.logJson("debug", { message });
19692
19813
  return;
19693
19814
  }
19694
- this.writeOut(`${message}
19815
+ this.writeOut(` ${message}
19695
19816
  `);
19696
19817
  }
19697
19818
  warn(message) {
@@ -19718,7 +19839,7 @@ var Logger = class {
19718
19839
  this.logJson(event, data);
19719
19840
  return;
19720
19841
  }
19721
- this.writeOut(`[${event}] ${data ? JSON.stringify(data) : ""}
19842
+ this.writeOut(` [${event}] ${data ? JSON.stringify(data) : ""}
19722
19843
  `);
19723
19844
  }
19724
19845
  writeOut(text) {
@@ -19903,11 +20024,108 @@ async function startPreviewServer(cwd, options, logger3) {
19903
20024
 
19904
20025
  // src/indexing/sources/build/index.ts
19905
20026
  var logger = new Logger();
20027
+ function extractLinksFromHtml(html, pageUrl, baseOrigin) {
20028
+ const $ = cheerio.load(html);
20029
+ const links = [];
20030
+ $("a[href]").each((_i, el) => {
20031
+ const href = $(el).attr("href");
20032
+ if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:") || href.startsWith("javascript:")) {
20033
+ return;
20034
+ }
20035
+ try {
20036
+ const resolved = new URL(href, `${baseOrigin}${pageUrl}`);
20037
+ if (resolved.origin !== baseOrigin) return;
20038
+ if (!["http:", "https:"].includes(resolved.protocol)) return;
20039
+ links.push(normalizeUrlPath(resolved.pathname));
20040
+ } catch {
20041
+ }
20042
+ });
20043
+ return [...new Set(links)];
20044
+ }
20045
+ async function discoverPages(server, buildConfig, pipelineMaxPages) {
20046
+ const { seedUrls, maxDepth, exclude } = buildConfig;
20047
+ const baseOrigin = new URL(server.baseUrl).origin;
20048
+ let effectiveMax = buildConfig.maxPages;
20049
+ if (typeof pipelineMaxPages === "number") {
20050
+ const floored = Math.max(0, Math.floor(pipelineMaxPages));
20051
+ effectiveMax = Math.min(effectiveMax, floored);
20052
+ }
20053
+ if (effectiveMax === 0) return [];
20054
+ const visited = /* @__PURE__ */ new Set();
20055
+ const pages = [];
20056
+ const queue = [];
20057
+ const limit = pLimit2__default.default(8);
20058
+ for (const seed of seedUrls) {
20059
+ const normalized = normalizeUrlPath(seed);
20060
+ if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
20061
+ visited.add(normalized);
20062
+ queue.push({ url: normalized, depth: 0 });
20063
+ }
20064
+ }
20065
+ while (queue.length > 0 && pages.length < effectiveMax) {
20066
+ const remaining = effectiveMax - pages.length;
20067
+ const batch = queue.splice(0, remaining);
20068
+ const results = await Promise.allSettled(
20069
+ batch.map(
20070
+ (item) => limit(async () => {
20071
+ const fullUrl = joinUrl(server.baseUrl, item.url);
20072
+ const response = await fetch(fullUrl);
20073
+ if (!response.ok) {
20074
+ logger.warn(`Skipping ${item.url}: ${response.status} ${response.statusText}`);
20075
+ return null;
20076
+ }
20077
+ const contentType = response.headers.get("content-type") ?? "";
20078
+ if (!contentType.includes("text/html")) {
20079
+ return null;
20080
+ }
20081
+ const html = await response.text();
20082
+ if (item.depth < maxDepth) {
20083
+ const links = extractLinksFromHtml(html, item.url, baseOrigin);
20084
+ for (const link of links) {
20085
+ if (!visited.has(link) && !isExcluded(link, exclude)) {
20086
+ visited.add(link);
20087
+ queue.push({ url: link, depth: item.depth + 1 });
20088
+ }
20089
+ }
20090
+ }
20091
+ return {
20092
+ url: item.url,
20093
+ html,
20094
+ sourcePath: fullUrl,
20095
+ outgoingLinks: []
20096
+ };
20097
+ })
20098
+ )
20099
+ );
20100
+ for (const result of results) {
20101
+ if (result.status === "fulfilled" && result.value) {
20102
+ pages.push(result.value);
20103
+ }
20104
+ }
20105
+ }
20106
+ if (pages.length >= effectiveMax && queue.length > 0) {
20107
+ logger.warn(`Discovery crawl reached maxPages limit (${effectiveMax}), ${queue.length} URLs not visited.`);
20108
+ }
20109
+ logger.event("build_discover_complete", {
20110
+ pagesFound: pages.length,
20111
+ urlsVisited: visited.size,
20112
+ urlsSkipped: queue.length
20113
+ });
20114
+ return pages;
20115
+ }
19906
20116
  async function loadBuildPages(cwd, config, maxPages) {
19907
20117
  const buildConfig = config.source.build;
19908
20118
  if (!buildConfig) {
19909
20119
  throw new Error("build source config is missing");
19910
20120
  }
20121
+ if (buildConfig.discover) {
20122
+ const server2 = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
20123
+ try {
20124
+ return await discoverPages(server2, buildConfig, maxPages);
20125
+ } finally {
20126
+ await server2.shutdown();
20127
+ }
20128
+ }
19911
20129
  const routes = await parseManifest(cwd, buildConfig.outputDir);
19912
20130
  const expanded = expandRoutes(routes, buildConfig.paramValues, buildConfig.exclude, logger);
19913
20131
  logger.event("build_routes_discovered", {
@@ -19918,7 +20136,7 @@ async function loadBuildPages(cwd, config, maxPages) {
19918
20136
  const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
19919
20137
  const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
19920
20138
  try {
19921
- const concurrencyLimit = pLimit__default.default(8);
20139
+ const concurrencyLimit = pLimit2__default.default(8);
19922
20140
  const results = await Promise.allSettled(
19923
20141
  selected.map(
19924
20142
  (route) => concurrencyLimit(async () => {
@@ -20087,7 +20305,7 @@ async function loadCrawledPages(config, maxPages) {
20087
20305
  const routes = await resolveRoutes(config);
20088
20306
  const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
20089
20307
  const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
20090
- const concurrencyLimit = pLimit__default.default(8);
20308
+ const concurrencyLimit = pLimit2__default.default(8);
20091
20309
  const results = await Promise.allSettled(
20092
20310
  selected.map(
20093
20311
  (route) => concurrencyLimit(async () => {
@@ -20141,9 +20359,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
20141
20359
 
20142
20360
  // src/indexing/pipeline.ts
20143
20361
  var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
20144
- "text-embedding-3-small": 2e-5,
20145
- "text-embedding-3-large": 13e-5,
20146
- "text-embedding-ada-002": 1e-4
20362
+ "jina-embeddings-v3": 2e-5
20147
20363
  };
20148
20364
  var DEFAULT_EMBEDDING_PRICE_PER_1K = 2e-5;
20149
20365
  var IndexPipeline = class _IndexPipeline {
@@ -20189,9 +20405,15 @@ var IndexPipeline = class _IndexPipeline {
20189
20405
  };
20190
20406
  const scope = resolveScope(this.config, options.scopeOverride);
20191
20407
  const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
20408
+ const sourceMode = options.sourceOverride ?? this.config.source.mode;
20409
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
20192
20410
  if (options.force) {
20411
+ this.logger.info("Force mode enabled \u2014 full rebuild");
20193
20412
  await cleanMirrorForScope(statePath, scope);
20194
20413
  }
20414
+ if (options.dryRun) {
20415
+ this.logger.info("Dry run \u2014 no writes will be performed");
20416
+ }
20195
20417
  const manifestStart = stageStart();
20196
20418
  const existingHashes = await this.vectorStore.getContentHashes(scope);
20197
20419
  const existingModelId = await this.vectorStore.getScopeModelId(scope);
@@ -20202,8 +20424,9 @@ var IndexPipeline = class _IndexPipeline {
20202
20424
  );
20203
20425
  }
20204
20426
  stageEnd("manifest", manifestStart);
20427
+ this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20205
20428
  const sourceStart = stageStart();
20206
- const sourceMode = options.sourceOverride ?? this.config.source.mode;
20429
+ this.logger.info(`Loading pages (source: ${sourceMode})...`);
20207
20430
  let sourcePages;
20208
20431
  if (sourceMode === "static-output") {
20209
20432
  sourcePages = await loadStaticOutputPages(this.cwd, this.config, options.maxPages);
@@ -20215,10 +20438,13 @@ var IndexPipeline = class _IndexPipeline {
20215
20438
  sourcePages = await loadContentFilesPages(this.cwd, this.config, options.maxPages);
20216
20439
  }
20217
20440
  stageEnd("source", sourceStart);
20441
+ this.logger.info(`Loaded ${sourcePages.length} page${sourcePages.length === 1 ? "" : "s"} (${stageTimingsMs["source"]}ms)`);
20218
20442
  const routeStart = stageStart();
20219
20443
  const routePatterns = await buildRoutePatterns(this.cwd);
20220
20444
  stageEnd("route_map", routeStart);
20445
+ this.logger.debug(`Route mapping: ${routePatterns.length} pattern${routePatterns.length === 1 ? "" : "s"} discovered (${stageTimingsMs["route_map"]}ms)`);
20221
20446
  const extractStart = stageStart();
20447
+ this.logger.info("Extracting content...");
20222
20448
  const extractedPages = [];
20223
20449
  for (const sourcePage of sourcePages) {
20224
20450
  const extracted = sourcePage.html ? extractFromHtml(sourcePage.url, sourcePage.html, this.config) : extractFromMarkdown(sourcePage.url, sourcePage.markdown ?? "", sourcePage.title);
@@ -20247,6 +20473,8 @@ var IndexPipeline = class _IndexPipeline {
20247
20473
  uniquePages.push(page);
20248
20474
  }
20249
20475
  stageEnd("extract", extractStart);
20476
+ const skippedPages = sourcePages.length - uniquePages.length;
20477
+ this.logger.info(`Extracted ${uniquePages.length} page${uniquePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
20250
20478
  const linkStart = stageStart();
20251
20479
  const pageSet = new Set(uniquePages.map((page) => normalizeUrlPath(page.url)));
20252
20480
  const incomingLinkCount = /* @__PURE__ */ new Map();
@@ -20262,7 +20490,9 @@ var IndexPipeline = class _IndexPipeline {
20262
20490
  }
20263
20491
  }
20264
20492
  stageEnd("links", linkStart);
20493
+ this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
20265
20494
  const mirrorStart = stageStart();
20495
+ this.logger.info("Writing mirror pages...");
20266
20496
  const mirrorPages = [];
20267
20497
  let routeExact = 0;
20268
20498
  let routeBestEffort = 0;
@@ -20332,7 +20562,9 @@ var IndexPipeline = class _IndexPipeline {
20332
20562
  await this.vectorStore.upsertPages(pageRecords, scope);
20333
20563
  }
20334
20564
  stageEnd("mirror", mirrorStart);
20565
+ this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
20335
20566
  const chunkStart = stageStart();
20567
+ this.logger.info("Chunking pages...");
20336
20568
  let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
20337
20569
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
20338
20570
  if (typeof maxChunks === "number") {
@@ -20345,6 +20577,7 @@ var IndexPipeline = class _IndexPipeline {
20345
20577
  });
20346
20578
  }
20347
20579
  stageEnd("chunk", chunkStart);
20580
+ this.logger.info(`Chunked into ${chunks.length} chunk${chunks.length === 1 ? "" : "s"} (${stageTimingsMs["chunk"]}ms)`);
20348
20581
  const currentChunkMap = /* @__PURE__ */ new Map();
20349
20582
  for (const chunk of chunks) {
20350
20583
  currentChunkMap.set(chunk.chunkKey, chunk);
@@ -20363,6 +20596,7 @@ var IndexPipeline = class _IndexPipeline {
20363
20596
  return existingHash !== chunk.contentHash;
20364
20597
  });
20365
20598
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
20599
+ this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
20366
20600
  const embedStart = stageStart();
20367
20601
  const chunkTokenEstimates = /* @__PURE__ */ new Map();
20368
20602
  for (const chunk of changedChunks) {
@@ -20377,9 +20611,11 @@ var IndexPipeline = class _IndexPipeline {
20377
20611
  let newEmbeddings = 0;
20378
20612
  const vectorsByChunk = /* @__PURE__ */ new Map();
20379
20613
  if (!options.dryRun && changedChunks.length > 0) {
20614
+ this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
20380
20615
  const embeddings = await this.embeddings.embedTexts(
20381
20616
  changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
20382
- this.config.embeddings.model
20617
+ this.config.embeddings.model,
20618
+ "retrieval.passage"
20383
20619
  );
20384
20620
  if (embeddings.length !== changedChunks.length) {
20385
20621
  throw new SearchSocketError(
@@ -20402,8 +20638,14 @@ var IndexPipeline = class _IndexPipeline {
20402
20638
  }
20403
20639
  }
20404
20640
  stageEnd("embedding", embedStart);
20641
+ if (changedChunks.length > 0) {
20642
+ this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
20643
+ } else {
20644
+ this.logger.info("No chunks to embed \u2014 all up to date");
20645
+ }
20405
20646
  const syncStart = stageStart();
20406
20647
  if (!options.dryRun) {
20648
+ this.logger.info("Syncing vectors...");
20407
20649
  const upserts = [];
20408
20650
  for (const chunk of changedChunks) {
20409
20651
  const vector = vectorsByChunk.get(chunk.chunkKey);
@@ -20422,6 +20664,8 @@ var IndexPipeline = class _IndexPipeline {
20422
20664
  sectionTitle: chunk.sectionTitle ?? "",
20423
20665
  headingPath: chunk.headingPath,
20424
20666
  snippet: chunk.snippet,
20667
+ chunkText: chunk.chunkText.slice(0, 4e3),
20668
+ ordinal: chunk.ordinal,
20425
20669
  contentHash: chunk.contentHash,
20426
20670
  modelId: this.config.embeddings.model,
20427
20671
  depth: chunk.depth,
@@ -20441,6 +20685,7 @@ var IndexPipeline = class _IndexPipeline {
20441
20685
  }
20442
20686
  }
20443
20687
  stageEnd("sync", syncStart);
20688
+ this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
20444
20689
  const finalizeStart = stageStart();
20445
20690
  if (!options.dryRun) {
20446
20691
  const scopeInfo = {
@@ -20460,6 +20705,7 @@ var IndexPipeline = class _IndexPipeline {
20460
20705
  });
20461
20706
  }
20462
20707
  stageEnd("finalize", finalizeStart);
20708
+ this.logger.info("Done.");
20463
20709
  return {
20464
20710
  pagesProcessed: mirrorPages.length,
20465
20711
  chunksTotal: chunks.length,