searchsocket 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -5,8 +5,7 @@ var path = require('path');
5
5
  var jiti = require('jiti');
6
6
  var zod = require('zod');
7
7
  var child_process = require('child_process');
8
- var OpenAI = require('openai');
9
- var pLimit = require('p-limit');
8
+ var pLimit2 = require('p-limit');
10
9
  var crypto = require('crypto');
11
10
  var cheerio = require('cheerio');
12
11
  var matter = require('gray-matter');
@@ -23,8 +22,7 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
23
22
 
24
23
  var fs__default = /*#__PURE__*/_interopDefault(fs);
25
24
  var path__default = /*#__PURE__*/_interopDefault(path);
26
- var OpenAI__default = /*#__PURE__*/_interopDefault(OpenAI);
27
- var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
25
+ var pLimit2__default = /*#__PURE__*/_interopDefault(pLimit2);
28
26
  var matter__default = /*#__PURE__*/_interopDefault(matter);
29
27
  var fs4__default = /*#__PURE__*/_interopDefault(fs4);
30
28
  var fg__default = /*#__PURE__*/_interopDefault(fg);
@@ -16633,7 +16631,11 @@ var searchSocketConfigSchema = zod.z.object({
16633
16631
  outputDir: zod.z.string().min(1).optional(),
16634
16632
  paramValues: zod.z.record(zod.z.string(), zod.z.array(zod.z.string())).optional(),
16635
16633
  exclude: zod.z.array(zod.z.string()).optional(),
16636
- previewTimeout: zod.z.number().int().positive().optional()
16634
+ previewTimeout: zod.z.number().int().positive().optional(),
16635
+ discover: zod.z.boolean().optional(),
16636
+ seedUrls: zod.z.array(zod.z.string()).optional(),
16637
+ maxPages: zod.z.number().int().positive().optional(),
16638
+ maxDepth: zod.z.number().int().nonnegative().optional()
16637
16639
  }).optional()
16638
16640
  }).optional(),
16639
16641
  extract: zod.z.object({
@@ -16660,8 +16662,9 @@ var searchSocketConfigSchema = zod.z.object({
16660
16662
  pageSummaryChunk: zod.z.boolean().optional()
16661
16663
  }).optional(),
16662
16664
  embeddings: zod.z.object({
16663
- provider: zod.z.literal("openai").optional(),
16665
+ provider: zod.z.literal("jina").optional(),
16664
16666
  model: zod.z.string().min(1).optional(),
16667
+ apiKey: zod.z.string().min(1).optional(),
16665
16668
  apiKeyEnv: zod.z.string().min(1).optional(),
16666
16669
  batchSize: zod.z.number().int().positive().optional(),
16667
16670
  concurrency: zod.z.number().int().positive().optional(),
@@ -16670,18 +16673,17 @@ var searchSocketConfigSchema = zod.z.object({
16670
16673
  vector: zod.z.object({
16671
16674
  dimension: zod.z.number().int().positive().optional(),
16672
16675
  turso: zod.z.object({
16676
+ url: zod.z.string().url().optional(),
16677
+ authToken: zod.z.string().min(1).optional(),
16673
16678
  urlEnv: zod.z.string().optional(),
16674
16679
  authTokenEnv: zod.z.string().optional(),
16675
16680
  localPath: zod.z.string().optional()
16676
16681
  }).optional()
16677
16682
  }).optional(),
16678
16683
  rerank: zod.z.object({
16679
- provider: zod.z.enum(["none", "jina"]).optional(),
16684
+ enabled: zod.z.boolean().optional(),
16680
16685
  topN: zod.z.number().int().positive().optional(),
16681
- jina: zod.z.object({
16682
- apiKeyEnv: zod.z.string().optional(),
16683
- model: zod.z.string().optional()
16684
- }).optional()
16686
+ model: zod.z.string().optional()
16685
16687
  }).optional(),
16686
16688
  ranking: zod.z.object({
16687
16689
  enableIncomingLinkBoost: zod.z.boolean().optional(),
@@ -16690,6 +16692,7 @@ var searchSocketConfigSchema = zod.z.object({
16690
16692
  aggregationCap: zod.z.number().int().positive().optional(),
16691
16693
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16692
16694
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
16695
+ minScore: zod.z.number().min(0).max(1).optional(),
16693
16696
  weights: zod.z.object({
16694
16697
  incomingLinks: zod.z.number().optional(),
16695
16698
  depth: zod.z.number().optional(),
@@ -16770,9 +16773,9 @@ function createDefaultConfig(projectId) {
16770
16773
  pageSummaryChunk: true
16771
16774
  },
16772
16775
  embeddings: {
16773
- provider: "openai",
16774
- model: "text-embedding-3-small",
16775
- apiKeyEnv: "OPENAI_API_KEY",
16776
+ provider: "jina",
16777
+ model: "jina-embeddings-v3",
16778
+ apiKeyEnv: "JINA_API_KEY",
16776
16779
  batchSize: 64,
16777
16780
  concurrency: 4
16778
16781
  },
@@ -16784,12 +16787,9 @@ function createDefaultConfig(projectId) {
16784
16787
  }
16785
16788
  },
16786
16789
  rerank: {
16787
- provider: "none",
16790
+ enabled: false,
16788
16791
  topN: 20,
16789
- jina: {
16790
- apiKeyEnv: "JINA_API_KEY",
16791
- model: "jina-reranker-v2-base-multilingual"
16792
- }
16792
+ model: "jina-reranker-v2-base-multilingual"
16793
16793
  },
16794
16794
  ranking: {
16795
16795
  enableIncomingLinkBoost: true,
@@ -16798,6 +16798,7 @@ function createDefaultConfig(projectId) {
16798
16798
  aggregationCap: 5,
16799
16799
  aggregationDecay: 0.5,
16800
16800
  minChunkScoreRatio: 0.5,
16801
+ minScore: 0,
16801
16802
  weights: {
16802
16803
  incomingLinks: 0.05,
16803
16804
  depth: 0.03,
@@ -16924,7 +16925,11 @@ ${issues}`
16924
16925
  outputDir: parsed.source.build.outputDir ?? ".svelte-kit/output",
16925
16926
  paramValues: parsed.source.build.paramValues ?? {},
16926
16927
  exclude: parsed.source.build.exclude ?? [],
16927
- previewTimeout: parsed.source.build.previewTimeout ?? 3e4
16928
+ previewTimeout: parsed.source.build.previewTimeout ?? 3e4,
16929
+ discover: parsed.source.build.discover ?? false,
16930
+ seedUrls: parsed.source.build.seedUrls ?? ["/"],
16931
+ maxPages: parsed.source.build.maxPages ?? 200,
16932
+ maxDepth: parsed.source.build.maxDepth ?? 10
16928
16933
  } : void 0
16929
16934
  },
16930
16935
  extract: {
@@ -16953,11 +16958,7 @@ ${issues}`
16953
16958
  },
16954
16959
  rerank: {
16955
16960
  ...defaults.rerank,
16956
- ...parsed.rerank,
16957
- jina: {
16958
- ...defaults.rerank.jina,
16959
- ...parsed.rerank?.jina
16960
- }
16961
+ ...parsed.rerank
16961
16962
  },
16962
16963
  ranking: {
16963
16964
  ...defaults.ranking,
@@ -17004,7 +17005,11 @@ ${issues}`
17004
17005
  outputDir: ".svelte-kit/output",
17005
17006
  paramValues: {},
17006
17007
  exclude: [],
17007
- previewTimeout: 3e4
17008
+ previewTimeout: 3e4,
17009
+ discover: false,
17010
+ seedUrls: ["/"],
17011
+ maxPages: 200,
17012
+ maxDepth: 10
17008
17013
  };
17009
17014
  }
17010
17015
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
@@ -17018,6 +17023,21 @@ ${issues}`
17018
17023
  }
17019
17024
  return merged;
17020
17025
  }
17026
+ function mergeConfigServerless(rawConfig) {
17027
+ if (!rawConfig.project?.id) {
17028
+ throw new SearchSocketError(
17029
+ "CONFIG_MISSING",
17030
+ "`project.id` is required for serverless config (cannot infer from package.json)."
17031
+ );
17032
+ }
17033
+ if (!rawConfig.source?.mode) {
17034
+ throw new SearchSocketError(
17035
+ "CONFIG_MISSING",
17036
+ "`source.mode` is required for serverless config (cannot auto-detect from filesystem)."
17037
+ );
17038
+ }
17039
+ return mergeConfig(process.cwd(), rawConfig);
17040
+ }
17021
17041
  async function loadConfig(options = {}) {
17022
17042
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
17023
17043
  const configPath = path__default.default.resolve(cwd, options.configPath ?? "searchsocket.config.ts");
@@ -17040,6 +17060,11 @@ async function loadConfig(options = {}) {
17040
17060
  return mergeConfig(cwd, raw);
17041
17061
  }
17042
17062
 
17063
+ // src/core/serverless.ts
17064
+ function isServerless() {
17065
+ return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
17066
+ }
17067
+
17043
17068
  // src/utils/text.ts
17044
17069
  function normalizeText(input) {
17045
17070
  return input.replace(/\r\n/g, "\n").replace(/\s+/g, " ").trim();
@@ -17117,10 +17142,11 @@ function sleep(ms) {
17117
17142
  setTimeout(resolve, ms);
17118
17143
  });
17119
17144
  }
17120
- var OpenAIEmbeddingsProvider = class {
17121
- client;
17145
+ var JinaEmbeddingsProvider = class {
17146
+ apiKey;
17122
17147
  batchSize;
17123
17148
  concurrency;
17149
+ defaultTask;
17124
17150
  constructor(options) {
17125
17151
  if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
17126
17152
  throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
@@ -17128,11 +17154,10 @@ var OpenAIEmbeddingsProvider = class {
17128
17154
  if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
17129
17155
  throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
17130
17156
  }
17131
- this.client = new OpenAI__default.default({
17132
- apiKey: options.apiKey
17133
- });
17157
+ this.apiKey = options.apiKey;
17134
17158
  this.batchSize = options.batchSize;
17135
17159
  this.concurrency = options.concurrency;
17160
+ this.defaultTask = options.task ?? "retrieval.passage";
17136
17161
  }
17137
17162
  estimateTokens(text) {
17138
17163
  const normalized = text.trim();
@@ -17146,7 +17171,7 @@ var OpenAIEmbeddingsProvider = class {
17146
17171
  const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
17147
17172
  return Math.max(1, Math.max(charEstimate, lexicalEstimate));
17148
17173
  }
17149
- async embedTexts(texts, modelId) {
17174
+ async embedTexts(texts, modelId, task) {
17150
17175
  if (texts.length === 0) {
17151
17176
  return [];
17152
17177
  }
@@ -17158,37 +17183,56 @@ var OpenAIEmbeddingsProvider = class {
17158
17183
  });
17159
17184
  }
17160
17185
  const outputs = new Array(batches.length);
17161
- const limit = pLimit__default.default(this.concurrency);
17186
+ const limit = pLimit2__default.default(this.concurrency);
17162
17187
  await Promise.all(
17163
17188
  batches.map(
17164
17189
  (batch, position) => limit(async () => {
17165
- outputs[position] = await this.embedWithRetry(batch.values, modelId);
17190
+ outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
17166
17191
  })
17167
17192
  )
17168
17193
  );
17169
17194
  return outputs.flat();
17170
17195
  }
17171
- async embedWithRetry(texts, modelId) {
17196
+ async embedWithRetry(texts, modelId, task) {
17172
17197
  const maxAttempts = 5;
17173
17198
  let attempt = 0;
17174
17199
  while (attempt < maxAttempts) {
17175
17200
  attempt += 1;
17201
+ let response;
17176
17202
  try {
17177
- const response = await this.client.embeddings.create({
17178
- model: modelId,
17179
- input: texts,
17180
- encoding_format: "float"
17203
+ response = await fetch("https://api.jina.ai/v1/embeddings", {
17204
+ method: "POST",
17205
+ headers: {
17206
+ "content-type": "application/json",
17207
+ authorization: `Bearer ${this.apiKey}`
17208
+ },
17209
+ body: JSON.stringify({
17210
+ model: modelId,
17211
+ input: texts,
17212
+ task
17213
+ })
17181
17214
  });
17182
- return response.data.map((entry) => entry.embedding);
17183
17215
  } catch (error) {
17184
- const status = error.status;
17185
- const retryable = status === 429 || typeof status === "number" && status >= 500;
17186
- if (!retryable || attempt >= maxAttempts) {
17216
+ if (attempt >= maxAttempts) {
17187
17217
  throw error;
17188
17218
  }
17189
- const delay = Math.min(2 ** attempt * 300, 5e3);
17190
- await sleep(delay);
17219
+ await sleep(Math.min(2 ** attempt * 300, 5e3));
17220
+ continue;
17221
+ }
17222
+ if (!response.ok) {
17223
+ const retryable = response.status === 429 || response.status >= 500;
17224
+ if (!retryable || attempt >= maxAttempts) {
17225
+ const errorBody = await response.text();
17226
+ throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
17227
+ }
17228
+ await sleep(Math.min(2 ** attempt * 300, 5e3));
17229
+ continue;
17191
17230
  }
17231
+ const payload = await response.json();
17232
+ if (!payload.data || !Array.isArray(payload.data)) {
17233
+ throw new Error("Invalid Jina embeddings response format");
17234
+ }
17235
+ return payload.data.map((entry) => entry.embedding);
17192
17236
  }
17193
17237
  throw new Error("Unreachable retry state");
17194
17238
  }
@@ -17196,20 +17240,20 @@ var OpenAIEmbeddingsProvider = class {
17196
17240
 
17197
17241
  // src/embeddings/factory.ts
17198
17242
  function createEmbeddingsProvider(config) {
17199
- if (config.embeddings.provider !== "openai") {
17243
+ if (config.embeddings.provider !== "jina") {
17200
17244
  throw new SearchSocketError(
17201
17245
  "CONFIG_MISSING",
17202
17246
  `Unsupported embeddings provider ${config.embeddings.provider}`
17203
17247
  );
17204
17248
  }
17205
- const apiKey = process.env[config.embeddings.apiKeyEnv];
17249
+ const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17206
17250
  if (!apiKey) {
17207
17251
  throw new SearchSocketError(
17208
17252
  "CONFIG_MISSING",
17209
- `Missing embeddings API key env var: ${config.embeddings.apiKeyEnv}`
17253
+ `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
17210
17254
  );
17211
17255
  }
17212
- return new OpenAIEmbeddingsProvider({
17256
+ return new JinaEmbeddingsProvider({
17213
17257
  apiKey,
17214
17258
  batchSize: config.embeddings.batchSize,
17215
17259
  concurrency: config.embeddings.concurrency
@@ -17299,20 +17343,17 @@ var JinaReranker = class {
17299
17343
 
17300
17344
  // src/rerank/factory.ts
17301
17345
  function createReranker(config) {
17302
- if (config.rerank.provider === "none") {
17346
+ if (!config.rerank.enabled) {
17303
17347
  return null;
17304
17348
  }
17305
- if (config.rerank.provider === "jina") {
17306
- const apiKey = process.env[config.rerank.jina.apiKeyEnv];
17307
- if (!apiKey) {
17308
- return null;
17309
- }
17310
- return new JinaReranker({
17311
- apiKey,
17312
- model: config.rerank.jina.model
17313
- });
17349
+ const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17350
+ if (!apiKey) {
17351
+ return null;
17314
17352
  }
17315
- return null;
17353
+ return new JinaReranker({
17354
+ apiKey,
17355
+ model: config.rerank.model
17356
+ });
17316
17357
  }
17317
17358
  function ensureStateDirs(cwd, stateDir, scope) {
17318
17359
  const statePath = path__default.default.resolve(cwd, stateDir);
@@ -17365,6 +17406,16 @@ var TursoVectorStore = class {
17365
17406
  }
17366
17407
  async ensureChunks(dim) {
17367
17408
  if (this.chunksReady) return;
17409
+ const exists = await this.chunksTableExists();
17410
+ if (exists) {
17411
+ const currentDim = await this.getChunksDimension();
17412
+ if (currentDim !== null && currentDim !== dim) {
17413
+ await this.client.batch([
17414
+ "DROP INDEX IF EXISTS idx",
17415
+ "DROP TABLE IF EXISTS chunks"
17416
+ ]);
17417
+ }
17418
+ }
17368
17419
  await this.client.batch([
17369
17420
  `CREATE TABLE IF NOT EXISTS chunks (
17370
17421
  id TEXT PRIMARY KEY,
@@ -17376,6 +17427,8 @@ var TursoVectorStore = class {
17376
17427
  section_title TEXT NOT NULL DEFAULT '',
17377
17428
  heading_path TEXT NOT NULL DEFAULT '[]',
17378
17429
  snippet TEXT NOT NULL DEFAULT '',
17430
+ chunk_text TEXT NOT NULL DEFAULT '',
17431
+ ordinal INTEGER NOT NULL DEFAULT 0,
17379
17432
  content_hash TEXT NOT NULL DEFAULT '',
17380
17433
  model_id TEXT NOT NULL DEFAULT '',
17381
17434
  depth INTEGER NOT NULL DEFAULT 0,
@@ -17386,6 +17439,19 @@ var TursoVectorStore = class {
17386
17439
  )`,
17387
17440
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17388
17441
  ]);
17442
+ const chunkMigrationCols = [
17443
+ { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17444
+ { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17445
+ ];
17446
+ for (const col of chunkMigrationCols) {
17447
+ try {
17448
+ await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17449
+ } catch (error) {
17450
+ if (error instanceof Error && !error.message.includes("duplicate column")) {
17451
+ throw error;
17452
+ }
17453
+ }
17454
+ }
17389
17455
  this.chunksReady = true;
17390
17456
  }
17391
17457
  async ensurePages() {
@@ -17420,6 +17486,38 @@ var TursoVectorStore = class {
17420
17486
  throw error;
17421
17487
  }
17422
17488
  }
17489
+ /**
17490
+ * Read the current F32_BLOB dimension from the chunks table schema.
17491
+ * Returns null if the table doesn't exist or the dimension can't be parsed.
17492
+ */
17493
+ async getChunksDimension() {
17494
+ try {
17495
+ const rs = await this.client.execute(
17496
+ "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
17497
+ );
17498
+ if (rs.rows.length === 0) return null;
17499
+ const sql = rs.rows[0].sql;
17500
+ const match = sql.match(/F32_BLOB\((\d+)\)/i);
17501
+ return match ? parseInt(match[1], 10) : null;
17502
+ } catch {
17503
+ return null;
17504
+ }
17505
+ }
17506
+ /**
17507
+ * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
17508
+ * Used by `clean --remote` for a full reset.
17509
+ */
17510
+ async dropAllTables() {
17511
+ await this.client.batch([
17512
+ "DROP INDEX IF EXISTS idx",
17513
+ "DROP TABLE IF EXISTS chunks",
17514
+ "DROP TABLE IF EXISTS registry",
17515
+ "DROP TABLE IF EXISTS pages"
17516
+ ]);
17517
+ this.chunksReady = false;
17518
+ this.registryReady = false;
17519
+ this.pagesReady = false;
17520
+ }
17423
17521
  async upsert(records, _scope) {
17424
17522
  if (records.length === 0) return;
17425
17523
  const dim = this.dimension ?? records[0].vector.length;
@@ -17430,9 +17528,9 @@ var TursoVectorStore = class {
17430
17528
  const stmts = batch.map((r) => ({
17431
17529
  sql: `INSERT OR REPLACE INTO chunks
17432
17530
  (id, project_id, scope_name, url, path, title, section_title,
17433
- heading_path, snippet, content_hash, model_id, depth,
17531
+ heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17434
17532
  incoming_links, route_file, tags, embedding)
17435
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17533
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17436
17534
  args: [
17437
17535
  r.id,
17438
17536
  r.metadata.projectId,
@@ -17443,6 +17541,8 @@ var TursoVectorStore = class {
17443
17541
  r.metadata.sectionTitle,
17444
17542
  JSON.stringify(r.metadata.headingPath),
17445
17543
  r.metadata.snippet,
17544
+ r.metadata.chunkText,
17545
+ r.metadata.ordinal,
17446
17546
  r.metadata.contentHash,
17447
17547
  r.metadata.modelId,
17448
17548
  r.metadata.depth,
@@ -17461,7 +17561,8 @@ var TursoVectorStore = class {
17461
17561
  const queryJson = JSON.stringify(queryVector);
17462
17562
  const rs = await this.client.execute({
17463
17563
  sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
17464
- c.section_title, c.heading_path, c.snippet, c.content_hash,
17564
+ c.section_title, c.heading_path, c.snippet, c.chunk_text,
17565
+ c.ordinal, c.content_hash,
17465
17566
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17466
17567
  vector_distance_cos(c.embedding, vector(?)) AS distance
17467
17568
  FROM vector_top_k('idx', vector(?), ?) AS v
@@ -17505,6 +17606,8 @@ var TursoVectorStore = class {
17505
17606
  sectionTitle: row.section_title,
17506
17607
  headingPath: JSON.parse(row.heading_path || "[]"),
17507
17608
  snippet: row.snippet,
17609
+ chunkText: row.chunk_text || "",
17610
+ ordinal: row.ordinal || 0,
17508
17611
  contentHash: row.content_hash,
17509
17612
  modelId: row.model_id,
17510
17613
  depth: row.depth,
@@ -17700,10 +17803,10 @@ var TursoVectorStore = class {
17700
17803
  // src/vector/factory.ts
17701
17804
  async function createVectorStore(config, cwd) {
17702
17805
  const turso = config.vector.turso;
17703
- const remoteUrl = process.env[turso.urlEnv];
17806
+ const remoteUrl = turso.url ?? process.env[turso.urlEnv];
17704
17807
  if (remoteUrl) {
17705
17808
  const { createClient: createClient2 } = await import('@libsql/client/http');
17706
- const authToken = process.env[turso.authTokenEnv];
17809
+ const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
17707
17810
  const client2 = createClient2({
17708
17811
  url: remoteUrl,
17709
17812
  authToken
@@ -17713,6 +17816,12 @@ async function createVectorStore(config, cwd) {
17713
17816
  dimension: config.vector.dimension
17714
17817
  });
17715
17818
  }
17819
+ if (isServerless()) {
17820
+ throw new SearchSocketError(
17821
+ "VECTOR_BACKEND_UNAVAILABLE",
17822
+ `No remote vector database URL found (checked vector.turso.url and env var "${turso.urlEnv}"). Local SQLite storage is not available in serverless environments. Set ${turso.urlEnv} or pass vector.turso.url directly.`
17823
+ );
17824
+ }
17716
17825
  const { createClient } = await import('@libsql/client');
17717
17826
  const localPath = path__default.default.resolve(cwd, turso.localPath);
17718
17827
  fs__default.default.mkdirSync(path__default.default.dirname(localPath), { recursive: true });
@@ -19151,14 +19260,16 @@ function mapUrlToRoute(urlPath, patterns) {
19151
19260
  var Logger = class {
19152
19261
  json;
19153
19262
  verbose;
19263
+ quiet;
19154
19264
  stderrOnly;
19155
19265
  constructor(opts = {}) {
19156
19266
  this.json = opts.json ?? false;
19157
19267
  this.verbose = opts.verbose ?? false;
19268
+ this.quiet = opts.quiet ?? false;
19158
19269
  this.stderrOnly = opts.stderrOnly ?? false;
19159
19270
  }
19160
19271
  info(message) {
19161
- if (this.json) {
19272
+ if (this.quiet || this.json) {
19162
19273
  return;
19163
19274
  }
19164
19275
  this.writeOut(`${message}
@@ -19172,7 +19283,7 @@ var Logger = class {
19172
19283
  this.logJson("debug", { message });
19173
19284
  return;
19174
19285
  }
19175
- this.writeOut(`${message}
19286
+ this.writeOut(` ${message}
19176
19287
  `);
19177
19288
  }
19178
19289
  warn(message) {
@@ -19199,7 +19310,7 @@ var Logger = class {
19199
19310
  this.logJson(event, data);
19200
19311
  return;
19201
19312
  }
19202
- this.writeOut(`[${event}] ${data ? JSON.stringify(data) : ""}
19313
+ this.writeOut(` [${event}] ${data ? JSON.stringify(data) : ""}
19203
19314
  `);
19204
19315
  }
19205
19316
  writeOut(text) {
@@ -19384,11 +19495,108 @@ async function startPreviewServer(cwd, options, logger3) {
19384
19495
 
19385
19496
  // src/indexing/sources/build/index.ts
19386
19497
  var logger = new Logger();
19498
+ function extractLinksFromHtml(html, pageUrl, baseOrigin) {
19499
+ const $ = cheerio.load(html);
19500
+ const links = [];
19501
+ $("a[href]").each((_i, el) => {
19502
+ const href = $(el).attr("href");
19503
+ if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:") || href.startsWith("javascript:")) {
19504
+ return;
19505
+ }
19506
+ try {
19507
+ const resolved = new URL(href, `${baseOrigin}${pageUrl}`);
19508
+ if (resolved.origin !== baseOrigin) return;
19509
+ if (!["http:", "https:"].includes(resolved.protocol)) return;
19510
+ links.push(normalizeUrlPath(resolved.pathname));
19511
+ } catch {
19512
+ }
19513
+ });
19514
+ return [...new Set(links)];
19515
+ }
19516
+ async function discoverPages(server, buildConfig, pipelineMaxPages) {
19517
+ const { seedUrls, maxDepth, exclude } = buildConfig;
19518
+ const baseOrigin = new URL(server.baseUrl).origin;
19519
+ let effectiveMax = buildConfig.maxPages;
19520
+ if (typeof pipelineMaxPages === "number") {
19521
+ const floored = Math.max(0, Math.floor(pipelineMaxPages));
19522
+ effectiveMax = Math.min(effectiveMax, floored);
19523
+ }
19524
+ if (effectiveMax === 0) return [];
19525
+ const visited = /* @__PURE__ */ new Set();
19526
+ const pages = [];
19527
+ const queue = [];
19528
+ const limit = pLimit2__default.default(8);
19529
+ for (const seed of seedUrls) {
19530
+ const normalized = normalizeUrlPath(seed);
19531
+ if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
19532
+ visited.add(normalized);
19533
+ queue.push({ url: normalized, depth: 0 });
19534
+ }
19535
+ }
19536
+ while (queue.length > 0 && pages.length < effectiveMax) {
19537
+ const remaining = effectiveMax - pages.length;
19538
+ const batch = queue.splice(0, remaining);
19539
+ const results = await Promise.allSettled(
19540
+ batch.map(
19541
+ (item) => limit(async () => {
19542
+ const fullUrl = joinUrl(server.baseUrl, item.url);
19543
+ const response = await fetch(fullUrl);
19544
+ if (!response.ok) {
19545
+ logger.warn(`Skipping ${item.url}: ${response.status} ${response.statusText}`);
19546
+ return null;
19547
+ }
19548
+ const contentType = response.headers.get("content-type") ?? "";
19549
+ if (!contentType.includes("text/html")) {
19550
+ return null;
19551
+ }
19552
+ const html = await response.text();
19553
+ if (item.depth < maxDepth) {
19554
+ const links = extractLinksFromHtml(html, item.url, baseOrigin);
19555
+ for (const link of links) {
19556
+ if (!visited.has(link) && !isExcluded(link, exclude)) {
19557
+ visited.add(link);
19558
+ queue.push({ url: link, depth: item.depth + 1 });
19559
+ }
19560
+ }
19561
+ }
19562
+ return {
19563
+ url: item.url,
19564
+ html,
19565
+ sourcePath: fullUrl,
19566
+ outgoingLinks: []
19567
+ };
19568
+ })
19569
+ )
19570
+ );
19571
+ for (const result of results) {
19572
+ if (result.status === "fulfilled" && result.value) {
19573
+ pages.push(result.value);
19574
+ }
19575
+ }
19576
+ }
19577
+ if (pages.length >= effectiveMax && queue.length > 0) {
19578
+ logger.warn(`Discovery crawl reached maxPages limit (${effectiveMax}), ${queue.length} URLs not visited.`);
19579
+ }
19580
+ logger.event("build_discover_complete", {
19581
+ pagesFound: pages.length,
19582
+ urlsVisited: visited.size,
19583
+ urlsSkipped: queue.length
19584
+ });
19585
+ return pages;
19586
+ }
19387
19587
  async function loadBuildPages(cwd, config, maxPages) {
19388
19588
  const buildConfig = config.source.build;
19389
19589
  if (!buildConfig) {
19390
19590
  throw new Error("build source config is missing");
19391
19591
  }
19592
+ if (buildConfig.discover) {
19593
+ const server2 = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
19594
+ try {
19595
+ return await discoverPages(server2, buildConfig, maxPages);
19596
+ } finally {
19597
+ await server2.shutdown();
19598
+ }
19599
+ }
19392
19600
  const routes = await parseManifest(cwd, buildConfig.outputDir);
19393
19601
  const expanded = expandRoutes(routes, buildConfig.paramValues, buildConfig.exclude, logger);
19394
19602
  logger.event("build_routes_discovered", {
@@ -19399,7 +19607,7 @@ async function loadBuildPages(cwd, config, maxPages) {
19399
19607
  const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
19400
19608
  const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
19401
19609
  try {
19402
- const concurrencyLimit = pLimit__default.default(8);
19610
+ const concurrencyLimit = pLimit2__default.default(8);
19403
19611
  const results = await Promise.allSettled(
19404
19612
  selected.map(
19405
19613
  (route) => concurrencyLimit(async () => {
@@ -19568,7 +19776,7 @@ async function loadCrawledPages(config, maxPages) {
19568
19776
  const routes = await resolveRoutes(config);
19569
19777
  const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
19570
19778
  const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
19571
- const concurrencyLimit = pLimit__default.default(8);
19779
+ const concurrencyLimit = pLimit2__default.default(8);
19572
19780
  const results = await Promise.allSettled(
19573
19781
  selected.map(
19574
19782
  (route) => concurrencyLimit(async () => {
@@ -19630,9 +19838,7 @@ function hrTimeMs(start) {
19630
19838
 
19631
19839
  // src/indexing/pipeline.ts
19632
19840
  var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
19633
- "text-embedding-3-small": 2e-5,
19634
- "text-embedding-3-large": 13e-5,
19635
- "text-embedding-ada-002": 1e-4
19841
+ "jina-embeddings-v3": 2e-5
19636
19842
  };
19637
19843
  var DEFAULT_EMBEDDING_PRICE_PER_1K = 2e-5;
19638
19844
  var IndexPipeline = class _IndexPipeline {
@@ -19678,9 +19884,15 @@ var IndexPipeline = class _IndexPipeline {
19678
19884
  };
19679
19885
  const scope = resolveScope(this.config, options.scopeOverride);
19680
19886
  const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
19887
+ const sourceMode = options.sourceOverride ?? this.config.source.mode;
19888
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
19681
19889
  if (options.force) {
19890
+ this.logger.info("Force mode enabled \u2014 full rebuild");
19682
19891
  await cleanMirrorForScope(statePath, scope);
19683
19892
  }
19893
+ if (options.dryRun) {
19894
+ this.logger.info("Dry run \u2014 no writes will be performed");
19895
+ }
19684
19896
  const manifestStart = stageStart();
19685
19897
  const existingHashes = await this.vectorStore.getContentHashes(scope);
19686
19898
  const existingModelId = await this.vectorStore.getScopeModelId(scope);
@@ -19691,8 +19903,9 @@ var IndexPipeline = class _IndexPipeline {
19691
19903
  );
19692
19904
  }
19693
19905
  stageEnd("manifest", manifestStart);
19906
+ this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
19694
19907
  const sourceStart = stageStart();
19695
- const sourceMode = options.sourceOverride ?? this.config.source.mode;
19908
+ this.logger.info(`Loading pages (source: ${sourceMode})...`);
19696
19909
  let sourcePages;
19697
19910
  if (sourceMode === "static-output") {
19698
19911
  sourcePages = await loadStaticOutputPages(this.cwd, this.config, options.maxPages);
@@ -19704,10 +19917,13 @@ var IndexPipeline = class _IndexPipeline {
19704
19917
  sourcePages = await loadContentFilesPages(this.cwd, this.config, options.maxPages);
19705
19918
  }
19706
19919
  stageEnd("source", sourceStart);
19920
+ this.logger.info(`Loaded ${sourcePages.length} page${sourcePages.length === 1 ? "" : "s"} (${stageTimingsMs["source"]}ms)`);
19707
19921
  const routeStart = stageStart();
19708
19922
  const routePatterns = await buildRoutePatterns(this.cwd);
19709
19923
  stageEnd("route_map", routeStart);
19924
+ this.logger.debug(`Route mapping: ${routePatterns.length} pattern${routePatterns.length === 1 ? "" : "s"} discovered (${stageTimingsMs["route_map"]}ms)`);
19710
19925
  const extractStart = stageStart();
19926
+ this.logger.info("Extracting content...");
19711
19927
  const extractedPages = [];
19712
19928
  for (const sourcePage of sourcePages) {
19713
19929
  const extracted = sourcePage.html ? extractFromHtml(sourcePage.url, sourcePage.html, this.config) : extractFromMarkdown(sourcePage.url, sourcePage.markdown ?? "", sourcePage.title);
@@ -19736,6 +19952,8 @@ var IndexPipeline = class _IndexPipeline {
19736
19952
  uniquePages.push(page);
19737
19953
  }
19738
19954
  stageEnd("extract", extractStart);
19955
+ const skippedPages = sourcePages.length - uniquePages.length;
19956
+ this.logger.info(`Extracted ${uniquePages.length} page${uniquePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
19739
19957
  const linkStart = stageStart();
19740
19958
  const pageSet = new Set(uniquePages.map((page) => normalizeUrlPath(page.url)));
19741
19959
  const incomingLinkCount = /* @__PURE__ */ new Map();
@@ -19751,7 +19969,9 @@ var IndexPipeline = class _IndexPipeline {
19751
19969
  }
19752
19970
  }
19753
19971
  stageEnd("links", linkStart);
19972
+ this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
19754
19973
  const mirrorStart = stageStart();
19974
+ this.logger.info("Writing mirror pages...");
19755
19975
  const mirrorPages = [];
19756
19976
  let routeExact = 0;
19757
19977
  let routeBestEffort = 0;
@@ -19821,7 +20041,9 @@ var IndexPipeline = class _IndexPipeline {
19821
20041
  await this.vectorStore.upsertPages(pageRecords, scope);
19822
20042
  }
19823
20043
  stageEnd("mirror", mirrorStart);
20044
+ this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
19824
20045
  const chunkStart = stageStart();
20046
+ this.logger.info("Chunking pages...");
19825
20047
  let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
19826
20048
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
19827
20049
  if (typeof maxChunks === "number") {
@@ -19834,6 +20056,7 @@ var IndexPipeline = class _IndexPipeline {
19834
20056
  });
19835
20057
  }
19836
20058
  stageEnd("chunk", chunkStart);
20059
+ this.logger.info(`Chunked into ${chunks.length} chunk${chunks.length === 1 ? "" : "s"} (${stageTimingsMs["chunk"]}ms)`);
19837
20060
  const currentChunkMap = /* @__PURE__ */ new Map();
19838
20061
  for (const chunk of chunks) {
19839
20062
  currentChunkMap.set(chunk.chunkKey, chunk);
@@ -19852,6 +20075,7 @@ var IndexPipeline = class _IndexPipeline {
19852
20075
  return existingHash !== chunk.contentHash;
19853
20076
  });
19854
20077
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
20078
+ this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
19855
20079
  const embedStart = stageStart();
19856
20080
  const chunkTokenEstimates = /* @__PURE__ */ new Map();
19857
20081
  for (const chunk of changedChunks) {
@@ -19866,9 +20090,11 @@ var IndexPipeline = class _IndexPipeline {
19866
20090
  let newEmbeddings = 0;
19867
20091
  const vectorsByChunk = /* @__PURE__ */ new Map();
19868
20092
  if (!options.dryRun && changedChunks.length > 0) {
20093
+ this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
19869
20094
  const embeddings = await this.embeddings.embedTexts(
19870
20095
  changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
19871
- this.config.embeddings.model
20096
+ this.config.embeddings.model,
20097
+ "retrieval.passage"
19872
20098
  );
19873
20099
  if (embeddings.length !== changedChunks.length) {
19874
20100
  throw new SearchSocketError(
@@ -19891,8 +20117,14 @@ var IndexPipeline = class _IndexPipeline {
19891
20117
  }
19892
20118
  }
19893
20119
  stageEnd("embedding", embedStart);
20120
+ if (changedChunks.length > 0) {
20121
+ this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
20122
+ } else {
20123
+ this.logger.info("No chunks to embed \u2014 all up to date");
20124
+ }
19894
20125
  const syncStart = stageStart();
19895
20126
  if (!options.dryRun) {
20127
+ this.logger.info("Syncing vectors...");
19896
20128
  const upserts = [];
19897
20129
  for (const chunk of changedChunks) {
19898
20130
  const vector = vectorsByChunk.get(chunk.chunkKey);
@@ -19911,6 +20143,8 @@ var IndexPipeline = class _IndexPipeline {
19911
20143
  sectionTitle: chunk.sectionTitle ?? "",
19912
20144
  headingPath: chunk.headingPath,
19913
20145
  snippet: chunk.snippet,
20146
+ chunkText: chunk.chunkText.slice(0, 4e3),
20147
+ ordinal: chunk.ordinal,
19914
20148
  contentHash: chunk.contentHash,
19915
20149
  modelId: this.config.embeddings.model,
19916
20150
  depth: chunk.depth,
@@ -19930,6 +20164,7 @@ var IndexPipeline = class _IndexPipeline {
19930
20164
  }
19931
20165
  }
19932
20166
  stageEnd("sync", syncStart);
20167
+ this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
19933
20168
  const finalizeStart = stageStart();
19934
20169
  if (!options.dryRun) {
19935
20170
  const scopeInfo = {
@@ -19949,6 +20184,7 @@ var IndexPipeline = class _IndexPipeline {
19949
20184
  });
19950
20185
  }
19951
20186
  stageEnd("finalize", finalizeStart);
20187
+ this.logger.info("Done.");
19952
20188
  return {
19953
20189
  pagesProcessed: mirrorPages.length,
19954
20190
  chunksTotal: chunks.length,
@@ -20109,7 +20345,7 @@ var SearchEngine = class _SearchEngine {
20109
20345
  const groupByPage = (input.groupBy ?? "page") === "page";
20110
20346
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20111
20347
  const embedStart = process.hrtime.bigint();
20112
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model);
20348
+ const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
20113
20349
  const queryVector = queryEmbeddings[0];
20114
20350
  if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
20115
20351
  throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
@@ -20137,13 +20373,17 @@ var SearchEngine = class _SearchEngine {
20137
20373
  usedRerank = true;
20138
20374
  }
20139
20375
  let results;
20376
+ const minScore = this.config.ranking.minScore;
20140
20377
  if (groupByPage) {
20141
- const pages = aggregateByPage(ordered, this.config);
20378
+ let pages = aggregateByPage(ordered, this.config);
20379
+ if (minScore > 0) {
20380
+ pages = pages.filter((p) => p.pageScore >= minScore);
20381
+ }
20142
20382
  const minRatio = this.config.ranking.minChunkScoreRatio;
20143
20383
  results = pages.slice(0, topK).map((page) => {
20144
20384
  const bestScore = page.bestChunk.finalScore;
20145
- const minScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
20146
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore).slice(0, 5);
20385
+ const minScore2 = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
20386
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore2).slice(0, 5);
20147
20387
  return {
20148
20388
  url: page.url,
20149
20389
  title: page.title,
@@ -20160,6 +20400,9 @@ var SearchEngine = class _SearchEngine {
20160
20400
  };
20161
20401
  });
20162
20402
  } else {
20403
+ if (minScore > 0) {
20404
+ ordered = ordered.filter((entry) => entry.finalScore >= minScore);
20405
+ }
20163
20406
  results = ordered.slice(0, topK).map(({ hit, finalScore }) => ({
20164
20407
  url: hit.metadata.url,
20165
20408
  title: hit.metadata.title,
@@ -20231,43 +20474,54 @@ var SearchEngine = class _SearchEngine {
20231
20474
  }
20232
20475
  }
20233
20476
  async rerankHits(query, ranked, topK) {
20234
- if (this.config.rerank.provider !== "jina") {
20477
+ if (!this.config.rerank.enabled) {
20235
20478
  throw new SearchSocketError(
20236
20479
  "INVALID_REQUEST",
20237
- "rerank=true requested but rerank.provider is not configured as 'jina'.",
20480
+ "rerank=true requested but rerank.enabled is not set to true.",
20238
20481
  400
20239
20482
  );
20240
20483
  }
20241
20484
  if (!this.reranker) {
20242
20485
  throw new SearchSocketError(
20243
20486
  "CONFIG_MISSING",
20244
- `rerank=true requested but ${this.config.rerank.jina.apiKeyEnv} is not set.`,
20487
+ `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
20245
20488
  400
20246
20489
  );
20247
20490
  }
20248
- const candidates = ranked.map(({ hit }) => ({
20249
- id: hit.id,
20250
- text: [hit.metadata.title, hit.metadata.sectionTitle, hit.metadata.snippet].filter(Boolean).join("\n")
20251
- }));
20491
+ const pageGroups = /* @__PURE__ */ new Map();
20492
+ for (const entry of ranked) {
20493
+ const url = entry.hit.metadata.url;
20494
+ const group = pageGroups.get(url);
20495
+ if (group) group.push(entry);
20496
+ else pageGroups.set(url, [entry]);
20497
+ }
20498
+ const pageCandidates = [];
20499
+ for (const [url, chunks] of pageGroups) {
20500
+ const sorted = [...chunks].sort(
20501
+ (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
20502
+ );
20503
+ const title = sorted[0].hit.metadata.title;
20504
+ const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20505
+ pageCandidates.push({ id: url, text: `${title}
20506
+
20507
+ ${body}` });
20508
+ }
20252
20509
  const reranked = await this.reranker.rerank(
20253
20510
  query,
20254
- candidates,
20511
+ pageCandidates,
20255
20512
  Math.max(topK, this.config.rerank.topN)
20256
20513
  );
20257
- const rerankScoreById = new Map(reranked.map((entry) => [entry.id, entry.score]));
20514
+ const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
20258
20515
  return ranked.map((entry) => {
20259
- const rerankScore = rerankScoreById.get(entry.hit.id);
20260
- const safeBaseScore = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
20261
- if (rerankScore === void 0 || !Number.isFinite(rerankScore)) {
20262
- return {
20263
- ...entry,
20264
- finalScore: safeBaseScore
20265
- };
20516
+ const pageScore = scoreByUrl.get(entry.hit.metadata.url);
20517
+ const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
20518
+ if (pageScore === void 0 || !Number.isFinite(pageScore)) {
20519
+ return { ...entry, finalScore: base };
20266
20520
  }
20267
- const combinedScore = rerankScore * this.config.ranking.weights.rerank + safeBaseScore * 1e-3;
20521
+ const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
20268
20522
  return {
20269
20523
  ...entry,
20270
- finalScore: Number.isFinite(combinedScore) ? combinedScore : safeBaseScore
20524
+ finalScore: Number.isFinite(combined) ? combined : base
20271
20525
  };
20272
20526
  }).sort((a, b) => {
20273
20527
  const delta = b.finalScore - a.finalScore;
@@ -20465,13 +20719,21 @@ function searchsocketHandle(options = {}) {
20465
20719
  let rateLimiter = null;
20466
20720
  const getConfig = async () => {
20467
20721
  if (!configPromise) {
20468
- const configP = options.config ? Promise.resolve(options.config) : loadConfig({
20469
- cwd: options.cwd,
20470
- configPath: options.configPath
20471
- });
20722
+ let configP;
20723
+ if (options.config) {
20724
+ configP = Promise.resolve(options.config);
20725
+ } else if (options.rawConfig) {
20726
+ const cwd = options.cwd ?? process.cwd();
20727
+ configP = Promise.resolve(mergeConfig(cwd, options.rawConfig));
20728
+ } else {
20729
+ configP = loadConfig({
20730
+ cwd: options.cwd,
20731
+ configPath: options.configPath
20732
+ });
20733
+ }
20472
20734
  configPromise = configP.then((config) => {
20473
20735
  apiPath = apiPath ?? config.api.path;
20474
- if (config.api.rateLimit) {
20736
+ if (config.api.rateLimit && !isServerless()) {
20475
20737
  rateLimiter = new InMemoryRateLimiter(config.api.rateLimit.windowMs, config.api.rateLimit.max);
20476
20738
  }
20477
20739
  return config;
@@ -20481,10 +20743,9 @@ function searchsocketHandle(options = {}) {
20481
20743
  };
20482
20744
  const getEngine = async () => {
20483
20745
  if (!enginePromise) {
20484
- const config = options.config;
20746
+ const config = await getConfig();
20485
20747
  enginePromise = SearchEngine.create({
20486
20748
  cwd: options.cwd,
20487
- configPath: options.configPath,
20488
20749
  config
20489
20750
  });
20490
20751
  }
@@ -20757,8 +21018,10 @@ exports.createEmbeddingsProvider = createEmbeddingsProvider;
20757
21018
  exports.createReranker = createReranker;
20758
21019
  exports.createSearchClient = createSearchClient;
20759
21020
  exports.createVectorStore = createVectorStore;
21021
+ exports.isServerless = isServerless;
20760
21022
  exports.loadConfig = loadConfig;
20761
21023
  exports.mergeConfig = mergeConfig;
21024
+ exports.mergeConfigServerless = mergeConfigServerless;
20762
21025
  exports.resolveScope = resolveScope;
20763
21026
  exports.runMcpServer = runMcpServer;
20764
21027
  exports.searchsocketHandle = searchsocketHandle;