searchsocket 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3,8 +3,7 @@ import path from 'path';
3
3
  import { createJiti } from 'jiti';
4
4
  import { z } from 'zod';
5
5
  import { execSync, spawn } from 'child_process';
6
- import OpenAI from 'openai';
7
- import pLimit from 'p-limit';
6
+ import pLimit2 from 'p-limit';
8
7
  import { createHash } from 'crypto';
9
8
  import { load } from 'cheerio';
10
9
  import matter from 'gray-matter';
@@ -16620,7 +16619,11 @@ var searchSocketConfigSchema = z.object({
16620
16619
  outputDir: z.string().min(1).optional(),
16621
16620
  paramValues: z.record(z.string(), z.array(z.string())).optional(),
16622
16621
  exclude: z.array(z.string()).optional(),
16623
- previewTimeout: z.number().int().positive().optional()
16622
+ previewTimeout: z.number().int().positive().optional(),
16623
+ discover: z.boolean().optional(),
16624
+ seedUrls: z.array(z.string()).optional(),
16625
+ maxPages: z.number().int().positive().optional(),
16626
+ maxDepth: z.number().int().nonnegative().optional()
16624
16627
  }).optional()
16625
16628
  }).optional(),
16626
16629
  extract: z.object({
@@ -16647,8 +16650,9 @@ var searchSocketConfigSchema = z.object({
16647
16650
  pageSummaryChunk: z.boolean().optional()
16648
16651
  }).optional(),
16649
16652
  embeddings: z.object({
16650
- provider: z.literal("openai").optional(),
16653
+ provider: z.literal("jina").optional(),
16651
16654
  model: z.string().min(1).optional(),
16655
+ apiKey: z.string().min(1).optional(),
16652
16656
  apiKeyEnv: z.string().min(1).optional(),
16653
16657
  batchSize: z.number().int().positive().optional(),
16654
16658
  concurrency: z.number().int().positive().optional(),
@@ -16657,18 +16661,17 @@ var searchSocketConfigSchema = z.object({
16657
16661
  vector: z.object({
16658
16662
  dimension: z.number().int().positive().optional(),
16659
16663
  turso: z.object({
16664
+ url: z.string().url().optional(),
16665
+ authToken: z.string().min(1).optional(),
16660
16666
  urlEnv: z.string().optional(),
16661
16667
  authTokenEnv: z.string().optional(),
16662
16668
  localPath: z.string().optional()
16663
16669
  }).optional()
16664
16670
  }).optional(),
16665
16671
  rerank: z.object({
16666
- provider: z.enum(["none", "jina"]).optional(),
16672
+ enabled: z.boolean().optional(),
16667
16673
  topN: z.number().int().positive().optional(),
16668
- jina: z.object({
16669
- apiKeyEnv: z.string().optional(),
16670
- model: z.string().optional()
16671
- }).optional()
16674
+ model: z.string().optional()
16672
16675
  }).optional(),
16673
16676
  ranking: z.object({
16674
16677
  enableIncomingLinkBoost: z.boolean().optional(),
@@ -16677,6 +16680,7 @@ var searchSocketConfigSchema = z.object({
16677
16680
  aggregationCap: z.number().int().positive().optional(),
16678
16681
  aggregationDecay: z.number().min(0).max(1).optional(),
16679
16682
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
16683
+ minScore: z.number().min(0).max(1).optional(),
16680
16684
  weights: z.object({
16681
16685
  incomingLinks: z.number().optional(),
16682
16686
  depth: z.number().optional(),
@@ -16757,9 +16761,9 @@ function createDefaultConfig(projectId) {
16757
16761
  pageSummaryChunk: true
16758
16762
  },
16759
16763
  embeddings: {
16760
- provider: "openai",
16761
- model: "text-embedding-3-small",
16762
- apiKeyEnv: "OPENAI_API_KEY",
16764
+ provider: "jina",
16765
+ model: "jina-embeddings-v3",
16766
+ apiKeyEnv: "JINA_API_KEY",
16763
16767
  batchSize: 64,
16764
16768
  concurrency: 4
16765
16769
  },
@@ -16771,12 +16775,9 @@ function createDefaultConfig(projectId) {
16771
16775
  }
16772
16776
  },
16773
16777
  rerank: {
16774
- provider: "none",
16778
+ enabled: false,
16775
16779
  topN: 20,
16776
- jina: {
16777
- apiKeyEnv: "JINA_API_KEY",
16778
- model: "jina-reranker-v2-base-multilingual"
16779
- }
16780
+ model: "jina-reranker-v2-base-multilingual"
16780
16781
  },
16781
16782
  ranking: {
16782
16783
  enableIncomingLinkBoost: true,
@@ -16785,6 +16786,7 @@ function createDefaultConfig(projectId) {
16785
16786
  aggregationCap: 5,
16786
16787
  aggregationDecay: 0.5,
16787
16788
  minChunkScoreRatio: 0.5,
16789
+ minScore: 0,
16788
16790
  weights: {
16789
16791
  incomingLinks: 0.05,
16790
16792
  depth: 0.03,
@@ -16911,7 +16913,11 @@ ${issues}`
16911
16913
  outputDir: parsed.source.build.outputDir ?? ".svelte-kit/output",
16912
16914
  paramValues: parsed.source.build.paramValues ?? {},
16913
16915
  exclude: parsed.source.build.exclude ?? [],
16914
- previewTimeout: parsed.source.build.previewTimeout ?? 3e4
16916
+ previewTimeout: parsed.source.build.previewTimeout ?? 3e4,
16917
+ discover: parsed.source.build.discover ?? false,
16918
+ seedUrls: parsed.source.build.seedUrls ?? ["/"],
16919
+ maxPages: parsed.source.build.maxPages ?? 200,
16920
+ maxDepth: parsed.source.build.maxDepth ?? 10
16915
16921
  } : void 0
16916
16922
  },
16917
16923
  extract: {
@@ -16940,11 +16946,7 @@ ${issues}`
16940
16946
  },
16941
16947
  rerank: {
16942
16948
  ...defaults.rerank,
16943
- ...parsed.rerank,
16944
- jina: {
16945
- ...defaults.rerank.jina,
16946
- ...parsed.rerank?.jina
16947
- }
16949
+ ...parsed.rerank
16948
16950
  },
16949
16951
  ranking: {
16950
16952
  ...defaults.ranking,
@@ -16991,7 +16993,11 @@ ${issues}`
16991
16993
  outputDir: ".svelte-kit/output",
16992
16994
  paramValues: {},
16993
16995
  exclude: [],
16994
- previewTimeout: 3e4
16996
+ previewTimeout: 3e4,
16997
+ discover: false,
16998
+ seedUrls: ["/"],
16999
+ maxPages: 200,
17000
+ maxDepth: 10
16995
17001
  };
16996
17002
  }
16997
17003
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
@@ -17005,6 +17011,21 @@ ${issues}`
17005
17011
  }
17006
17012
  return merged;
17007
17013
  }
17014
+ function mergeConfigServerless(rawConfig) {
17015
+ if (!rawConfig.project?.id) {
17016
+ throw new SearchSocketError(
17017
+ "CONFIG_MISSING",
17018
+ "`project.id` is required for serverless config (cannot infer from package.json)."
17019
+ );
17020
+ }
17021
+ if (!rawConfig.source?.mode) {
17022
+ throw new SearchSocketError(
17023
+ "CONFIG_MISSING",
17024
+ "`source.mode` is required for serverless config (cannot auto-detect from filesystem)."
17025
+ );
17026
+ }
17027
+ return mergeConfig(process.cwd(), rawConfig);
17028
+ }
17008
17029
  async function loadConfig(options = {}) {
17009
17030
  const cwd = path.resolve(options.cwd ?? process.cwd());
17010
17031
  const configPath = path.resolve(cwd, options.configPath ?? "searchsocket.config.ts");
@@ -17027,6 +17048,11 @@ async function loadConfig(options = {}) {
17027
17048
  return mergeConfig(cwd, raw);
17028
17049
  }
17029
17050
 
17051
+ // src/core/serverless.ts
17052
+ function isServerless() {
17053
+ return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
17054
+ }
17055
+
17030
17056
  // src/utils/text.ts
17031
17057
  function normalizeText(input) {
17032
17058
  return input.replace(/\r\n/g, "\n").replace(/\s+/g, " ").trim();
@@ -17104,10 +17130,11 @@ function sleep(ms) {
17104
17130
  setTimeout(resolve, ms);
17105
17131
  });
17106
17132
  }
17107
- var OpenAIEmbeddingsProvider = class {
17108
- client;
17133
+ var JinaEmbeddingsProvider = class {
17134
+ apiKey;
17109
17135
  batchSize;
17110
17136
  concurrency;
17137
+ defaultTask;
17111
17138
  constructor(options) {
17112
17139
  if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
17113
17140
  throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
@@ -17115,11 +17142,10 @@ var OpenAIEmbeddingsProvider = class {
17115
17142
  if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
17116
17143
  throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
17117
17144
  }
17118
- this.client = new OpenAI({
17119
- apiKey: options.apiKey
17120
- });
17145
+ this.apiKey = options.apiKey;
17121
17146
  this.batchSize = options.batchSize;
17122
17147
  this.concurrency = options.concurrency;
17148
+ this.defaultTask = options.task ?? "retrieval.passage";
17123
17149
  }
17124
17150
  estimateTokens(text) {
17125
17151
  const normalized = text.trim();
@@ -17133,7 +17159,7 @@ var OpenAIEmbeddingsProvider = class {
17133
17159
  const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
17134
17160
  return Math.max(1, Math.max(charEstimate, lexicalEstimate));
17135
17161
  }
17136
- async embedTexts(texts, modelId) {
17162
+ async embedTexts(texts, modelId, task) {
17137
17163
  if (texts.length === 0) {
17138
17164
  return [];
17139
17165
  }
@@ -17145,37 +17171,56 @@ var OpenAIEmbeddingsProvider = class {
17145
17171
  });
17146
17172
  }
17147
17173
  const outputs = new Array(batches.length);
17148
- const limit = pLimit(this.concurrency);
17174
+ const limit = pLimit2(this.concurrency);
17149
17175
  await Promise.all(
17150
17176
  batches.map(
17151
17177
  (batch, position) => limit(async () => {
17152
- outputs[position] = await this.embedWithRetry(batch.values, modelId);
17178
+ outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
17153
17179
  })
17154
17180
  )
17155
17181
  );
17156
17182
  return outputs.flat();
17157
17183
  }
17158
- async embedWithRetry(texts, modelId) {
17184
+ async embedWithRetry(texts, modelId, task) {
17159
17185
  const maxAttempts = 5;
17160
17186
  let attempt = 0;
17161
17187
  while (attempt < maxAttempts) {
17162
17188
  attempt += 1;
17189
+ let response;
17163
17190
  try {
17164
- const response = await this.client.embeddings.create({
17165
- model: modelId,
17166
- input: texts,
17167
- encoding_format: "float"
17191
+ response = await fetch("https://api.jina.ai/v1/embeddings", {
17192
+ method: "POST",
17193
+ headers: {
17194
+ "content-type": "application/json",
17195
+ authorization: `Bearer ${this.apiKey}`
17196
+ },
17197
+ body: JSON.stringify({
17198
+ model: modelId,
17199
+ input: texts,
17200
+ task
17201
+ })
17168
17202
  });
17169
- return response.data.map((entry) => entry.embedding);
17170
17203
  } catch (error) {
17171
- const status = error.status;
17172
- const retryable = status === 429 || typeof status === "number" && status >= 500;
17173
- if (!retryable || attempt >= maxAttempts) {
17204
+ if (attempt >= maxAttempts) {
17174
17205
  throw error;
17175
17206
  }
17176
- const delay = Math.min(2 ** attempt * 300, 5e3);
17177
- await sleep(delay);
17207
+ await sleep(Math.min(2 ** attempt * 300, 5e3));
17208
+ continue;
17209
+ }
17210
+ if (!response.ok) {
17211
+ const retryable = response.status === 429 || response.status >= 500;
17212
+ if (!retryable || attempt >= maxAttempts) {
17213
+ const errorBody = await response.text();
17214
+ throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
17215
+ }
17216
+ await sleep(Math.min(2 ** attempt * 300, 5e3));
17217
+ continue;
17178
17218
  }
17219
+ const payload = await response.json();
17220
+ if (!payload.data || !Array.isArray(payload.data)) {
17221
+ throw new Error("Invalid Jina embeddings response format");
17222
+ }
17223
+ return payload.data.map((entry) => entry.embedding);
17179
17224
  }
17180
17225
  throw new Error("Unreachable retry state");
17181
17226
  }
@@ -17183,20 +17228,20 @@ var OpenAIEmbeddingsProvider = class {
17183
17228
 
17184
17229
  // src/embeddings/factory.ts
17185
17230
  function createEmbeddingsProvider(config) {
17186
- if (config.embeddings.provider !== "openai") {
17231
+ if (config.embeddings.provider !== "jina") {
17187
17232
  throw new SearchSocketError(
17188
17233
  "CONFIG_MISSING",
17189
17234
  `Unsupported embeddings provider ${config.embeddings.provider}`
17190
17235
  );
17191
17236
  }
17192
- const apiKey = process.env[config.embeddings.apiKeyEnv];
17237
+ const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17193
17238
  if (!apiKey) {
17194
17239
  throw new SearchSocketError(
17195
17240
  "CONFIG_MISSING",
17196
- `Missing embeddings API key env var: ${config.embeddings.apiKeyEnv}`
17241
+ `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
17197
17242
  );
17198
17243
  }
17199
- return new OpenAIEmbeddingsProvider({
17244
+ return new JinaEmbeddingsProvider({
17200
17245
  apiKey,
17201
17246
  batchSize: config.embeddings.batchSize,
17202
17247
  concurrency: config.embeddings.concurrency
@@ -17286,20 +17331,17 @@ var JinaReranker = class {
17286
17331
 
17287
17332
  // src/rerank/factory.ts
17288
17333
  function createReranker(config) {
17289
- if (config.rerank.provider === "none") {
17334
+ if (!config.rerank.enabled) {
17290
17335
  return null;
17291
17336
  }
17292
- if (config.rerank.provider === "jina") {
17293
- const apiKey = process.env[config.rerank.jina.apiKeyEnv];
17294
- if (!apiKey) {
17295
- return null;
17296
- }
17297
- return new JinaReranker({
17298
- apiKey,
17299
- model: config.rerank.jina.model
17300
- });
17337
+ const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17338
+ if (!apiKey) {
17339
+ return null;
17301
17340
  }
17302
- return null;
17341
+ return new JinaReranker({
17342
+ apiKey,
17343
+ model: config.rerank.model
17344
+ });
17303
17345
  }
17304
17346
  function ensureStateDirs(cwd, stateDir, scope) {
17305
17347
  const statePath = path.resolve(cwd, stateDir);
@@ -17352,6 +17394,16 @@ var TursoVectorStore = class {
17352
17394
  }
17353
17395
  async ensureChunks(dim) {
17354
17396
  if (this.chunksReady) return;
17397
+ const exists = await this.chunksTableExists();
17398
+ if (exists) {
17399
+ const currentDim = await this.getChunksDimension();
17400
+ if (currentDim !== null && currentDim !== dim) {
17401
+ await this.client.batch([
17402
+ "DROP INDEX IF EXISTS idx",
17403
+ "DROP TABLE IF EXISTS chunks"
17404
+ ]);
17405
+ }
17406
+ }
17355
17407
  await this.client.batch([
17356
17408
  `CREATE TABLE IF NOT EXISTS chunks (
17357
17409
  id TEXT PRIMARY KEY,
@@ -17363,6 +17415,8 @@ var TursoVectorStore = class {
17363
17415
  section_title TEXT NOT NULL DEFAULT '',
17364
17416
  heading_path TEXT NOT NULL DEFAULT '[]',
17365
17417
  snippet TEXT NOT NULL DEFAULT '',
17418
+ chunk_text TEXT NOT NULL DEFAULT '',
17419
+ ordinal INTEGER NOT NULL DEFAULT 0,
17366
17420
  content_hash TEXT NOT NULL DEFAULT '',
17367
17421
  model_id TEXT NOT NULL DEFAULT '',
17368
17422
  depth INTEGER NOT NULL DEFAULT 0,
@@ -17373,6 +17427,19 @@ var TursoVectorStore = class {
17373
17427
  )`,
17374
17428
  `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17375
17429
  ]);
17430
+ const chunkMigrationCols = [
17431
+ { name: "chunk_text", def: "TEXT NOT NULL DEFAULT ''" },
17432
+ { name: "ordinal", def: "INTEGER NOT NULL DEFAULT 0" }
17433
+ ];
17434
+ for (const col of chunkMigrationCols) {
17435
+ try {
17436
+ await this.client.execute(`ALTER TABLE chunks ADD COLUMN ${col.name} ${col.def}`);
17437
+ } catch (error) {
17438
+ if (error instanceof Error && !error.message.includes("duplicate column")) {
17439
+ throw error;
17440
+ }
17441
+ }
17442
+ }
17376
17443
  this.chunksReady = true;
17377
17444
  }
17378
17445
  async ensurePages() {
@@ -17407,6 +17474,38 @@ var TursoVectorStore = class {
17407
17474
  throw error;
17408
17475
  }
17409
17476
  }
17477
+ /**
17478
+ * Read the current F32_BLOB dimension from the chunks table schema.
17479
+ * Returns null if the table doesn't exist or the dimension can't be parsed.
17480
+ */
17481
+ async getChunksDimension() {
17482
+ try {
17483
+ const rs = await this.client.execute(
17484
+ "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
17485
+ );
17486
+ if (rs.rows.length === 0) return null;
17487
+ const sql = rs.rows[0].sql;
17488
+ const match = sql.match(/F32_BLOB\((\d+)\)/i);
17489
+ return match ? parseInt(match[1], 10) : null;
17490
+ } catch {
17491
+ return null;
17492
+ }
17493
+ }
17494
+ /**
17495
+ * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
17496
+ * Used by `clean --remote` for a full reset.
17497
+ */
17498
+ async dropAllTables() {
17499
+ await this.client.batch([
17500
+ "DROP INDEX IF EXISTS idx",
17501
+ "DROP TABLE IF EXISTS chunks",
17502
+ "DROP TABLE IF EXISTS registry",
17503
+ "DROP TABLE IF EXISTS pages"
17504
+ ]);
17505
+ this.chunksReady = false;
17506
+ this.registryReady = false;
17507
+ this.pagesReady = false;
17508
+ }
17410
17509
  async upsert(records, _scope) {
17411
17510
  if (records.length === 0) return;
17412
17511
  const dim = this.dimension ?? records[0].vector.length;
@@ -17417,9 +17516,9 @@ var TursoVectorStore = class {
17417
17516
  const stmts = batch.map((r) => ({
17418
17517
  sql: `INSERT OR REPLACE INTO chunks
17419
17518
  (id, project_id, scope_name, url, path, title, section_title,
17420
- heading_path, snippet, content_hash, model_id, depth,
17519
+ heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17421
17520
  incoming_links, route_file, tags, embedding)
17422
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17521
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17423
17522
  args: [
17424
17523
  r.id,
17425
17524
  r.metadata.projectId,
@@ -17430,6 +17529,8 @@ var TursoVectorStore = class {
17430
17529
  r.metadata.sectionTitle,
17431
17530
  JSON.stringify(r.metadata.headingPath),
17432
17531
  r.metadata.snippet,
17532
+ r.metadata.chunkText,
17533
+ r.metadata.ordinal,
17433
17534
  r.metadata.contentHash,
17434
17535
  r.metadata.modelId,
17435
17536
  r.metadata.depth,
@@ -17448,7 +17549,8 @@ var TursoVectorStore = class {
17448
17549
  const queryJson = JSON.stringify(queryVector);
17449
17550
  const rs = await this.client.execute({
17450
17551
  sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
17451
- c.section_title, c.heading_path, c.snippet, c.content_hash,
17552
+ c.section_title, c.heading_path, c.snippet, c.chunk_text,
17553
+ c.ordinal, c.content_hash,
17452
17554
  c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17453
17555
  vector_distance_cos(c.embedding, vector(?)) AS distance
17454
17556
  FROM vector_top_k('idx', vector(?), ?) AS v
@@ -17492,6 +17594,8 @@ var TursoVectorStore = class {
17492
17594
  sectionTitle: row.section_title,
17493
17595
  headingPath: JSON.parse(row.heading_path || "[]"),
17494
17596
  snippet: row.snippet,
17597
+ chunkText: row.chunk_text || "",
17598
+ ordinal: row.ordinal || 0,
17495
17599
  contentHash: row.content_hash,
17496
17600
  modelId: row.model_id,
17497
17601
  depth: row.depth,
@@ -17687,10 +17791,10 @@ var TursoVectorStore = class {
17687
17791
  // src/vector/factory.ts
17688
17792
  async function createVectorStore(config, cwd) {
17689
17793
  const turso = config.vector.turso;
17690
- const remoteUrl = process.env[turso.urlEnv];
17794
+ const remoteUrl = turso.url ?? process.env[turso.urlEnv];
17691
17795
  if (remoteUrl) {
17692
17796
  const { createClient: createClient2 } = await import('@libsql/client/http');
17693
- const authToken = process.env[turso.authTokenEnv];
17797
+ const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
17694
17798
  const client2 = createClient2({
17695
17799
  url: remoteUrl,
17696
17800
  authToken
@@ -17700,6 +17804,12 @@ async function createVectorStore(config, cwd) {
17700
17804
  dimension: config.vector.dimension
17701
17805
  });
17702
17806
  }
17807
+ if (isServerless()) {
17808
+ throw new SearchSocketError(
17809
+ "VECTOR_BACKEND_UNAVAILABLE",
17810
+ `No remote vector database URL found (checked vector.turso.url and env var "${turso.urlEnv}"). Local SQLite storage is not available in serverless environments. Set ${turso.urlEnv} or pass vector.turso.url directly.`
17811
+ );
17812
+ }
17703
17813
  const { createClient } = await import('@libsql/client');
17704
17814
  const localPath = path.resolve(cwd, turso.localPath);
17705
17815
  fs.mkdirSync(path.dirname(localPath), { recursive: true });
@@ -19138,14 +19248,16 @@ function mapUrlToRoute(urlPath, patterns) {
19138
19248
  var Logger = class {
19139
19249
  json;
19140
19250
  verbose;
19251
+ quiet;
19141
19252
  stderrOnly;
19142
19253
  constructor(opts = {}) {
19143
19254
  this.json = opts.json ?? false;
19144
19255
  this.verbose = opts.verbose ?? false;
19256
+ this.quiet = opts.quiet ?? false;
19145
19257
  this.stderrOnly = opts.stderrOnly ?? false;
19146
19258
  }
19147
19259
  info(message) {
19148
- if (this.json) {
19260
+ if (this.quiet || this.json) {
19149
19261
  return;
19150
19262
  }
19151
19263
  this.writeOut(`${message}
@@ -19159,7 +19271,7 @@ var Logger = class {
19159
19271
  this.logJson("debug", { message });
19160
19272
  return;
19161
19273
  }
19162
- this.writeOut(`${message}
19274
+ this.writeOut(` ${message}
19163
19275
  `);
19164
19276
  }
19165
19277
  warn(message) {
@@ -19186,7 +19298,7 @@ var Logger = class {
19186
19298
  this.logJson(event, data);
19187
19299
  return;
19188
19300
  }
19189
- this.writeOut(`[${event}] ${data ? JSON.stringify(data) : ""}
19301
+ this.writeOut(` [${event}] ${data ? JSON.stringify(data) : ""}
19190
19302
  `);
19191
19303
  }
19192
19304
  writeOut(text) {
@@ -19371,11 +19483,108 @@ async function startPreviewServer(cwd, options, logger3) {
19371
19483
 
19372
19484
  // src/indexing/sources/build/index.ts
19373
19485
  var logger = new Logger();
19486
+ function extractLinksFromHtml(html, pageUrl, baseOrigin) {
19487
+ const $ = load(html);
19488
+ const links = [];
19489
+ $("a[href]").each((_i, el) => {
19490
+ const href = $(el).attr("href");
19491
+ if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:") || href.startsWith("javascript:")) {
19492
+ return;
19493
+ }
19494
+ try {
19495
+ const resolved = new URL(href, `${baseOrigin}${pageUrl}`);
19496
+ if (resolved.origin !== baseOrigin) return;
19497
+ if (!["http:", "https:"].includes(resolved.protocol)) return;
19498
+ links.push(normalizeUrlPath(resolved.pathname));
19499
+ } catch {
19500
+ }
19501
+ });
19502
+ return [...new Set(links)];
19503
+ }
19504
+ async function discoverPages(server, buildConfig, pipelineMaxPages) {
19505
+ const { seedUrls, maxDepth, exclude } = buildConfig;
19506
+ const baseOrigin = new URL(server.baseUrl).origin;
19507
+ let effectiveMax = buildConfig.maxPages;
19508
+ if (typeof pipelineMaxPages === "number") {
19509
+ const floored = Math.max(0, Math.floor(pipelineMaxPages));
19510
+ effectiveMax = Math.min(effectiveMax, floored);
19511
+ }
19512
+ if (effectiveMax === 0) return [];
19513
+ const visited = /* @__PURE__ */ new Set();
19514
+ const pages = [];
19515
+ const queue = [];
19516
+ const limit = pLimit2(8);
19517
+ for (const seed of seedUrls) {
19518
+ const normalized = normalizeUrlPath(seed);
19519
+ if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
19520
+ visited.add(normalized);
19521
+ queue.push({ url: normalized, depth: 0 });
19522
+ }
19523
+ }
19524
+ while (queue.length > 0 && pages.length < effectiveMax) {
19525
+ const remaining = effectiveMax - pages.length;
19526
+ const batch = queue.splice(0, remaining);
19527
+ const results = await Promise.allSettled(
19528
+ batch.map(
19529
+ (item) => limit(async () => {
19530
+ const fullUrl = joinUrl(server.baseUrl, item.url);
19531
+ const response = await fetch(fullUrl);
19532
+ if (!response.ok) {
19533
+ logger.warn(`Skipping ${item.url}: ${response.status} ${response.statusText}`);
19534
+ return null;
19535
+ }
19536
+ const contentType = response.headers.get("content-type") ?? "";
19537
+ if (!contentType.includes("text/html")) {
19538
+ return null;
19539
+ }
19540
+ const html = await response.text();
19541
+ if (item.depth < maxDepth) {
19542
+ const links = extractLinksFromHtml(html, item.url, baseOrigin);
19543
+ for (const link of links) {
19544
+ if (!visited.has(link) && !isExcluded(link, exclude)) {
19545
+ visited.add(link);
19546
+ queue.push({ url: link, depth: item.depth + 1 });
19547
+ }
19548
+ }
19549
+ }
19550
+ return {
19551
+ url: item.url,
19552
+ html,
19553
+ sourcePath: fullUrl,
19554
+ outgoingLinks: []
19555
+ };
19556
+ })
19557
+ )
19558
+ );
19559
+ for (const result of results) {
19560
+ if (result.status === "fulfilled" && result.value) {
19561
+ pages.push(result.value);
19562
+ }
19563
+ }
19564
+ }
19565
+ if (pages.length >= effectiveMax && queue.length > 0) {
19566
+ logger.warn(`Discovery crawl reached maxPages limit (${effectiveMax}), ${queue.length} URLs not visited.`);
19567
+ }
19568
+ logger.event("build_discover_complete", {
19569
+ pagesFound: pages.length,
19570
+ urlsVisited: visited.size,
19571
+ urlsSkipped: queue.length
19572
+ });
19573
+ return pages;
19574
+ }
19374
19575
  async function loadBuildPages(cwd, config, maxPages) {
19375
19576
  const buildConfig = config.source.build;
19376
19577
  if (!buildConfig) {
19377
19578
  throw new Error("build source config is missing");
19378
19579
  }
19580
+ if (buildConfig.discover) {
19581
+ const server2 = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
19582
+ try {
19583
+ return await discoverPages(server2, buildConfig, maxPages);
19584
+ } finally {
19585
+ await server2.shutdown();
19586
+ }
19587
+ }
19379
19588
  const routes = await parseManifest(cwd, buildConfig.outputDir);
19380
19589
  const expanded = expandRoutes(routes, buildConfig.paramValues, buildConfig.exclude, logger);
19381
19590
  logger.event("build_routes_discovered", {
@@ -19386,7 +19595,7 @@ async function loadBuildPages(cwd, config, maxPages) {
19386
19595
  const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
19387
19596
  const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
19388
19597
  try {
19389
- const concurrencyLimit = pLimit(8);
19598
+ const concurrencyLimit = pLimit2(8);
19390
19599
  const results = await Promise.allSettled(
19391
19600
  selected.map(
19392
19601
  (route) => concurrencyLimit(async () => {
@@ -19555,7 +19764,7 @@ async function loadCrawledPages(config, maxPages) {
19555
19764
  const routes = await resolveRoutes(config);
19556
19765
  const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
19557
19766
  const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
19558
- const concurrencyLimit = pLimit(8);
19767
+ const concurrencyLimit = pLimit2(8);
19559
19768
  const results = await Promise.allSettled(
19560
19769
  selected.map(
19561
19770
  (route) => concurrencyLimit(async () => {
@@ -19617,9 +19826,7 @@ function hrTimeMs(start) {
19617
19826
 
19618
19827
  // src/indexing/pipeline.ts
19619
19828
  var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
19620
- "text-embedding-3-small": 2e-5,
19621
- "text-embedding-3-large": 13e-5,
19622
- "text-embedding-ada-002": 1e-4
19829
+ "jina-embeddings-v3": 2e-5
19623
19830
  };
19624
19831
  var DEFAULT_EMBEDDING_PRICE_PER_1K = 2e-5;
19625
19832
  var IndexPipeline = class _IndexPipeline {
@@ -19665,9 +19872,15 @@ var IndexPipeline = class _IndexPipeline {
19665
19872
  };
19666
19873
  const scope = resolveScope(this.config, options.scopeOverride);
19667
19874
  const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
19875
+ const sourceMode = options.sourceOverride ?? this.config.source.mode;
19876
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
19668
19877
  if (options.force) {
19878
+ this.logger.info("Force mode enabled \u2014 full rebuild");
19669
19879
  await cleanMirrorForScope(statePath, scope);
19670
19880
  }
19881
+ if (options.dryRun) {
19882
+ this.logger.info("Dry run \u2014 no writes will be performed");
19883
+ }
19671
19884
  const manifestStart = stageStart();
19672
19885
  const existingHashes = await this.vectorStore.getContentHashes(scope);
19673
19886
  const existingModelId = await this.vectorStore.getScopeModelId(scope);
@@ -19678,8 +19891,9 @@ var IndexPipeline = class _IndexPipeline {
19678
19891
  );
19679
19892
  }
19680
19893
  stageEnd("manifest", manifestStart);
19894
+ this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
19681
19895
  const sourceStart = stageStart();
19682
- const sourceMode = options.sourceOverride ?? this.config.source.mode;
19896
+ this.logger.info(`Loading pages (source: ${sourceMode})...`);
19683
19897
  let sourcePages;
19684
19898
  if (sourceMode === "static-output") {
19685
19899
  sourcePages = await loadStaticOutputPages(this.cwd, this.config, options.maxPages);
@@ -19691,10 +19905,13 @@ var IndexPipeline = class _IndexPipeline {
19691
19905
  sourcePages = await loadContentFilesPages(this.cwd, this.config, options.maxPages);
19692
19906
  }
19693
19907
  stageEnd("source", sourceStart);
19908
+ this.logger.info(`Loaded ${sourcePages.length} page${sourcePages.length === 1 ? "" : "s"} (${stageTimingsMs["source"]}ms)`);
19694
19909
  const routeStart = stageStart();
19695
19910
  const routePatterns = await buildRoutePatterns(this.cwd);
19696
19911
  stageEnd("route_map", routeStart);
19912
+ this.logger.debug(`Route mapping: ${routePatterns.length} pattern${routePatterns.length === 1 ? "" : "s"} discovered (${stageTimingsMs["route_map"]}ms)`);
19697
19913
  const extractStart = stageStart();
19914
+ this.logger.info("Extracting content...");
19698
19915
  const extractedPages = [];
19699
19916
  for (const sourcePage of sourcePages) {
19700
19917
  const extracted = sourcePage.html ? extractFromHtml(sourcePage.url, sourcePage.html, this.config) : extractFromMarkdown(sourcePage.url, sourcePage.markdown ?? "", sourcePage.title);
@@ -19723,6 +19940,8 @@ var IndexPipeline = class _IndexPipeline {
19723
19940
  uniquePages.push(page);
19724
19941
  }
19725
19942
  stageEnd("extract", extractStart);
19943
+ const skippedPages = sourcePages.length - uniquePages.length;
19944
+ this.logger.info(`Extracted ${uniquePages.length} page${uniquePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
19726
19945
  const linkStart = stageStart();
19727
19946
  const pageSet = new Set(uniquePages.map((page) => normalizeUrlPath(page.url)));
19728
19947
  const incomingLinkCount = /* @__PURE__ */ new Map();
@@ -19738,7 +19957,9 @@ var IndexPipeline = class _IndexPipeline {
19738
19957
  }
19739
19958
  }
19740
19959
  stageEnd("links", linkStart);
19960
+ this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
19741
19961
  const mirrorStart = stageStart();
19962
+ this.logger.info("Writing mirror pages...");
19742
19963
  const mirrorPages = [];
19743
19964
  let routeExact = 0;
19744
19965
  let routeBestEffort = 0;
@@ -19808,7 +20029,9 @@ var IndexPipeline = class _IndexPipeline {
19808
20029
  await this.vectorStore.upsertPages(pageRecords, scope);
19809
20030
  }
19810
20031
  stageEnd("mirror", mirrorStart);
20032
+ this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
19811
20033
  const chunkStart = stageStart();
20034
+ this.logger.info("Chunking pages...");
19812
20035
  let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
19813
20036
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
19814
20037
  if (typeof maxChunks === "number") {
@@ -19821,6 +20044,7 @@ var IndexPipeline = class _IndexPipeline {
19821
20044
  });
19822
20045
  }
19823
20046
  stageEnd("chunk", chunkStart);
20047
+ this.logger.info(`Chunked into ${chunks.length} chunk${chunks.length === 1 ? "" : "s"} (${stageTimingsMs["chunk"]}ms)`);
19824
20048
  const currentChunkMap = /* @__PURE__ */ new Map();
19825
20049
  for (const chunk of chunks) {
19826
20050
  currentChunkMap.set(chunk.chunkKey, chunk);
@@ -19839,6 +20063,7 @@ var IndexPipeline = class _IndexPipeline {
19839
20063
  return existingHash !== chunk.contentHash;
19840
20064
  });
19841
20065
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
20066
+ this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
19842
20067
  const embedStart = stageStart();
19843
20068
  const chunkTokenEstimates = /* @__PURE__ */ new Map();
19844
20069
  for (const chunk of changedChunks) {
@@ -19853,9 +20078,11 @@ var IndexPipeline = class _IndexPipeline {
19853
20078
  let newEmbeddings = 0;
19854
20079
  const vectorsByChunk = /* @__PURE__ */ new Map();
19855
20080
  if (!options.dryRun && changedChunks.length > 0) {
20081
+ this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
19856
20082
  const embeddings = await this.embeddings.embedTexts(
19857
20083
  changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
19858
- this.config.embeddings.model
20084
+ this.config.embeddings.model,
20085
+ "retrieval.passage"
19859
20086
  );
19860
20087
  if (embeddings.length !== changedChunks.length) {
19861
20088
  throw new SearchSocketError(
@@ -19878,8 +20105,14 @@ var IndexPipeline = class _IndexPipeline {
19878
20105
  }
19879
20106
  }
19880
20107
  stageEnd("embedding", embedStart);
20108
+ if (changedChunks.length > 0) {
20109
+ this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
20110
+ } else {
20111
+ this.logger.info("No chunks to embed \u2014 all up to date");
20112
+ }
19881
20113
  const syncStart = stageStart();
19882
20114
  if (!options.dryRun) {
20115
+ this.logger.info("Syncing vectors...");
19883
20116
  const upserts = [];
19884
20117
  for (const chunk of changedChunks) {
19885
20118
  const vector = vectorsByChunk.get(chunk.chunkKey);
@@ -19898,6 +20131,8 @@ var IndexPipeline = class _IndexPipeline {
19898
20131
  sectionTitle: chunk.sectionTitle ?? "",
19899
20132
  headingPath: chunk.headingPath,
19900
20133
  snippet: chunk.snippet,
20134
+ chunkText: chunk.chunkText.slice(0, 4e3),
20135
+ ordinal: chunk.ordinal,
19901
20136
  contentHash: chunk.contentHash,
19902
20137
  modelId: this.config.embeddings.model,
19903
20138
  depth: chunk.depth,
@@ -19917,6 +20152,7 @@ var IndexPipeline = class _IndexPipeline {
19917
20152
  }
19918
20153
  }
19919
20154
  stageEnd("sync", syncStart);
20155
+ this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
19920
20156
  const finalizeStart = stageStart();
19921
20157
  if (!options.dryRun) {
19922
20158
  const scopeInfo = {
@@ -19936,6 +20172,7 @@ var IndexPipeline = class _IndexPipeline {
19936
20172
  });
19937
20173
  }
19938
20174
  stageEnd("finalize", finalizeStart);
20175
+ this.logger.info("Done.");
19939
20176
  return {
19940
20177
  pagesProcessed: mirrorPages.length,
19941
20178
  chunksTotal: chunks.length,
@@ -20096,7 +20333,7 @@ var SearchEngine = class _SearchEngine {
20096
20333
  const groupByPage = (input.groupBy ?? "page") === "page";
20097
20334
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20098
20335
  const embedStart = process.hrtime.bigint();
20099
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model);
20336
+ const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
20100
20337
  const queryVector = queryEmbeddings[0];
20101
20338
  if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
20102
20339
  throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
@@ -20124,13 +20361,17 @@ var SearchEngine = class _SearchEngine {
20124
20361
  usedRerank = true;
20125
20362
  }
20126
20363
  let results;
20364
+ const minScore = this.config.ranking.minScore;
20127
20365
  if (groupByPage) {
20128
- const pages = aggregateByPage(ordered, this.config);
20366
+ let pages = aggregateByPage(ordered, this.config);
20367
+ if (minScore > 0) {
20368
+ pages = pages.filter((p) => p.pageScore >= minScore);
20369
+ }
20129
20370
  const minRatio = this.config.ranking.minChunkScoreRatio;
20130
20371
  results = pages.slice(0, topK).map((page) => {
20131
20372
  const bestScore = page.bestChunk.finalScore;
20132
- const minScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
20133
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore).slice(0, 5);
20373
+ const minScore2 = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
20374
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore2).slice(0, 5);
20134
20375
  return {
20135
20376
  url: page.url,
20136
20377
  title: page.title,
@@ -20147,6 +20388,9 @@ var SearchEngine = class _SearchEngine {
20147
20388
  };
20148
20389
  });
20149
20390
  } else {
20391
+ if (minScore > 0) {
20392
+ ordered = ordered.filter((entry) => entry.finalScore >= minScore);
20393
+ }
20150
20394
  results = ordered.slice(0, topK).map(({ hit, finalScore }) => ({
20151
20395
  url: hit.metadata.url,
20152
20396
  title: hit.metadata.title,
@@ -20218,43 +20462,54 @@ var SearchEngine = class _SearchEngine {
20218
20462
  }
20219
20463
  }
20220
20464
  async rerankHits(query, ranked, topK) {
20221
- if (this.config.rerank.provider !== "jina") {
20465
+ if (!this.config.rerank.enabled) {
20222
20466
  throw new SearchSocketError(
20223
20467
  "INVALID_REQUEST",
20224
- "rerank=true requested but rerank.provider is not configured as 'jina'.",
20468
+ "rerank=true requested but rerank.enabled is not set to true.",
20225
20469
  400
20226
20470
  );
20227
20471
  }
20228
20472
  if (!this.reranker) {
20229
20473
  throw new SearchSocketError(
20230
20474
  "CONFIG_MISSING",
20231
- `rerank=true requested but ${this.config.rerank.jina.apiKeyEnv} is not set.`,
20475
+ `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
20232
20476
  400
20233
20477
  );
20234
20478
  }
20235
- const candidates = ranked.map(({ hit }) => ({
20236
- id: hit.id,
20237
- text: [hit.metadata.title, hit.metadata.sectionTitle, hit.metadata.snippet].filter(Boolean).join("\n")
20238
- }));
20479
+ const pageGroups = /* @__PURE__ */ new Map();
20480
+ for (const entry of ranked) {
20481
+ const url = entry.hit.metadata.url;
20482
+ const group = pageGroups.get(url);
20483
+ if (group) group.push(entry);
20484
+ else pageGroups.set(url, [entry]);
20485
+ }
20486
+ const pageCandidates = [];
20487
+ for (const [url, chunks] of pageGroups) {
20488
+ const sorted = [...chunks].sort(
20489
+ (a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0)
20490
+ );
20491
+ const title = sorted[0].hit.metadata.title;
20492
+ const body = sorted.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20493
+ pageCandidates.push({ id: url, text: `${title}
20494
+
20495
+ ${body}` });
20496
+ }
20239
20497
  const reranked = await this.reranker.rerank(
20240
20498
  query,
20241
- candidates,
20499
+ pageCandidates,
20242
20500
  Math.max(topK, this.config.rerank.topN)
20243
20501
  );
20244
- const rerankScoreById = new Map(reranked.map((entry) => [entry.id, entry.score]));
20502
+ const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
20245
20503
  return ranked.map((entry) => {
20246
- const rerankScore = rerankScoreById.get(entry.hit.id);
20247
- const safeBaseScore = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
20248
- if (rerankScore === void 0 || !Number.isFinite(rerankScore)) {
20249
- return {
20250
- ...entry,
20251
- finalScore: safeBaseScore
20252
- };
20504
+ const pageScore = scoreByUrl.get(entry.hit.metadata.url);
20505
+ const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
20506
+ if (pageScore === void 0 || !Number.isFinite(pageScore)) {
20507
+ return { ...entry, finalScore: base };
20253
20508
  }
20254
- const combinedScore = rerankScore * this.config.ranking.weights.rerank + safeBaseScore * 1e-3;
20509
+ const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
20255
20510
  return {
20256
20511
  ...entry,
20257
- finalScore: Number.isFinite(combinedScore) ? combinedScore : safeBaseScore
20512
+ finalScore: Number.isFinite(combined) ? combined : base
20258
20513
  };
20259
20514
  }).sort((a, b) => {
20260
20515
  const delta = b.finalScore - a.finalScore;
@@ -20452,13 +20707,21 @@ function searchsocketHandle(options = {}) {
20452
20707
  let rateLimiter = null;
20453
20708
  const getConfig = async () => {
20454
20709
  if (!configPromise) {
20455
- const configP = options.config ? Promise.resolve(options.config) : loadConfig({
20456
- cwd: options.cwd,
20457
- configPath: options.configPath
20458
- });
20710
+ let configP;
20711
+ if (options.config) {
20712
+ configP = Promise.resolve(options.config);
20713
+ } else if (options.rawConfig) {
20714
+ const cwd = options.cwd ?? process.cwd();
20715
+ configP = Promise.resolve(mergeConfig(cwd, options.rawConfig));
20716
+ } else {
20717
+ configP = loadConfig({
20718
+ cwd: options.cwd,
20719
+ configPath: options.configPath
20720
+ });
20721
+ }
20459
20722
  configPromise = configP.then((config) => {
20460
20723
  apiPath = apiPath ?? config.api.path;
20461
- if (config.api.rateLimit) {
20724
+ if (config.api.rateLimit && !isServerless()) {
20462
20725
  rateLimiter = new InMemoryRateLimiter(config.api.rateLimit.windowMs, config.api.rateLimit.max);
20463
20726
  }
20464
20727
  return config;
@@ -20468,10 +20731,9 @@ function searchsocketHandle(options = {}) {
20468
20731
  };
20469
20732
  const getEngine = async () => {
20470
20733
  if (!enginePromise) {
20471
- const config = options.config;
20734
+ const config = await getConfig();
20472
20735
  enginePromise = SearchEngine.create({
20473
20736
  cwd: options.cwd,
20474
- configPath: options.configPath,
20475
20737
  config
20476
20738
  });
20477
20739
  }
@@ -20737,6 +20999,6 @@ function createSearchClient(options = {}) {
20737
20999
  *)
20738
21000
  */
20739
21001
 
20740
- export { IndexPipeline, JinaReranker, SearchEngine, createEmbeddingsProvider, createReranker, createSearchClient, createVectorStore, loadConfig, mergeConfig, resolveScope, runMcpServer, searchsocketHandle, searchsocketVitePlugin };
21002
+ export { IndexPipeline, JinaReranker, SearchEngine, createEmbeddingsProvider, createReranker, createSearchClient, createVectorStore, isServerless, loadConfig, mergeConfig, mergeConfigServerless, resolveScope, runMcpServer, searchsocketHandle, searchsocketVitePlugin };
20741
21003
  //# sourceMappingURL=index.js.map
20742
21004
  //# sourceMappingURL=index.js.map