@tryformation/querylight-cli 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- export declare const PACKAGE_NAME = "@tryformation/querylight-cli";
2
- export declare const PACKAGE_VERSION = "0.2.3";
1
+ export declare const PACKAGE_NAME: string;
2
+ export declare const PACKAGE_VERSION: string;
3
3
  export declare const DEFAULT_WORKSPACE = ".kb";
4
4
  export declare const DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
5
5
  export declare const LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
package/dist/index.d.ts CHANGED
@@ -6,6 +6,7 @@ export * from "./ingest/ingest-service.js";
6
6
  export * from "./chunk/chunker.js";
7
7
  export * from "./index/querylight-indexer.js";
8
8
  export * from "./query/search-service.js";
9
+ export * from "./server/search-api.js";
9
10
  export * from "./query/related-service.js";
10
11
  export * from "./query/context-builder.js";
11
12
  export * from "./report/diff-service.js";
package/dist/index.js CHANGED
@@ -22,6 +22,11 @@ import path from "path";
22
22
  import YAML from "yaml";
23
23
 
24
24
  // src/core/constants.ts
25
+ import { createRequire } from "module";
26
+ var require2 = createRequire(import.meta.url);
27
+ var packageJson = require2("../../package.json");
28
+ var PACKAGE_NAME = packageJson.name;
29
+ var PACKAGE_VERSION = packageJson.version;
25
30
  var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
26
31
  var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
27
32
 
@@ -53,11 +58,14 @@ var defaultConfig = () => ({
53
58
  maxContextChars: 12e3,
54
59
  citationStyle: "markdown"
55
60
  },
61
+ search: {
62
+ defaultTopK: 50
63
+ },
56
64
  retrieval: {
57
65
  defaultMode: "lexical",
58
66
  dense: {
59
67
  enabled: true,
60
- modelId: "Xenova/all-MiniLM-L6-v2",
68
+ modelId: "Xenova/paraphrase-MiniLM-L3-v2",
61
69
  cacheDir: DEFAULT_SHARED_MODEL_CACHE_DIR,
62
70
  indexHashTables: 8,
63
71
  indexRandomSeed: 42,
@@ -65,7 +73,7 @@ var defaultConfig = () => ({
65
73
  },
66
74
  sparse: {
67
75
  enabled: true,
68
- modelId: "opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill",
76
+ modelId: "opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini",
69
77
  cacheDir: DEFAULT_SHARED_MODEL_CACHE_DIR,
70
78
  documentTopTokens: 128,
71
79
  queryEncoding: "tokenizer-token-weights",
@@ -74,12 +82,12 @@ var defaultConfig = () => ({
74
82
  }
75
83
  },
76
84
  crawler: {
77
- defaultUserAgent: "querylight-cli/0.1",
85
+ defaultUserAgent: "querylight-cli",
78
86
  obeyRobotsTxt: true,
79
87
  rateLimitMs: 1e3,
80
88
  maxConcurrentRequests: 5,
81
89
  renderJs: false,
82
- retentionDays: 365,
90
+ retentionDays: 30,
83
91
  fetchArticles: true
84
92
  },
85
93
  limits: {
@@ -123,6 +131,10 @@ async function loadConfig(workspacePath, configPath) {
123
131
  ...defaults.rag,
124
132
  ...parsed.rag ?? {}
125
133
  },
134
+ search: {
135
+ ...defaults.search,
136
+ ...parsed.search ?? {}
137
+ },
126
138
  retrieval: {
127
139
  ...defaults.retrieval,
128
140
  ...parsed.retrieval ?? {},
@@ -1069,7 +1081,7 @@ async function fetchUrlDocument({
1069
1081
  publicationDate
1070
1082
  }) {
1071
1083
  const headers = {
1072
- "user-agent": source.crawl?.userAgent ?? "querylight-cli/0.1"
1084
+ "user-agent": source.crawl?.userAgent ?? "querylight-cli"
1073
1085
  };
1074
1086
  if (previous?.httpCache?.etag) {
1075
1087
  headers["if-none-match"] = previous.httpCache.etag;
@@ -1213,13 +1225,17 @@ function isAllowed(url, baseUrl, includePatterns, excludePatterns, disallowRules
1213
1225
  if (url.search.length > 0) {
1214
1226
  return false;
1215
1227
  }
1216
- if (url.pathname.endsWith(".xml")) {
1228
+ const pathname = url.pathname.toLowerCase();
1229
+ if (pathname.endsWith(".xml")) {
1217
1230
  return false;
1218
1231
  }
1219
- if (url.pathname.includes("/cdn-cgi/")) {
1232
+ if (pathname.endsWith(".pdf")) {
1220
1233
  return false;
1221
1234
  }
1222
- if (url.pathname === "/search" || url.pathname === "/search/" || url.pathname.endsWith("/search/")) {
1235
+ if (pathname.includes("/cdn-cgi/")) {
1236
+ return false;
1237
+ }
1238
+ if (pathname === "/search" || pathname === "/search/" || pathname.endsWith("/search/")) {
1223
1239
  return false;
1224
1240
  }
1225
1241
  if (disallowRules.some((rule) => rule !== "/" && url.pathname.startsWith(rule))) {
@@ -1364,7 +1380,7 @@ async function purgeDocuments(workspacePath, documentIds, documents) {
1364
1380
  async function fetchFeedText(source) {
1365
1381
  const response = await fetch(source.uri, {
1366
1382
  headers: {
1367
- "user-agent": source.crawl?.userAgent ?? "querylight-cli/0.1"
1383
+ "user-agent": source.crawl?.userAgent ?? "querylight-cli"
1368
1384
  }
1369
1385
  });
1370
1386
  if (!response.ok) {
@@ -2058,15 +2074,26 @@ function createSparseChunkText(chunk) {
2058
2074
  // src/vector/dense.ts
2059
2075
  var denseEmbedderFactory = null;
2060
2076
  var EXACT_DENSE_RERANK_THRESHOLD = 5e3;
2077
+ function normalizeDenseEmbedder(embedder) {
2078
+ if (typeof embedder === "function") {
2079
+ return { embed: embedder };
2080
+ }
2081
+ return embedder;
2082
+ }
2061
2083
  async function createEmbedder(cacheDir, modelId) {
2062
2084
  if (denseEmbedderFactory) {
2063
- return denseEmbedderFactory(cacheDir, modelId);
2085
+ return normalizeDenseEmbedder(await denseEmbedderFactory(cacheDir, modelId));
2064
2086
  }
2065
2087
  const runtime = await getDenseTransformersRuntime(cacheDir);
2066
2088
  const extractor = await runtime.pipeline("feature-extraction", modelId);
2067
- return async (text) => {
2068
- const output = await extractor(text, { pooling: "mean", normalize: true });
2069
- return output.tolist()[0];
2089
+ return {
2090
+ async embed(text) {
2091
+ const output = await extractor(text, { pooling: "mean", normalize: true });
2092
+ return output.tolist()[0];
2093
+ },
2094
+ async dispose() {
2095
+ await extractor.dispose();
2096
+ }
2070
2097
  };
2071
2098
  }
2072
2099
  function exactDenseQuery(payload, vector, topK) {
@@ -2080,53 +2107,57 @@ async function buildDenseVectors({
2080
2107
  const chunks = await readJsonl(path14.join(workspacePath, "chunks", "chunks.jsonl"));
2081
2108
  const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
2082
2109
  await mkdir7(cacheDir, { recursive: true });
2083
- const embed = await createEmbedder(cacheDir, config.modelId);
2084
- const records = [];
2085
- let dimensions = 0;
2086
- reportProgress(progress, `Encoding ${chunks.length} chunk${chunks.length === 1 ? "" : "s"} for dense retrieval`);
2087
- for (const chunk of chunks) {
2088
- const embedding = await embed(createDenseChunkText(chunk));
2089
- dimensions ||= embedding.length;
2090
- records.push({
2091
- chunkId: chunk.id,
2092
- documentId: chunk.documentId,
2093
- sourceId: chunk.sourceId,
2094
- title: chunk.title,
2095
- uri: chunk.uri,
2096
- headingPath: chunk.headingPath,
2097
- text: chunk.text,
2098
- embedding
2099
- });
2100
- if (records.length === 1 || records.length % 100 === 0 || records.length === chunks.length) {
2101
- reportProgressDetail(progress, `Encoded ${records.length}/${chunks.length} chunks for dense retrieval`);
2110
+ const embedder = await createEmbedder(cacheDir, config.modelId);
2111
+ try {
2112
+ const records = [];
2113
+ let dimensions = 0;
2114
+ reportProgress(progress, `Encoding ${chunks.length} chunk${chunks.length === 1 ? "" : "s"} for dense retrieval`);
2115
+ for (const chunk of chunks) {
2116
+ const embedding = await embedder.embed(createDenseChunkText(chunk));
2117
+ dimensions ||= embedding.length;
2118
+ records.push({
2119
+ chunkId: chunk.id,
2120
+ documentId: chunk.documentId,
2121
+ sourceId: chunk.sourceId,
2122
+ title: chunk.title,
2123
+ uri: chunk.uri,
2124
+ headingPath: chunk.headingPath,
2125
+ text: chunk.text,
2126
+ embedding
2127
+ });
2128
+ if (records.length === 1 || records.length % 100 === 0 || records.length === chunks.length) {
2129
+ reportProgressDetail(progress, `Encoded ${records.length}/${chunks.length} chunks for dense retrieval`);
2130
+ }
2102
2131
  }
2132
+ reportProgress(progress, "Building dense vector index");
2133
+ const index = new VectorFieldIndex({
2134
+ numHashTables: config.indexHashTables,
2135
+ dimensions,
2136
+ random: createSeededRandom(config.indexRandomSeed)
2137
+ });
2138
+ for (const record of records) {
2139
+ index.insert(record.chunkId, [record.embedding]);
2140
+ }
2141
+ const metadata = {
2142
+ createdAt: (/* @__PURE__ */ new Date()).toISOString(),
2143
+ modelId: config.modelId,
2144
+ dimensions,
2145
+ hashTables: config.indexHashTables,
2146
+ randomSeed: config.indexRandomSeed,
2147
+ chunkCount: records.length,
2148
+ indexHash: sha256(JSON.stringify(index.indexState))
2149
+ };
2150
+ const payload = {
2151
+ metadata,
2152
+ indexState: index.indexState,
2153
+ chunks: records
2154
+ };
2155
+ await writeDensePayload(workspacePath, payload);
2156
+ reportProgress(progress, `Dense vectors written for ${records.length} chunk${records.length === 1 ? "" : "s"}`);
2157
+ return payload;
2158
+ } finally {
2159
+ await embedder.dispose?.();
2103
2160
  }
2104
- reportProgress(progress, "Building dense vector index");
2105
- const index = new VectorFieldIndex({
2106
- numHashTables: config.indexHashTables,
2107
- dimensions,
2108
- random: createSeededRandom(config.indexRandomSeed)
2109
- });
2110
- for (const record of records) {
2111
- index.insert(record.chunkId, [record.embedding]);
2112
- }
2113
- const metadata = {
2114
- createdAt: (/* @__PURE__ */ new Date()).toISOString(),
2115
- modelId: config.modelId,
2116
- dimensions,
2117
- hashTables: config.indexHashTables,
2118
- randomSeed: config.indexRandomSeed,
2119
- chunkCount: records.length,
2120
- indexHash: sha256(JSON.stringify(index.indexState))
2121
- };
2122
- const payload = {
2123
- metadata,
2124
- indexState: index.indexState,
2125
- chunks: records
2126
- };
2127
- await writeDensePayload(workspacePath, payload);
2128
- reportProgress(progress, `Dense vectors written for ${records.length} chunk${records.length === 1 ? "" : "s"}`);
2129
- return payload;
2130
2161
  }
2131
2162
  async function denseQuery({
2132
2163
  workspacePath,
@@ -2136,21 +2167,25 @@ async function denseQuery({
2136
2167
  }) {
2137
2168
  const payload = await readDensePayload(workspacePath);
2138
2169
  const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
2139
- const embed = await createEmbedder(cacheDir, config.modelId);
2140
- const vector = await embed(query);
2141
- if (payload.chunks.length <= EXACT_DENSE_RERANK_THRESHOLD) {
2170
+ const embedder = await createEmbedder(cacheDir, config.modelId);
2171
+ try {
2172
+ const vector = await embedder.embed(query);
2173
+ if (payload.chunks.length <= EXACT_DENSE_RERANK_THRESHOLD) {
2174
+ return exactDenseQuery(payload, vector, topK);
2175
+ }
2176
+ const index = new VectorFieldIndex({
2177
+ numHashTables: payload.metadata.hashTables,
2178
+ dimensions: payload.metadata.dimensions,
2179
+ random: createSeededRandom(payload.metadata.randomSeed)
2180
+ }).loadState(payload.indexState);
2181
+ const approximateHits = index.query(vector, topK);
2182
+ if (approximateHits.length >= topK) {
2183
+ return approximateHits;
2184
+ }
2142
2185
  return exactDenseQuery(payload, vector, topK);
2186
+ } finally {
2187
+ await embedder.dispose?.();
2143
2188
  }
2144
- const index = new VectorFieldIndex({
2145
- numHashTables: payload.metadata.hashTables,
2146
- dimensions: payload.metadata.dimensions,
2147
- random: createSeededRandom(payload.metadata.randomSeed)
2148
- }).loadState(payload.indexState);
2149
- const approximateHits = index.query(vector, topK);
2150
- if (approximateHits.length >= topK) {
2151
- return approximateHits;
2152
- }
2153
- return exactDenseQuery(payload, vector, topK);
2154
2189
  }
2155
2190
 
2156
2191
  // src/vector/sparse.ts
@@ -2894,13 +2929,20 @@ function searchResultsFromResponse(response, showChunks = false) {
2894
2929
  metadata: hit._source.metadata
2895
2930
  }));
2896
2931
  }
2932
+ async function searchJsonRequest({
2933
+ index,
2934
+ request,
2935
+ indexName = "querylight"
2936
+ }) {
2937
+ return searchJsonDsl({ index, request, indexName });
2938
+ }
2897
2939
  async function searchJsonIndex({
2898
2940
  workspacePath,
2899
2941
  request,
2900
2942
  indexName = "querylight"
2901
2943
  }) {
2902
2944
  const index = await loadHydratedIndex(workspacePath);
2903
- return searchJsonDsl({ index, request, indexName });
2945
+ return searchJsonRequest({ index, request, indexName });
2904
2946
  }
2905
2947
  function normalizeDisplayTitle(title) {
2906
2948
  return title.replace(/\s*\|\s*Querylight TS Demo\s*$/i, "").replace(/\s+/g, " ").trim();
@@ -3216,8 +3258,197 @@ async function searchIndex({
3216
3258
  return createSearchResponse(mode, finalHits, Date.now() - startedAt);
3217
3259
  }
3218
3260
 
3219
- // src/query/related-service.ts
3261
+ // src/server/search-api.ts
3262
+ import { createServer } from "http";
3263
+ import { readdir, stat as stat4 } from "fs/promises";
3220
3264
  import path19 from "path";
3265
+ async function pathIsDirectory(candidatePath) {
3266
+ try {
3267
+ return (await stat4(candidatePath)).isDirectory();
3268
+ } catch {
3269
+ return false;
3270
+ }
3271
+ }
3272
+ async function discoverKnowledgeBases(workspacePath) {
3273
+ try {
3274
+ const singleWorkspace = await assertWorkspaceExists(workspacePath);
3275
+ const config = await loadConfig(singleWorkspace);
3276
+ const index = await loadHydratedIndex(singleWorkspace);
3277
+ return {
3278
+ mode: "single",
3279
+ knowledgeBases: [{
3280
+ name: config.index.name,
3281
+ workspacePath: singleWorkspace,
3282
+ configuredIndexName: config.index.name,
3283
+ index
3284
+ }]
3285
+ };
3286
+ } catch (error) {
3287
+ if (!(error instanceof CliError) || error.code !== "WORKSPACE_ERROR") {
3288
+ throw error;
3289
+ }
3290
+ }
3291
+ const resolvedRoot = path19.resolve(workspacePath);
3292
+ if (!await pathIsDirectory(resolvedRoot)) {
3293
+ throw new CliError(`workspace path does not exist: ${resolvedRoot}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
3294
+ }
3295
+ const entries = await readdir(resolvedRoot, { withFileTypes: true });
3296
+ const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory()).map(async (entry) => {
3297
+ const candidateWorkspace = path19.join(resolvedRoot, entry.name, ".kb");
3298
+ try {
3299
+ const workspace = await assertWorkspaceExists(candidateWorkspace);
3300
+ const config = await loadConfig(workspace);
3301
+ const index = await loadHydratedIndex(workspace);
3302
+ return {
3303
+ name: entry.name,
3304
+ workspacePath: workspace,
3305
+ configuredIndexName: config.index.name,
3306
+ index
3307
+ };
3308
+ } catch (error) {
3309
+ if (error instanceof CliError && error.code === "WORKSPACE_ERROR") {
3310
+ return null;
3311
+ }
3312
+ throw error;
3313
+ }
3314
+ }))).filter((knowledgeBase) => knowledgeBase != null);
3315
+ if (knowledgeBases.length === 0) {
3316
+ throw new CliError(
3317
+ `no knowledge bases found at ${resolvedRoot}; use a .kb workspace or a directory of named subdirectories that each contain .kb`,
3318
+ "WORKSPACE_ERROR",
3319
+ 3 /* WorkspaceError */
3320
+ );
3321
+ }
3322
+ return { mode: "multi", knowledgeBases };
3323
+ }
3324
+ function sendJson(response, statusCode, payload) {
3325
+ response.statusCode = statusCode;
3326
+ response.setHeader("content-type", "application/json; charset=utf-8");
3327
+ response.end(JSON.stringify(payload));
3328
+ }
3329
+ function sendError(response, statusCode, type, reason) {
3330
+ sendJson(response, statusCode, {
3331
+ error: {
3332
+ type,
3333
+ reason
3334
+ },
3335
+ status: statusCode
3336
+ });
3337
+ }
3338
+ async function readRequestBody(request) {
3339
+ const chunks = [];
3340
+ for await (const chunk of request) {
3341
+ chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
3342
+ }
3343
+ return Buffer.concat(chunks).toString("utf8");
3344
+ }
3345
+ function parseSearchRequest(raw) {
3346
+ const normalized = raw.trim();
3347
+ if (normalized.length === 0) {
3348
+ return {};
3349
+ }
3350
+ try {
3351
+ const parsed = JSON.parse(normalized);
3352
+ if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
3353
+ throw new Error("expected a JSON object");
3354
+ }
3355
+ return parsed;
3356
+ } catch (error) {
3357
+ const message = error instanceof Error ? error.message : String(error);
3358
+ throw new CliError(`invalid JSON request: ${message}`, "INVALID_ARGUMENT", 2 /* InvalidArguments */);
3359
+ }
3360
+ }
3361
+ function routeForKnowledgeBase(mode, knowledgeBase) {
3362
+ return mode === "single" ? "/_search" : `/${knowledgeBase.name}/_search`;
3363
+ }
3364
+ function resolveKnowledgeBaseForPath(pathname, mode, knowledgeBases) {
3365
+ const segments = pathname.split("/").filter(Boolean);
3366
+ if (mode === "single") {
3367
+ const knowledgeBase = [...knowledgeBases.values()][0];
3368
+ if (!knowledgeBase) {
3369
+ return null;
3370
+ }
3371
+ if (segments.length === 1 && segments[0] === "_search") {
3372
+ return knowledgeBase;
3373
+ }
3374
+ if (segments.length === 2 && segments[1] === "_search" && segments[0] === knowledgeBase.configuredIndexName) {
3375
+ return knowledgeBase;
3376
+ }
3377
+ return null;
3378
+ }
3379
+ if (segments.length === 2 && segments[1] === "_search") {
3380
+ return knowledgeBases.get(segments[0]) ?? null;
3381
+ }
3382
+ return null;
3383
+ }
3384
+ async function handleSearchRequest(request, response, pathname, mode, knowledgeBases) {
3385
+ if (request.method !== "GET" && request.method !== "POST") {
3386
+ response.setHeader("allow", "GET, POST");
3387
+ sendError(response, 405, "method_not_allowed", `unsupported method for ${pathname}`);
3388
+ return;
3389
+ }
3390
+ const knowledgeBase = resolveKnowledgeBaseForPath(pathname, mode, knowledgeBases);
3391
+ if (!knowledgeBase) {
3392
+ sendError(response, 404, "resource_not_found_exception", `unknown search route: ${pathname}`);
3393
+ return;
3394
+ }
3395
+ try {
3396
+ const requestBody = parseSearchRequest(await readRequestBody(request));
3397
+ const indexName = mode === "multi" ? knowledgeBase.name : knowledgeBase.configuredIndexName;
3398
+ const result = await searchJsonRequest({
3399
+ index: knowledgeBase.index,
3400
+ request: requestBody,
3401
+ indexName
3402
+ });
3403
+ sendJson(response, 200, result);
3404
+ } catch (error) {
3405
+ if (error instanceof CliError && error.code === "INVALID_ARGUMENT") {
3406
+ sendError(response, 400, "parse_exception", error.message);
3407
+ return;
3408
+ }
3409
+ const message = error instanceof Error ? error.message : String(error);
3410
+ sendError(response, 500, "search_phase_execution_exception", message);
3411
+ }
3412
+ }
3413
+ async function startSearchApiServer({
3414
+ workspacePath,
3415
+ host = "127.0.0.1",
3416
+ port = 3e3
3417
+ }) {
3418
+ const { mode, knowledgeBases } = await discoverKnowledgeBases(workspacePath);
3419
+ const byName = new Map(knowledgeBases.map((knowledgeBase) => [knowledgeBase.name, knowledgeBase]));
3420
+ const server = createServer(async (request, response) => {
3421
+ const url2 = new URL(request.url ?? "/", `http://${request.headers.host ?? `${host}:${port}`}`);
3422
+ await handleSearchRequest(request, response, url2.pathname, mode, byName);
3423
+ });
3424
+ await new Promise((resolve2, reject) => {
3425
+ server.once("error", reject);
3426
+ server.listen(port, host, () => {
3427
+ server.off("error", reject);
3428
+ resolve2();
3429
+ });
3430
+ });
3431
+ const address = server.address();
3432
+ if (!address || typeof address === "string") {
3433
+ throw new CliError("server failed to bind to a TCP address", "SERVER_ERROR", 1 /* GeneralError */);
3434
+ }
3435
+ const url = `http://${host}:${address.port}`;
3436
+ return {
3437
+ mode,
3438
+ url,
3439
+ knowledgeBases: knowledgeBases.map((knowledgeBase) => ({
3440
+ name: knowledgeBase.name,
3441
+ workspacePath: knowledgeBase.workspacePath,
3442
+ route: routeForKnowledgeBase(mode, knowledgeBase)
3443
+ })),
3444
+ close: async () => new Promise((resolve2, reject) => {
3445
+ server.close((error) => error ? reject(error) : resolve2());
3446
+ })
3447
+ };
3448
+ }
3449
+
3450
+ // src/query/related-service.ts
3451
+ import path20 from "path";
3221
3452
  function cosineSimilarity2(left, right) {
3222
3453
  let dot = 0;
3223
3454
  let leftNorm = 0;
@@ -3293,7 +3524,7 @@ async function findRelatedDocuments({
3293
3524
  if (!await fileExists(denseVectorPath(workspacePath))) {
3294
3525
  throw new CliError("dense vector index is not built; run `qli models pull --dense` and `qli rebuild`", "DENSE_INDEX_MISSING", 7 /* QueryError */);
3295
3526
  }
3296
- const documents = await readJsonl(path19.join(workspacePath, "documents", "documents.jsonl"));
3527
+ const documents = await readJsonl(path20.join(workspacePath, "documents", "documents.jsonl"));
3297
3528
  const selected = resolveDocumentSelector(documents, document);
3298
3529
  const densePayload = await readDensePayload(workspacePath);
3299
3530
  const vectors = buildDocumentVectors(documents, densePayload.chunks, densePayload.metadata.dimensions);
@@ -3366,7 +3597,7 @@ async function createContext({
3366
3597
  }
3367
3598
 
3368
3599
  // src/report/diff-service.ts
3369
- import path20 from "path";
3600
+ import path21 from "path";
3370
3601
  function chooseBaselineRun(runs, since) {
3371
3602
  if (since === "last-run") {
3372
3603
  return runs.at(-1);
@@ -3382,7 +3613,7 @@ async function diffWorkspace({
3382
3613
  documentId,
3383
3614
  since
3384
3615
  }) {
3385
- const current = await readJsonl(path20.join(workspacePath, "documents", "documents.jsonl"));
3616
+ const current = await readJsonl(path21.join(workspacePath, "documents", "documents.jsonl"));
3386
3617
  const baseline = chooseBaselineRun(await listRuns(workspacePath), since);
3387
3618
  const previous = new Map((baseline?.documentsSnapshot ?? []).map((document) => [document.id, document]));
3388
3619
  const changedDocuments = current.filter((document) => (!sourceId || document.sourceId === sourceId) && (!documentId || document.id === documentId)).filter((document) => {
@@ -3438,12 +3669,15 @@ export {
3438
3669
  ingestSources,
3439
3670
  listSources,
3440
3671
  loadConfig,
3672
+ loadHydratedIndex,
3441
3673
  removeSource,
3442
3674
  renderChangeReport,
3443
3675
  reprocessDocuments,
3444
3676
  searchIndex,
3445
3677
  searchJsonIndex,
3678
+ searchJsonRequest,
3446
3679
  searchResultsFromResponse,
3680
+ startSearchApiServer,
3447
3681
  updateSource,
3448
3682
  writeDefaultConfig
3449
3683
  };
@@ -1,5 +1,6 @@
1
- import { type JsonDslRequest, type JsonDslResponse } from "@tryformation/querylight-ts";
1
+ import { type DocumentIndex, type JsonDslRequest, type JsonDslResponse } from "@tryformation/querylight-ts";
2
2
  import type { RetrievalMode, SearchResponseData, SearchResult } from "../types/models.js";
3
+ export declare function loadHydratedIndex(workspacePath: string): Promise<DocumentIndex>;
3
4
  type SearchDateField = "publicationDate" | "firstSeenAt" | "lastSeenAt" | "lastChangedAt" | "crawledAt";
4
5
  type SearchDateRange = {
5
6
  field: SearchDateField;
@@ -7,6 +8,11 @@ type SearchDateRange = {
7
8
  to?: string;
8
9
  };
9
10
  export declare function searchResultsFromResponse(response: SearchResponseData, showChunks?: boolean): SearchResult[];
11
+ export declare function searchJsonRequest({ index, request, indexName }: {
12
+ index: DocumentIndex;
13
+ request: JsonDslRequest;
14
+ indexName?: string;
15
+ }): Promise<JsonDslResponse>;
10
16
  export declare function searchJsonIndex({ workspacePath, request, indexName }: {
11
17
  workspacePath: string;
12
18
  request: JsonDslRequest;
@@ -0,0 +1,15 @@
1
+ export type SearchApiServerInfo = {
2
+ mode: "single" | "multi";
3
+ url: string;
4
+ knowledgeBases: Array<{
5
+ name: string;
6
+ workspacePath: string;
7
+ route: string;
8
+ }>;
9
+ close: () => Promise<void>;
10
+ };
11
+ export declare function startSearchApiServer({ workspacePath, host, port }: {
12
+ workspacePath: string;
13
+ host?: string;
14
+ port?: number;
15
+ }): Promise<SearchApiServerInfo>;
@@ -173,6 +173,9 @@ export type WorkspaceConfig = {
173
173
  maxContextChars: number;
174
174
  citationStyle: "markdown";
175
175
  };
176
+ search: {
177
+ defaultTopK: number;
178
+ };
176
179
  retrieval: {
177
180
  defaultMode: RetrievalMode;
178
181
  dense: DenseVectorModelConfig;
@@ -1,6 +1,10 @@
1
1
  import { type ProgressHandler } from "../core/progress.js";
2
2
  import type { DenseVectorPayload, WorkspaceConfig } from "../types/models.js";
3
- export declare function setDenseEmbedderFactoryForTests(factory: ((cacheDir: string, modelId: string) => Promise<(text: string) => Promise<number[]>>) | null): void;
3
+ type DenseEmbedder = {
4
+ embed(text: string): Promise<number[]>;
5
+ dispose?: () => Promise<void>;
6
+ };
7
+ export declare function setDenseEmbedderFactoryForTests(factory: ((cacheDir: string, modelId: string) => Promise<DenseEmbedder | ((text: string) => Promise<number[]>)>) | null): void;
4
8
  export declare function pullDenseModel(workspacePath: string, config: WorkspaceConfig["retrieval"]["dense"]): Promise<void>;
5
9
  export declare function buildDenseVectors({ workspacePath, config, progress }: {
6
10
  workspacePath: string;
@@ -13,3 +17,4 @@ export declare function denseQuery({ workspacePath, config, query, topK }: {
13
17
  query: string;
14
18
  topK: number;
15
19
  }): Promise<Array<[string, number]>>;
20
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tryformation/querylight-cli",
3
- "version": "0.2.3",
3
+ "version": "0.2.5",
4
4
  "description": "Querylight CLI for building and querying local knowledge bases.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/formation-res/querylight-cli#readme",
@@ -36,7 +36,8 @@
36
36
  "test:watch": "vitest",
37
37
  "lint": "tsc --noEmit",
38
38
  "check": "npm run lint && npm test",
39
- "prepublishOnly": "npm run check && npm run build"
39
+ "prepublishOnly": "npm run check && npm run build && npm run verify:release-version",
40
+ "verify:release-version": "node scripts/assert-release-version.mjs"
40
41
  },
41
42
  "dependencies": {
42
43
  "@huggingface/transformers": "^3.8.1",
@@ -0,0 +1,48 @@
1
+ import assert from "node:assert/strict";
2
+ import { mkdtemp, rm } from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
5
+ import { spawn } from "node:child_process";
6
+ import packageJson from "../package.json" with { type: "json" };
7
+
8
+ function run(command, args, options = {}) {
9
+ return new Promise((resolve, reject) => {
10
+ const child = spawn(command, args, {
11
+ stdio: ["ignore", "pipe", "pipe"],
12
+ ...options
13
+ });
14
+ let stdout = "";
15
+ let stderr = "";
16
+
17
+ child.stdout.on("data", (chunk) => {
18
+ stdout += String(chunk);
19
+ });
20
+ child.stderr.on("data", (chunk) => {
21
+ stderr += String(chunk);
22
+ });
23
+ child.on("error", reject);
24
+ child.on("close", (code) => {
25
+ if (code === 0) {
26
+ resolve({ stdout, stderr });
27
+ return;
28
+ }
29
+ reject(new Error(`${command} ${args.join(" ")} failed with exit code ${code}\n${stderr}`));
30
+ });
31
+ });
32
+ }
33
+
34
+ const workspaceRoot = await mkdtemp(path.join(os.tmpdir(), "qli-release-version-"));
35
+ const workspacePath = path.join(workspaceRoot, ".kb");
36
+
37
+ try {
38
+ const { stdout } = await run("node", ["dist/cli/main.js", "init", "--workspace", workspacePath, "--json"], {
39
+ cwd: new URL("..", import.meta.url)
40
+ });
41
+ const parsed = JSON.parse(stdout);
42
+
43
+ assert.equal(parsed.ok, true, "Expected qli init --json to succeed");
44
+ assert.equal(parsed.version, packageJson.version, `Built CLI reported version ${parsed.version}, expected ${packageJson.version}`);
45
+ process.stdout.write(`Verified built CLI version ${parsed.version}\n`);
46
+ } finally {
47
+ await rm(workspaceRoot, { recursive: true, force: true });
48
+ }