@tryformation/querylight-cli 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ type WorkspaceArchiveResolution = {
2
+ workspacePath: string;
3
+ archivePath?: string;
4
+ };
5
+ export declare function isWorkspaceArchivePath(workspacePath: string): boolean;
6
+ export declare function packageWorkspaceArchive({ workspacePath, outputPath, force }: {
7
+ workspacePath: string;
8
+ outputPath: string;
9
+ force?: boolean;
10
+ }): Promise<{
11
+ workspacePath: string;
12
+ archivePath: string;
13
+ fileCount: number;
14
+ sizeBytes: number;
15
+ }>;
16
+ export declare function resolveReadableWorkspace(workspacePath: string): Promise<WorkspaceArchiveResolution>;
17
+ export declare function assertWritableWorkspacePath(workspacePath: string): Promise<string>;
18
+ export {};
@@ -1,5 +1,5 @@
1
- export declare const PACKAGE_NAME = "@tryformation/querylight-cli";
2
- export declare const PACKAGE_VERSION = "0.2.3";
1
+ export declare const PACKAGE_NAME: string;
2
+ export declare const PACKAGE_VERSION: string;
3
3
  export declare const DEFAULT_WORKSPACE = ".kb";
4
4
  export declare const DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
5
5
  export declare const LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
package/dist/index.js CHANGED
@@ -22,6 +22,11 @@ import path from "path";
22
22
  import YAML from "yaml";
23
23
 
24
24
  // src/core/constants.ts
25
+ import { createRequire } from "module";
26
+ var require2 = createRequire(import.meta.url);
27
+ var packageJson = require2("../../package.json");
28
+ var PACKAGE_NAME = packageJson.name;
29
+ var PACKAGE_VERSION = packageJson.version;
25
30
  var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
26
31
  var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
27
32
 
@@ -53,6 +58,9 @@ var defaultConfig = () => ({
53
58
  maxContextChars: 12e3,
54
59
  citationStyle: "markdown"
55
60
  },
61
+ search: {
62
+ defaultTopK: 50
63
+ },
56
64
  retrieval: {
57
65
  defaultMode: "lexical",
58
66
  dense: {
@@ -74,12 +82,12 @@ var defaultConfig = () => ({
74
82
  }
75
83
  },
76
84
  crawler: {
77
- defaultUserAgent: "querylight-cli/0.1",
85
+ defaultUserAgent: "querylight-cli",
78
86
  obeyRobotsTxt: true,
79
87
  rateLimitMs: 1e3,
80
88
  maxConcurrentRequests: 5,
81
89
  renderJs: false,
82
- retentionDays: 365,
90
+ retentionDays: 30,
83
91
  fetchArticles: true
84
92
  },
85
93
  limits: {
@@ -123,6 +131,10 @@ async function loadConfig(workspacePath, configPath) {
123
131
  ...defaults.rag,
124
132
  ...parsed.rag ?? {}
125
133
  },
134
+ search: {
135
+ ...defaults.search,
136
+ ...parsed.search ?? {}
137
+ },
126
138
  retrieval: {
127
139
  ...defaults.retrieval,
128
140
  ...parsed.retrieval ?? {},
@@ -1069,7 +1081,7 @@ async function fetchUrlDocument({
1069
1081
  publicationDate
1070
1082
  }) {
1071
1083
  const headers = {
1072
- "user-agent": source.crawl?.userAgent ?? "querylight-cli/0.1"
1084
+ "user-agent": source.crawl?.userAgent ?? "querylight-cli"
1073
1085
  };
1074
1086
  if (previous?.httpCache?.etag) {
1075
1087
  headers["if-none-match"] = previous.httpCache.etag;
@@ -1368,7 +1380,7 @@ async function purgeDocuments(workspacePath, documentIds, documents) {
1368
1380
  async function fetchFeedText(source) {
1369
1381
  const response = await fetch(source.uri, {
1370
1382
  headers: {
1371
- "user-agent": source.crawl?.userAgent ?? "querylight-cli/0.1"
1383
+ "user-agent": source.crawl?.userAgent ?? "querylight-cli"
1372
1384
  }
1373
1385
  });
1374
1386
  if (!response.ok) {
@@ -2224,7 +2236,6 @@ async function createSparseQueryEncoder(cacheDir, modelId, queryTokenWeights) {
2224
2236
  return async (text) => {
2225
2237
  const features = await tokenizer([text], {
2226
2238
  truncation: true,
2227
- return_attention_mask: false,
2228
2239
  return_token_type_ids: false
2229
2240
  });
2230
2241
  return buildSparseQueryVector(normalizeTokenIds(features.input_ids), queryTokenWeights);
@@ -3248,18 +3259,68 @@ async function searchIndex({
3248
3259
 
3249
3260
  // src/server/search-api.ts
3250
3261
  import { createServer } from "http";
3251
- import { readdir, stat as stat4 } from "fs/promises";
3262
+ import { readdir as readdir2, stat as stat5 } from "fs/promises";
3263
+ import path20 from "path";
3264
+
3265
+ // src/core/archive.ts
3266
+ import { mkdir as mkdir10, readdir, readFile as readFile11, rm as rm5, stat as stat4, writeFile as writeFile9 } from "fs/promises";
3267
+ import os2 from "os";
3252
3268
  import path19 from "path";
3269
+ import { unzipSync, zipSync } from "fflate";
3270
+ function isWorkspaceArchivePath(workspacePath) {
3271
+ return workspacePath.toLowerCase().endsWith(".zip");
3272
+ }
3273
+ function assertSafeArchiveEntry(name) {
3274
+ const normalized = path19.posix.normalize(name);
3275
+ if (name.startsWith("/") || normalized === "." || normalized.startsWith("../") || normalized.includes("/../")) {
3276
+ throw new CliError(`unsafe archive entry: ${name}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
3277
+ }
3278
+ }
3279
+ async function archiveCachePath(archivePath) {
3280
+ const info = await stat4(archivePath);
3281
+ const key = sha256(`${path19.resolve(archivePath)}:${info.size}:${info.mtimeMs}`).slice(0, 24);
3282
+ return path19.join(os2.tmpdir(), "qli-workspace-archives", key);
3283
+ }
3284
+ async function resolveReadableWorkspace(workspacePath) {
3285
+ const resolved = path19.resolve(workspacePath);
3286
+ if (!isWorkspaceArchivePath(resolved)) {
3287
+ return { workspacePath: await assertWorkspaceExists(resolved) };
3288
+ }
3289
+ const archive = await readFile11(resolved);
3290
+ const extractRoot = await archiveCachePath(resolved);
3291
+ const workspaceRoot = path19.join(extractRoot, "workspace");
3292
+ try {
3293
+ await assertWorkspaceExists(workspaceRoot);
3294
+ return { workspacePath: workspaceRoot, archivePath: resolved };
3295
+ } catch {
3296
+ }
3297
+ await rm5(extractRoot, { recursive: true, force: true });
3298
+ await mkdir10(workspaceRoot, { recursive: true });
3299
+ const entries = unzipSync(new Uint8Array(archive));
3300
+ await Promise.all(Object.entries(entries).map(async ([entryName, data]) => {
3301
+ assertSafeArchiveEntry(entryName);
3302
+ const target = path19.join(workspaceRoot, ...entryName.split("/"));
3303
+ if (entryName.endsWith("/")) {
3304
+ await mkdir10(target, { recursive: true });
3305
+ return;
3306
+ }
3307
+ await mkdir10(path19.dirname(target), { recursive: true });
3308
+ await writeFile9(target, Buffer.from(data));
3309
+ }));
3310
+ return { workspacePath: await assertWorkspaceExists(workspaceRoot), archivePath: resolved };
3311
+ }
3312
+
3313
+ // src/server/search-api.ts
3253
3314
  async function pathIsDirectory(candidatePath) {
3254
3315
  try {
3255
- return (await stat4(candidatePath)).isDirectory();
3316
+ return (await stat5(candidatePath)).isDirectory();
3256
3317
  } catch {
3257
3318
  return false;
3258
3319
  }
3259
3320
  }
3260
3321
  async function discoverKnowledgeBases(workspacePath) {
3261
3322
  try {
3262
- const singleWorkspace = await assertWorkspaceExists(workspacePath);
3323
+ const singleWorkspace = (await resolveReadableWorkspace(workspacePath)).workspacePath;
3263
3324
  const config = await loadConfig(singleWorkspace);
3264
3325
  const index = await loadHydratedIndex(singleWorkspace);
3265
3326
  return {
@@ -3276,19 +3337,20 @@ async function discoverKnowledgeBases(workspacePath) {
3276
3337
  throw error;
3277
3338
  }
3278
3339
  }
3279
- const resolvedRoot = path19.resolve(workspacePath);
3340
+ const resolvedRoot = path20.resolve(workspacePath);
3280
3341
  if (!await pathIsDirectory(resolvedRoot)) {
3281
3342
  throw new CliError(`workspace path does not exist: ${resolvedRoot}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
3282
3343
  }
3283
- const entries = await readdir(resolvedRoot, { withFileTypes: true });
3284
- const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory()).map(async (entry) => {
3285
- const candidateWorkspace = path19.join(resolvedRoot, entry.name, ".kb");
3344
+ const entries = await readdir2(resolvedRoot, { withFileTypes: true });
3345
+ const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory() || entry.isFile() && isWorkspaceArchivePath(entry.name)).map(async (entry) => {
3346
+ const candidateWorkspace = entry.isDirectory() ? path20.join(resolvedRoot, entry.name, ".kb") : path20.join(resolvedRoot, entry.name);
3347
+ const knowledgeBaseName = entry.isDirectory() ? entry.name : entry.name.replace(/\.zip$/i, "");
3286
3348
  try {
3287
- const workspace = await assertWorkspaceExists(candidateWorkspace);
3349
+ const workspace = entry.isDirectory() ? await assertWorkspaceExists(candidateWorkspace) : (await resolveReadableWorkspace(candidateWorkspace)).workspacePath;
3288
3350
  const config = await loadConfig(workspace);
3289
3351
  const index = await loadHydratedIndex(workspace);
3290
3352
  return {
3291
- name: entry.name,
3353
+ name: knowledgeBaseName,
3292
3354
  workspacePath: workspace,
3293
3355
  configuredIndexName: config.index.name,
3294
3356
  index
@@ -3302,7 +3364,7 @@ async function discoverKnowledgeBases(workspacePath) {
3302
3364
  }))).filter((knowledgeBase) => knowledgeBase != null);
3303
3365
  if (knowledgeBases.length === 0) {
3304
3366
  throw new CliError(
3305
- `no knowledge bases found at ${resolvedRoot}; use a .kb workspace or a directory of named subdirectories that each contain .kb`,
3367
+ `no knowledge bases found at ${resolvedRoot}; use a .kb workspace, a .zip workspace, or a directory of .zip files or named subdirectories that each contain .kb`,
3306
3368
  "WORKSPACE_ERROR",
3307
3369
  3 /* WorkspaceError */
3308
3370
  );
@@ -3436,7 +3498,7 @@ async function startSearchApiServer({
3436
3498
  }
3437
3499
 
3438
3500
  // src/query/related-service.ts
3439
- import path20 from "path";
3501
+ import path21 from "path";
3440
3502
  function cosineSimilarity2(left, right) {
3441
3503
  let dot = 0;
3442
3504
  let leftNorm = 0;
@@ -3512,7 +3574,7 @@ async function findRelatedDocuments({
3512
3574
  if (!await fileExists(denseVectorPath(workspacePath))) {
3513
3575
  throw new CliError("dense vector index is not built; run `qli models pull --dense` and `qli rebuild`", "DENSE_INDEX_MISSING", 7 /* QueryError */);
3514
3576
  }
3515
- const documents = await readJsonl(path20.join(workspacePath, "documents", "documents.jsonl"));
3577
+ const documents = await readJsonl(path21.join(workspacePath, "documents", "documents.jsonl"));
3516
3578
  const selected = resolveDocumentSelector(documents, document);
3517
3579
  const densePayload = await readDensePayload(workspacePath);
3518
3580
  const vectors = buildDocumentVectors(documents, densePayload.chunks, densePayload.metadata.dimensions);
@@ -3585,7 +3647,7 @@ async function createContext({
3585
3647
  }
3586
3648
 
3587
3649
  // src/report/diff-service.ts
3588
- import path21 from "path";
3650
+ import path22 from "path";
3589
3651
  function chooseBaselineRun(runs, since) {
3590
3652
  if (since === "last-run") {
3591
3653
  return runs.at(-1);
@@ -3601,7 +3663,7 @@ async function diffWorkspace({
3601
3663
  documentId,
3602
3664
  since
3603
3665
  }) {
3604
- const current = await readJsonl(path21.join(workspacePath, "documents", "documents.jsonl"));
3666
+ const current = await readJsonl(path22.join(workspacePath, "documents", "documents.jsonl"));
3605
3667
  const baseline = chooseBaselineRun(await listRuns(workspacePath), since);
3606
3668
  const previous = new Map((baseline?.documentsSnapshot ?? []).map((document) => [document.id, document]));
3607
3669
  const changedDocuments = current.filter((document) => (!sourceId || document.sourceId === sourceId) && (!documentId || document.id === documentId)).filter((document) => {
@@ -173,6 +173,9 @@ export type WorkspaceConfig = {
173
173
  maxContextChars: number;
174
174
  citationStyle: "markdown";
175
175
  };
176
+ search: {
177
+ defaultTopK: number;
178
+ };
176
179
  retrieval: {
177
180
  defaultMode: RetrievalMode;
178
181
  dense: DenseVectorModelConfig;
@@ -19,10 +19,7 @@ export declare function runSparsePython({ workspacePath, config, payload, import
19
19
  importMetaUrl: string;
20
20
  }): Promise<string>;
21
21
  export declare function getDenseTransformersRuntime(cacheDir: string): Promise<{
22
- env: {
23
- cacheDir: string;
24
- allowLocalModels: boolean;
25
- };
22
+ env: typeof import("@huggingface/transformers").env;
26
23
  pipeline: typeof import("@huggingface/transformers").pipeline;
27
24
  }>;
28
25
  export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tryformation/querylight-cli",
3
- "version": "0.2.4",
3
+ "version": "0.2.6",
4
4
  "description": "Querylight CLI for building and querying local knowledge bases.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/formation-res/querylight-cli#readme",
@@ -36,17 +36,19 @@
36
36
  "test:watch": "vitest",
37
37
  "lint": "tsc --noEmit",
38
38
  "check": "npm run lint && npm test",
39
- "prepublishOnly": "npm run check && npm run build"
39
+ "prepublishOnly": "npm run check && npm run build && npm run verify:release-version",
40
+ "verify:release-version": "node scripts/assert-release-version.mjs"
40
41
  },
41
42
  "dependencies": {
42
- "@huggingface/transformers": "^3.8.1",
43
+ "@huggingface/transformers": "^4.2.0",
43
44
  "@tryformation/querylight-ts": "^0.11.0",
44
45
  "cheerio": "^1.2.0",
45
46
  "cli-table3": "^0.6.5",
46
- "commander": "^14.0.3",
47
+ "commander": "^15.0.0",
47
48
  "fast-glob": "^3.3.3",
48
- "feedparser": "^2.2.10",
49
+ "feedparser": "^2.6.0",
49
50
  "feedsmith": "^2.9.4",
51
+ "fflate": "^0.8.3",
50
52
  "gray-matter": "^4.0.3",
51
53
  "mammoth": "^1.12.0",
52
54
  "pdf-parse": "^2.4.5",
@@ -55,12 +57,14 @@
55
57
  "yaml": "^2.9.0"
56
58
  },
57
59
  "devDependencies": {
58
- "@types/feedparser": "^2.2.8",
59
- "@types/node": "^25.8.0",
60
+ "@types/node": "^26.0.1",
60
61
  "@types/pdf-parse": "^1.1.5",
61
62
  "@types/turndown": "^5.0.6",
62
63
  "tsup": "^8.5.1",
63
64
  "typescript": "^6.0.3",
64
- "vitest": "^4.1.6"
65
+ "vitest": "^4.1.9"
66
+ },
67
+ "overrides": {
68
+ "esbuild": "^0.28.1"
65
69
  }
66
70
  }
@@ -0,0 +1,48 @@
1
+ import assert from "node:assert/strict";
2
+ import { mkdtemp, rm } from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
5
+ import { spawn } from "node:child_process";
6
+ import packageJson from "../package.json" with { type: "json" };
7
+
8
+ function run(command, args, options = {}) {
9
+ return new Promise((resolve, reject) => {
10
+ const child = spawn(command, args, {
11
+ stdio: ["ignore", "pipe", "pipe"],
12
+ ...options
13
+ });
14
+ let stdout = "";
15
+ let stderr = "";
16
+
17
+ child.stdout.on("data", (chunk) => {
18
+ stdout += String(chunk);
19
+ });
20
+ child.stderr.on("data", (chunk) => {
21
+ stderr += String(chunk);
22
+ });
23
+ child.on("error", reject);
24
+ child.on("close", (code) => {
25
+ if (code === 0) {
26
+ resolve({ stdout, stderr });
27
+ return;
28
+ }
29
+ reject(new Error(`${command} ${args.join(" ")} failed with exit code ${code}\n${stderr}`));
30
+ });
31
+ });
32
+ }
33
+
34
+ const workspaceRoot = await mkdtemp(path.join(os.tmpdir(), "qli-release-version-"));
35
+ const workspacePath = path.join(workspaceRoot, ".kb");
36
+
37
+ try {
38
+ const { stdout } = await run("node", ["dist/cli/main.js", "init", "--workspace", workspacePath, "--json"], {
39
+ cwd: new URL("..", import.meta.url)
40
+ });
41
+ const parsed = JSON.parse(stdout);
42
+
43
+ assert.equal(parsed.ok, true, "Expected qli init --json to succeed");
44
+ assert.equal(parsed.version, packageJson.version, `Built CLI reported version ${parsed.version}, expected ${packageJson.version}`);
45
+ process.stdout.write(`Verified built CLI version ${parsed.version}\n`);
46
+ } finally {
47
+ await rm(workspaceRoot, { recursive: true, force: true });
48
+ }