@tryformation/querylight-cli 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +5 -0
- package/README.md +50 -2
- package/dist/cli/main.js +333 -167
- package/dist/core/archive.d.ts +18 -0
- package/dist/core/constants.d.ts +2 -2
- package/dist/index.js +81 -19
- package/dist/types/models.d.ts +3 -0
- package/dist/vector/runtime.d.ts +1 -4
- package/package.json +12 -8
- package/scripts/assert-release-version.mjs +48 -0
|
package/dist/core/archive.d.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
type WorkspaceArchiveResolution = {
|
|
2
|
+
workspacePath: string;
|
|
3
|
+
archivePath?: string;
|
|
4
|
+
};
|
|
5
|
+
export declare function isWorkspaceArchivePath(workspacePath: string): boolean;
|
|
6
|
+
export declare function packageWorkspaceArchive({ workspacePath, outputPath, force }: {
|
|
7
|
+
workspacePath: string;
|
|
8
|
+
outputPath: string;
|
|
9
|
+
force?: boolean;
|
|
10
|
+
}): Promise<{
|
|
11
|
+
workspacePath: string;
|
|
12
|
+
archivePath: string;
|
|
13
|
+
fileCount: number;
|
|
14
|
+
sizeBytes: number;
|
|
15
|
+
}>;
|
|
16
|
+
export declare function resolveReadableWorkspace(workspacePath: string): Promise<WorkspaceArchiveResolution>;
|
|
17
|
+
export declare function assertWritableWorkspacePath(workspacePath: string): Promise<string>;
|
|
18
|
+
export {};
|
package/dist/core/constants.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
export declare const PACKAGE_NAME
|
|
2
|
-
export declare const PACKAGE_VERSION
|
|
1
|
+
export declare const PACKAGE_NAME: string;
|
|
2
|
+
export declare const PACKAGE_VERSION: string;
|
|
3
3
|
export declare const DEFAULT_WORKSPACE = ".kb";
|
|
4
4
|
export declare const DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
5
5
|
export declare const LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
package/dist/index.js
CHANGED
|
@@ -22,6 +22,11 @@ import path from "path";
|
|
|
22
22
|
import YAML from "yaml";
|
|
23
23
|
|
|
24
24
|
// src/core/constants.ts
|
|
25
|
+
import { createRequire } from "module";
|
|
26
|
+
var require2 = createRequire(import.meta.url);
|
|
27
|
+
var packageJson = require2("../../package.json");
|
|
28
|
+
var PACKAGE_NAME = packageJson.name;
|
|
29
|
+
var PACKAGE_VERSION = packageJson.version;
|
|
25
30
|
var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
26
31
|
var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
|
27
32
|
|
|
@@ -53,6 +58,9 @@ var defaultConfig = () => ({
|
|
|
53
58
|
maxContextChars: 12e3,
|
|
54
59
|
citationStyle: "markdown"
|
|
55
60
|
},
|
|
61
|
+
search: {
|
|
62
|
+
defaultTopK: 50
|
|
63
|
+
},
|
|
56
64
|
retrieval: {
|
|
57
65
|
defaultMode: "lexical",
|
|
58
66
|
dense: {
|
|
@@ -74,12 +82,12 @@ var defaultConfig = () => ({
|
|
|
74
82
|
}
|
|
75
83
|
},
|
|
76
84
|
crawler: {
|
|
77
|
-
defaultUserAgent: "querylight-cli
|
|
85
|
+
defaultUserAgent: "querylight-cli",
|
|
78
86
|
obeyRobotsTxt: true,
|
|
79
87
|
rateLimitMs: 1e3,
|
|
80
88
|
maxConcurrentRequests: 5,
|
|
81
89
|
renderJs: false,
|
|
82
|
-
retentionDays:
|
|
90
|
+
retentionDays: 30,
|
|
83
91
|
fetchArticles: true
|
|
84
92
|
},
|
|
85
93
|
limits: {
|
|
@@ -123,6 +131,10 @@ async function loadConfig(workspacePath, configPath) {
|
|
|
123
131
|
...defaults.rag,
|
|
124
132
|
...parsed.rag ?? {}
|
|
125
133
|
},
|
|
134
|
+
search: {
|
|
135
|
+
...defaults.search,
|
|
136
|
+
...parsed.search ?? {}
|
|
137
|
+
},
|
|
126
138
|
retrieval: {
|
|
127
139
|
...defaults.retrieval,
|
|
128
140
|
...parsed.retrieval ?? {},
|
|
@@ -1069,7 +1081,7 @@ async function fetchUrlDocument({
|
|
|
1069
1081
|
publicationDate
|
|
1070
1082
|
}) {
|
|
1071
1083
|
const headers = {
|
|
1072
|
-
"user-agent": source.crawl?.userAgent ?? "querylight-cli
|
|
1084
|
+
"user-agent": source.crawl?.userAgent ?? "querylight-cli"
|
|
1073
1085
|
};
|
|
1074
1086
|
if (previous?.httpCache?.etag) {
|
|
1075
1087
|
headers["if-none-match"] = previous.httpCache.etag;
|
|
@@ -1368,7 +1380,7 @@ async function purgeDocuments(workspacePath, documentIds, documents) {
|
|
|
1368
1380
|
async function fetchFeedText(source) {
|
|
1369
1381
|
const response = await fetch(source.uri, {
|
|
1370
1382
|
headers: {
|
|
1371
|
-
"user-agent": source.crawl?.userAgent ?? "querylight-cli
|
|
1383
|
+
"user-agent": source.crawl?.userAgent ?? "querylight-cli"
|
|
1372
1384
|
}
|
|
1373
1385
|
});
|
|
1374
1386
|
if (!response.ok) {
|
|
@@ -2224,7 +2236,6 @@ async function createSparseQueryEncoder(cacheDir, modelId, queryTokenWeights) {
|
|
|
2224
2236
|
return async (text) => {
|
|
2225
2237
|
const features = await tokenizer([text], {
|
|
2226
2238
|
truncation: true,
|
|
2227
|
-
return_attention_mask: false,
|
|
2228
2239
|
return_token_type_ids: false
|
|
2229
2240
|
});
|
|
2230
2241
|
return buildSparseQueryVector(normalizeTokenIds(features.input_ids), queryTokenWeights);
|
|
@@ -3248,18 +3259,68 @@ async function searchIndex({
|
|
|
3248
3259
|
|
|
3249
3260
|
// src/server/search-api.ts
|
|
3250
3261
|
import { createServer } from "http";
|
|
3251
|
-
import { readdir, stat as
|
|
3262
|
+
import { readdir as readdir2, stat as stat5 } from "fs/promises";
|
|
3263
|
+
import path20 from "path";
|
|
3264
|
+
|
|
3265
|
+
// src/core/archive.ts
|
|
3266
|
+
import { mkdir as mkdir10, readdir, readFile as readFile11, rm as rm5, stat as stat4, writeFile as writeFile9 } from "fs/promises";
|
|
3267
|
+
import os2 from "os";
|
|
3252
3268
|
import path19 from "path";
|
|
3269
|
+
import { unzipSync, zipSync } from "fflate";
|
|
3270
|
+
function isWorkspaceArchivePath(workspacePath) {
|
|
3271
|
+
return workspacePath.toLowerCase().endsWith(".zip");
|
|
3272
|
+
}
|
|
3273
|
+
function assertSafeArchiveEntry(name) {
|
|
3274
|
+
const normalized = path19.posix.normalize(name);
|
|
3275
|
+
if (name.startsWith("/") || normalized === "." || normalized.startsWith("../") || normalized.includes("/../")) {
|
|
3276
|
+
throw new CliError(`unsafe archive entry: ${name}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
|
|
3277
|
+
}
|
|
3278
|
+
}
|
|
3279
|
+
async function archiveCachePath(archivePath) {
|
|
3280
|
+
const info = await stat4(archivePath);
|
|
3281
|
+
const key = sha256(`${path19.resolve(archivePath)}:${info.size}:${info.mtimeMs}`).slice(0, 24);
|
|
3282
|
+
return path19.join(os2.tmpdir(), "qli-workspace-archives", key);
|
|
3283
|
+
}
|
|
3284
|
+
async function resolveReadableWorkspace(workspacePath) {
|
|
3285
|
+
const resolved = path19.resolve(workspacePath);
|
|
3286
|
+
if (!isWorkspaceArchivePath(resolved)) {
|
|
3287
|
+
return { workspacePath: await assertWorkspaceExists(resolved) };
|
|
3288
|
+
}
|
|
3289
|
+
const archive = await readFile11(resolved);
|
|
3290
|
+
const extractRoot = await archiveCachePath(resolved);
|
|
3291
|
+
const workspaceRoot = path19.join(extractRoot, "workspace");
|
|
3292
|
+
try {
|
|
3293
|
+
await assertWorkspaceExists(workspaceRoot);
|
|
3294
|
+
return { workspacePath: workspaceRoot, archivePath: resolved };
|
|
3295
|
+
} catch {
|
|
3296
|
+
}
|
|
3297
|
+
await rm5(extractRoot, { recursive: true, force: true });
|
|
3298
|
+
await mkdir10(workspaceRoot, { recursive: true });
|
|
3299
|
+
const entries = unzipSync(new Uint8Array(archive));
|
|
3300
|
+
await Promise.all(Object.entries(entries).map(async ([entryName, data]) => {
|
|
3301
|
+
assertSafeArchiveEntry(entryName);
|
|
3302
|
+
const target = path19.join(workspaceRoot, ...entryName.split("/"));
|
|
3303
|
+
if (entryName.endsWith("/")) {
|
|
3304
|
+
await mkdir10(target, { recursive: true });
|
|
3305
|
+
return;
|
|
3306
|
+
}
|
|
3307
|
+
await mkdir10(path19.dirname(target), { recursive: true });
|
|
3308
|
+
await writeFile9(target, Buffer.from(data));
|
|
3309
|
+
}));
|
|
3310
|
+
return { workspacePath: await assertWorkspaceExists(workspaceRoot), archivePath: resolved };
|
|
3311
|
+
}
|
|
3312
|
+
|
|
3313
|
+
// src/server/search-api.ts
|
|
3253
3314
|
async function pathIsDirectory(candidatePath) {
|
|
3254
3315
|
try {
|
|
3255
|
-
return (await
|
|
3316
|
+
return (await stat5(candidatePath)).isDirectory();
|
|
3256
3317
|
} catch {
|
|
3257
3318
|
return false;
|
|
3258
3319
|
}
|
|
3259
3320
|
}
|
|
3260
3321
|
async function discoverKnowledgeBases(workspacePath) {
|
|
3261
3322
|
try {
|
|
3262
|
-
const singleWorkspace = await
|
|
3323
|
+
const singleWorkspace = (await resolveReadableWorkspace(workspacePath)).workspacePath;
|
|
3263
3324
|
const config = await loadConfig(singleWorkspace);
|
|
3264
3325
|
const index = await loadHydratedIndex(singleWorkspace);
|
|
3265
3326
|
return {
|
|
@@ -3276,19 +3337,20 @@ async function discoverKnowledgeBases(workspacePath) {
|
|
|
3276
3337
|
throw error;
|
|
3277
3338
|
}
|
|
3278
3339
|
}
|
|
3279
|
-
const resolvedRoot =
|
|
3340
|
+
const resolvedRoot = path20.resolve(workspacePath);
|
|
3280
3341
|
if (!await pathIsDirectory(resolvedRoot)) {
|
|
3281
3342
|
throw new CliError(`workspace path does not exist: ${resolvedRoot}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
|
|
3282
3343
|
}
|
|
3283
|
-
const entries = await
|
|
3284
|
-
const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory()).map(async (entry) => {
|
|
3285
|
-
const candidateWorkspace =
|
|
3344
|
+
const entries = await readdir2(resolvedRoot, { withFileTypes: true });
|
|
3345
|
+
const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory() || entry.isFile() && isWorkspaceArchivePath(entry.name)).map(async (entry) => {
|
|
3346
|
+
const candidateWorkspace = entry.isDirectory() ? path20.join(resolvedRoot, entry.name, ".kb") : path20.join(resolvedRoot, entry.name);
|
|
3347
|
+
const knowledgeBaseName = entry.isDirectory() ? entry.name : entry.name.replace(/\.zip$/i, "");
|
|
3286
3348
|
try {
|
|
3287
|
-
const workspace = await assertWorkspaceExists(candidateWorkspace);
|
|
3349
|
+
const workspace = entry.isDirectory() ? await assertWorkspaceExists(candidateWorkspace) : (await resolveReadableWorkspace(candidateWorkspace)).workspacePath;
|
|
3288
3350
|
const config = await loadConfig(workspace);
|
|
3289
3351
|
const index = await loadHydratedIndex(workspace);
|
|
3290
3352
|
return {
|
|
3291
|
-
name:
|
|
3353
|
+
name: knowledgeBaseName,
|
|
3292
3354
|
workspacePath: workspace,
|
|
3293
3355
|
configuredIndexName: config.index.name,
|
|
3294
3356
|
index
|
|
@@ -3302,7 +3364,7 @@ async function discoverKnowledgeBases(workspacePath) {
|
|
|
3302
3364
|
}))).filter((knowledgeBase) => knowledgeBase != null);
|
|
3303
3365
|
if (knowledgeBases.length === 0) {
|
|
3304
3366
|
throw new CliError(
|
|
3305
|
-
`no knowledge bases found at ${resolvedRoot}; use a .kb workspace or a directory of named subdirectories that each contain .kb`,
|
|
3367
|
+
`no knowledge bases found at ${resolvedRoot}; use a .kb workspace, a .zip workspace, or a directory of .zip files or named subdirectories that each contain .kb`,
|
|
3306
3368
|
"WORKSPACE_ERROR",
|
|
3307
3369
|
3 /* WorkspaceError */
|
|
3308
3370
|
);
|
|
@@ -3436,7 +3498,7 @@ async function startSearchApiServer({
|
|
|
3436
3498
|
}
|
|
3437
3499
|
|
|
3438
3500
|
// src/query/related-service.ts
|
|
3439
|
-
import
|
|
3501
|
+
import path21 from "path";
|
|
3440
3502
|
function cosineSimilarity2(left, right) {
|
|
3441
3503
|
let dot = 0;
|
|
3442
3504
|
let leftNorm = 0;
|
|
@@ -3512,7 +3574,7 @@ async function findRelatedDocuments({
|
|
|
3512
3574
|
if (!await fileExists(denseVectorPath(workspacePath))) {
|
|
3513
3575
|
throw new CliError("dense vector index is not built; run `qli models pull --dense` and `qli rebuild`", "DENSE_INDEX_MISSING", 7 /* QueryError */);
|
|
3514
3576
|
}
|
|
3515
|
-
const documents = await readJsonl(
|
|
3577
|
+
const documents = await readJsonl(path21.join(workspacePath, "documents", "documents.jsonl"));
|
|
3516
3578
|
const selected = resolveDocumentSelector(documents, document);
|
|
3517
3579
|
const densePayload = await readDensePayload(workspacePath);
|
|
3518
3580
|
const vectors = buildDocumentVectors(documents, densePayload.chunks, densePayload.metadata.dimensions);
|
|
@@ -3585,7 +3647,7 @@ async function createContext({
|
|
|
3585
3647
|
}
|
|
3586
3648
|
|
|
3587
3649
|
// src/report/diff-service.ts
|
|
3588
|
-
import
|
|
3650
|
+
import path22 from "path";
|
|
3589
3651
|
function chooseBaselineRun(runs, since) {
|
|
3590
3652
|
if (since === "last-run") {
|
|
3591
3653
|
return runs.at(-1);
|
|
@@ -3601,7 +3663,7 @@ async function diffWorkspace({
|
|
|
3601
3663
|
documentId,
|
|
3602
3664
|
since
|
|
3603
3665
|
}) {
|
|
3604
|
-
const current = await readJsonl(
|
|
3666
|
+
const current = await readJsonl(path22.join(workspacePath, "documents", "documents.jsonl"));
|
|
3605
3667
|
const baseline = chooseBaselineRun(await listRuns(workspacePath), since);
|
|
3606
3668
|
const previous = new Map((baseline?.documentsSnapshot ?? []).map((document) => [document.id, document]));
|
|
3607
3669
|
const changedDocuments = current.filter((document) => (!sourceId || document.sourceId === sourceId) && (!documentId || document.id === documentId)).filter((document) => {
|
package/dist/types/models.d.ts
CHANGED
package/dist/vector/runtime.d.ts
CHANGED
|
@@ -19,10 +19,7 @@ export declare function runSparsePython({ workspacePath, config, payload, import
|
|
|
19
19
|
importMetaUrl: string;
|
|
20
20
|
}): Promise<string>;
|
|
21
21
|
export declare function getDenseTransformersRuntime(cacheDir: string): Promise<{
|
|
22
|
-
env: {
|
|
23
|
-
cacheDir: string;
|
|
24
|
-
allowLocalModels: boolean;
|
|
25
|
-
};
|
|
22
|
+
env: typeof import("@huggingface/transformers").env;
|
|
26
23
|
pipeline: typeof import("@huggingface/transformers").pipeline;
|
|
27
24
|
}>;
|
|
28
25
|
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tryformation/querylight-cli",
|
|
3
|
-
"version": "0.2.4",
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"description": "Querylight CLI for building and querying local knowledge bases.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/formation-res/querylight-cli#readme",
|
|
@@ -36,17 +36,19 @@
|
|
|
36
36
|
"test:watch": "vitest",
|
|
37
37
|
"lint": "tsc --noEmit",
|
|
38
38
|
"check": "npm run lint && npm test",
|
|
39
|
-
"prepublishOnly": "npm run check && npm run build"
|
|
39
|
+
"prepublishOnly": "npm run check && npm run build && npm run verify:release-version",
|
|
40
|
+
"verify:release-version": "node scripts/assert-release-version.mjs"
|
|
40
41
|
},
|
|
41
42
|
"dependencies": {
|
|
42
|
-
"@huggingface/transformers": "^
|
|
43
|
+
"@huggingface/transformers": "^4.2.0",
|
|
43
44
|
"@tryformation/querylight-ts": "^0.11.0",
|
|
44
45
|
"cheerio": "^1.2.0",
|
|
45
46
|
"cli-table3": "^0.6.5",
|
|
46
|
-
"commander": "^
|
|
47
|
+
"commander": "^15.0.0",
|
|
47
48
|
"fast-glob": "^3.3.3",
|
|
48
|
-
"feedparser": "^2.
|
|
49
|
+
"feedparser": "^2.6.0",
|
|
49
50
|
"feedsmith": "^2.9.4",
|
|
51
|
+
"fflate": "^0.8.3",
|
|
50
52
|
"gray-matter": "^4.0.3",
|
|
51
53
|
"mammoth": "^1.12.0",
|
|
52
54
|
"pdf-parse": "^2.4.5",
|
|
@@ -55,12 +57,14 @@
|
|
|
55
57
|
"yaml": "^2.9.0"
|
|
56
58
|
},
|
|
57
59
|
"devDependencies": {
|
|
58
|
-
"@types/
|
|
59
|
-
"@types/node": "^25.8.0",
|
|
60
|
+
"@types/node": "^26.0.1",
|
|
60
61
|
"@types/pdf-parse": "^1.1.5",
|
|
61
62
|
"@types/turndown": "^5.0.6",
|
|
62
63
|
"tsup": "^8.5.1",
|
|
63
64
|
"typescript": "^6.0.3",
|
|
64
|
-
"vitest": "^4.1.
|
|
65
|
+
"vitest": "^4.1.9"
|
|
66
|
+
},
|
|
67
|
+
"overrides": {
|
|
68
|
+
"esbuild": "^0.28.1"
|
|
65
69
|
}
|
|
66
70
|
}
|
package/scripts/assert-release-version.mjs
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import { spawn } from "node:child_process";
|
|
6
|
+
import packageJson from "../package.json" with { type: "json" };
|
|
7
|
+
|
|
8
|
+
function run(command, args, options = {}) {
|
|
9
|
+
return new Promise((resolve, reject) => {
|
|
10
|
+
const child = spawn(command, args, {
|
|
11
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
12
|
+
...options
|
|
13
|
+
});
|
|
14
|
+
let stdout = "";
|
|
15
|
+
let stderr = "";
|
|
16
|
+
|
|
17
|
+
child.stdout.on("data", (chunk) => {
|
|
18
|
+
stdout += String(chunk);
|
|
19
|
+
});
|
|
20
|
+
child.stderr.on("data", (chunk) => {
|
|
21
|
+
stderr += String(chunk);
|
|
22
|
+
});
|
|
23
|
+
child.on("error", reject);
|
|
24
|
+
child.on("close", (code) => {
|
|
25
|
+
if (code === 0) {
|
|
26
|
+
resolve({ stdout, stderr });
|
|
27
|
+
return;
|
|
28
|
+
}
|
|
29
|
+
reject(new Error(`${command} ${args.join(" ")} failed with exit code ${code}\n${stderr}`));
|
|
30
|
+
});
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const workspaceRoot = await mkdtemp(path.join(os.tmpdir(), "qli-release-version-"));
|
|
35
|
+
const workspacePath = path.join(workspaceRoot, ".kb");
|
|
36
|
+
|
|
37
|
+
try {
|
|
38
|
+
const { stdout } = await run("node", ["dist/cli/main.js", "init", "--workspace", workspacePath, "--json"], {
|
|
39
|
+
cwd: new URL("..", import.meta.url)
|
|
40
|
+
});
|
|
41
|
+
const parsed = JSON.parse(stdout);
|
|
42
|
+
|
|
43
|
+
assert.equal(parsed.ok, true, "Expected qli init --json to succeed");
|
|
44
|
+
assert.equal(parsed.version, packageJson.version, `Built CLI reported version ${parsed.version}, expected ${packageJson.version}`);
|
|
45
|
+
process.stdout.write(`Verified built CLI version ${parsed.version}\n`);
|
|
46
|
+
} finally {
|
|
47
|
+
await rm(workspaceRoot, { recursive: true, force: true });
|
|
48
|
+
}
|