@tryformation/querylight-cli 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +5 -0
- package/README.md +39 -2
- package/dist/cli/main.js +297 -158
- package/dist/core/archive.d.ts +18 -0
- package/dist/index.js +65 -15
- package/dist/vector/runtime.d.ts +1 -4
- package/package.json +10 -7
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Result of resolving a workspace path for reading.
 *
 * In the bundled implementation of `resolveReadableWorkspace`, `archivePath`
 * is only populated when the input path was a `.zip` archive.
 */
type WorkspaceArchiveResolution = {
|
|
2
|
+
/** Directory the caller should read the workspace from. */
workspacePath: string;
|
|
3
|
+
/** Resolved path of the source archive; absent for plain directory workspaces. */
archivePath?: string;
|
|
4
|
+
};
|
|
5
|
+
/**
 * Reports whether `workspacePath` names a workspace archive.
 *
 * The bundled implementation only inspects the string (case-insensitive
 * `.zip` suffix); it does not touch the filesystem.
 */
export declare function isWorkspaceArchivePath(workspacePath: string): boolean;
|
|
6
|
+
/**
 * Packages a workspace into an archive file.
 *
 * NOTE(review): the implementation is not visible alongside this declaration;
 * the parameter notes below are inferred from names and should be confirmed.
 *
 * @param options.workspacePath Workspace to package.
 * @param options.outputPath Destination of the archive.
 * @param options.force Optional flag — presumably allows overwriting an existing output; TODO confirm.
 * @returns The workspace path, the archive path, and the file count and byte size reported for the archive.
 */
export declare function packageWorkspaceArchive({ workspacePath, outputPath, force }: {
|
|
7
|
+
workspacePath: string;
|
|
8
|
+
outputPath: string;
|
|
9
|
+
force?: boolean;
|
|
10
|
+
}): Promise<{
|
|
11
|
+
workspacePath: string;
|
|
12
|
+
archivePath: string;
|
|
13
|
+
fileCount: number;
|
|
14
|
+
sizeBytes: number;
|
|
15
|
+
}>;
|
|
16
|
+
/**
 * Resolves `workspacePath` to a directory that can be read as a workspace.
 *
 * In the bundled implementation a `.zip` path is extracted into a cache
 * directory under the OS temp dir and that extracted directory is returned;
 * any other path is checked with the workspace-exists assertion and returned.
 */
export declare function resolveReadableWorkspace(workspacePath: string): Promise<WorkspaceArchiveResolution>;
|
|
17
|
+
/**
 * Checks that `workspacePath` may be used for writing and resolves to a path string.
 *
 * NOTE(review): implementation not visible here — presumably rejects `.zip`
 * archive paths, which are read-only; confirm against src/core/archive.ts.
 */
export declare function assertWritableWorkspacePath(workspacePath: string): Promise<string>;
|
|
18
|
+
export {};
|
package/dist/index.js
CHANGED
|
@@ -2236,7 +2236,6 @@ async function createSparseQueryEncoder(cacheDir, modelId, queryTokenWeights) {
|
|
|
2236
2236
|
return async (text) => {
|
|
2237
2237
|
const features = await tokenizer([text], {
|
|
2238
2238
|
truncation: true,
|
|
2239
|
-
return_attention_mask: false,
|
|
2240
2239
|
return_token_type_ids: false
|
|
2241
2240
|
});
|
|
2242
2241
|
return buildSparseQueryVector(normalizeTokenIds(features.input_ids), queryTokenWeights);
|
|
@@ -3260,18 +3259,68 @@ async function searchIndex({
|
|
|
3260
3259
|
|
|
3261
3260
|
// src/server/search-api.ts
|
|
3262
3261
|
import { createServer } from "http";
|
|
3263
|
-
import { readdir, stat as
|
|
3262
|
+
import { readdir as readdir2, stat as stat5 } from "fs/promises";
|
|
3263
|
+
import path20 from "path";
|
|
3264
|
+
|
|
3265
|
+
// src/core/archive.ts
|
|
3266
|
+
import { mkdir as mkdir10, readdir, readFile as readFile11, rm as rm5, stat as stat4, writeFile as writeFile9 } from "fs/promises";
|
|
3267
|
+
import os2 from "os";
|
|
3264
3268
|
import path19 from "path";
|
|
3269
|
+
import { unzipSync, zipSync } from "fflate";
|
|
3270
|
+
/**
 * Returns true when the given path ends with ".zip" (compared case-insensitively).
 * Purely a string check; the path is not required to exist.
 */
function isWorkspaceArchivePath(workspacePath) {
|
|
3271
|
+
return workspacePath.toLowerCase().endsWith(".zip");
|
|
3272
|
+
}
|
|
3273
|
+
function assertSafeArchiveEntry(name) {
|
|
3274
|
+
const normalized = path19.posix.normalize(name);
|
|
3275
|
+
if (name.startsWith("/") || normalized === "." || normalized.startsWith("../") || normalized.includes("/../")) {
|
|
3276
|
+
throw new CliError(`unsafe archive entry: ${name}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
|
|
3277
|
+
}
|
|
3278
|
+
}
|
|
3279
|
+
/**
 * Computes the cache directory used for an extracted workspace archive.
 *
 * The directory name is the first 24 characters of `sha256(...)` over the
 * archive's resolved path, size and mtime, so a modified archive maps to a
 * different directory. Rejects if the archive cannot be stat'ed.
 *
 * NOTE(review): the location is a predictable path under the shared OS temp
 * directory — worth confirming this is acceptable on multi-user hosts.
 */
async function archiveCachePath(archivePath) {
|
|
3280
|
+
const info = await stat4(archivePath);
|
|
3281
|
+
const key = sha256(`${path19.resolve(archivePath)}:${info.size}:${info.mtimeMs}`).slice(0, 24);
|
|
3282
|
+
return path19.join(os2.tmpdir(), "qli-workspace-archives", key);
|
|
3283
|
+
}
|
|
3284
|
+
/**
 * Resolves a workspace path to a directory that can be read from.
 *
 * A non-archive path is validated with `assertWorkspaceExists` and returned.
 * A `.zip` path is extracted into a cache directory (see `archiveCachePath`)
 * and the extracted `workspace` directory is returned together with the
 * resolved archive path. A previously extracted, still valid cache is reused
 * without reading the archive again.
 *
 * @param {string} workspacePath Directory workspace or `.zip` archive.
 * @returns {Promise<{ workspacePath: string, archivePath?: string }>}
 * @throws {CliError} When the workspace is missing/invalid or the archive
 *   contains an unsafe entry name. Filesystem errors are propagated as-is.
 */
async function resolveReadableWorkspace(workspacePath) {
  const resolved = path19.resolve(workspacePath);
  if (!isWorkspaceArchivePath(resolved)) {
    return { workspacePath: await assertWorkspaceExists(resolved) };
  }
  const extractRoot = await archiveCachePath(resolved);
  const workspaceRoot = path19.join(extractRoot, "workspace");
  try {
    await assertWorkspaceExists(workspaceRoot);
    return { workspacePath: workspaceRoot, archivePath: resolved };
  } catch {
    // Cache miss or unusable cache: deliberately ignored, extract from scratch below.
  }
  // Only read and inflate the archive once we know the cache cannot be used.
  const entries = Object.entries(unzipSync(new Uint8Array(await readFile11(resolved))));
  // Validate every entry name before anything is written, so a malicious
  // archive is rejected without leaving partially extracted files behind.
  for (const [entryName] of entries) {
    assertSafeArchiveEntry(entryName);
  }
  await rm5(extractRoot, { recursive: true, force: true });
  try {
    await mkdir10(workspaceRoot, { recursive: true });
    // allSettled (not all): wait for every write to finish before deciding,
    // so no write is still in flight if the cleanup below has to run.
    const outcomes = await Promise.allSettled(entries.map(async ([entryName, data]) => {
      const target = path19.join(workspaceRoot, ...entryName.split("/"));
      if (entryName.endsWith("/")) {
        await mkdir10(target, { recursive: true });
        return;
      }
      await mkdir10(path19.dirname(target), { recursive: true });
      await writeFile9(target, Buffer.from(data));
    }));
    const failed = outcomes.find((outcome) => outcome.status === "rejected");
    if (failed) {
      throw failed.reason;
    }
    return { workspacePath: await assertWorkspaceExists(workspaceRoot), archivePath: resolved };
  } catch (error) {
    // Do not leave a half-extracted or invalid workspace in the cache.
    await rm5(extractRoot, { recursive: true, force: true });
    throw error;
  }
}
|
|
3312
|
+
|
|
3313
|
+
// src/server/search-api.ts
|
|
3265
3314
|
/**
 * Resolves to true when `candidatePath` can be stat'ed and is a directory.
 * Any failure of the stat call (missing path, permissions, ...) yields false
 * instead of rejecting.
 */
async function pathIsDirectory(candidatePath) {
|
|
3266
3315
|
try {
|
|
3267
|
-
return (await
|
|
3316
|
+
return (await stat5(candidatePath)).isDirectory();
|
|
3268
3317
|
} catch {
|
|
3269
3318
|
return false;
|
|
3270
3319
|
}
|
|
3271
3320
|
}
|
|
3272
3321
|
async function discoverKnowledgeBases(workspacePath) {
|
|
3273
3322
|
try {
|
|
3274
|
-
const singleWorkspace = await
|
|
3323
|
+
const singleWorkspace = (await resolveReadableWorkspace(workspacePath)).workspacePath;
|
|
3275
3324
|
const config = await loadConfig(singleWorkspace);
|
|
3276
3325
|
const index = await loadHydratedIndex(singleWorkspace);
|
|
3277
3326
|
return {
|
|
@@ -3288,19 +3337,20 @@ async function discoverKnowledgeBases(workspacePath) {
|
|
|
3288
3337
|
throw error;
|
|
3289
3338
|
}
|
|
3290
3339
|
}
|
|
3291
|
-
const resolvedRoot =
|
|
3340
|
+
const resolvedRoot = path20.resolve(workspacePath);
|
|
3292
3341
|
if (!await pathIsDirectory(resolvedRoot)) {
|
|
3293
3342
|
throw new CliError(`workspace path does not exist: ${resolvedRoot}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
|
|
3294
3343
|
}
|
|
3295
|
-
const entries = await
|
|
3296
|
-
const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory()).map(async (entry) => {
|
|
3297
|
-
const candidateWorkspace =
|
|
3344
|
+
const entries = await readdir2(resolvedRoot, { withFileTypes: true });
|
|
3345
|
+
const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory() || entry.isFile() && isWorkspaceArchivePath(entry.name)).map(async (entry) => {
|
|
3346
|
+
const candidateWorkspace = entry.isDirectory() ? path20.join(resolvedRoot, entry.name, ".kb") : path20.join(resolvedRoot, entry.name);
|
|
3347
|
+
const knowledgeBaseName = entry.isDirectory() ? entry.name : entry.name.replace(/\.zip$/i, "");
|
|
3298
3348
|
try {
|
|
3299
|
-
const workspace = await assertWorkspaceExists(candidateWorkspace);
|
|
3349
|
+
const workspace = entry.isDirectory() ? await assertWorkspaceExists(candidateWorkspace) : (await resolveReadableWorkspace(candidateWorkspace)).workspacePath;
|
|
3300
3350
|
const config = await loadConfig(workspace);
|
|
3301
3351
|
const index = await loadHydratedIndex(workspace);
|
|
3302
3352
|
return {
|
|
3303
|
-
name:
|
|
3353
|
+
name: knowledgeBaseName,
|
|
3304
3354
|
workspacePath: workspace,
|
|
3305
3355
|
configuredIndexName: config.index.name,
|
|
3306
3356
|
index
|
|
@@ -3314,7 +3364,7 @@ async function discoverKnowledgeBases(workspacePath) {
|
|
|
3314
3364
|
}))).filter((knowledgeBase) => knowledgeBase != null);
|
|
3315
3365
|
if (knowledgeBases.length === 0) {
|
|
3316
3366
|
throw new CliError(
|
|
3317
|
-
`no knowledge bases found at ${resolvedRoot}; use a .kb workspace or a directory of named subdirectories that each contain .kb`,
|
|
3367
|
+
`no knowledge bases found at ${resolvedRoot}; use a .kb workspace, a .zip workspace, or a directory of .zip files or named subdirectories that each contain .kb`,
|
|
3318
3368
|
"WORKSPACE_ERROR",
|
|
3319
3369
|
3 /* WorkspaceError */
|
|
3320
3370
|
);
|
|
@@ -3448,7 +3498,7 @@ async function startSearchApiServer({
|
|
|
3448
3498
|
}
|
|
3449
3499
|
|
|
3450
3500
|
// src/query/related-service.ts
|
|
3451
|
-
import
|
|
3501
|
+
import path21 from "path";
|
|
3452
3502
|
function cosineSimilarity2(left, right) {
|
|
3453
3503
|
let dot = 0;
|
|
3454
3504
|
let leftNorm = 0;
|
|
@@ -3524,7 +3574,7 @@ async function findRelatedDocuments({
|
|
|
3524
3574
|
if (!await fileExists(denseVectorPath(workspacePath))) {
|
|
3525
3575
|
throw new CliError("dense vector index is not built; run `qli models pull --dense` and `qli rebuild`", "DENSE_INDEX_MISSING", 7 /* QueryError */);
|
|
3526
3576
|
}
|
|
3527
|
-
const documents = await readJsonl(
|
|
3577
|
+
const documents = await readJsonl(path21.join(workspacePath, "documents", "documents.jsonl"));
|
|
3528
3578
|
const selected = resolveDocumentSelector(documents, document);
|
|
3529
3579
|
const densePayload = await readDensePayload(workspacePath);
|
|
3530
3580
|
const vectors = buildDocumentVectors(documents, densePayload.chunks, densePayload.metadata.dimensions);
|
|
@@ -3597,7 +3647,7 @@ async function createContext({
|
|
|
3597
3647
|
}
|
|
3598
3648
|
|
|
3599
3649
|
// src/report/diff-service.ts
|
|
3600
|
-
import
|
|
3650
|
+
import path22 from "path";
|
|
3601
3651
|
function chooseBaselineRun(runs, since) {
|
|
3602
3652
|
if (since === "last-run") {
|
|
3603
3653
|
return runs.at(-1);
|
|
@@ -3613,7 +3663,7 @@ async function diffWorkspace({
|
|
|
3613
3663
|
documentId,
|
|
3614
3664
|
since
|
|
3615
3665
|
}) {
|
|
3616
|
-
const current = await readJsonl(
|
|
3666
|
+
const current = await readJsonl(path22.join(workspacePath, "documents", "documents.jsonl"));
|
|
3617
3667
|
const baseline = chooseBaselineRun(await listRuns(workspacePath), since);
|
|
3618
3668
|
const previous = new Map((baseline?.documentsSnapshot ?? []).map((document) => [document.id, document]));
|
|
3619
3669
|
const changedDocuments = current.filter((document) => (!sourceId || document.sourceId === sourceId) && (!documentId || document.id === documentId)).filter((document) => {
|
package/dist/vector/runtime.d.ts
CHANGED
|
@@ -19,10 +19,7 @@ export declare function runSparsePython({ workspacePath, config, payload, import
|
|
|
19
19
|
importMetaUrl: string;
|
|
20
20
|
}): Promise<string>;
|
|
21
21
|
/**
 * Resolves to the `env` object and `pipeline` factory, both typed directly
 * from `@huggingface/transformers`.
 *
 * NOTE(review): `cacheDir` is presumably applied to the library's cache
 * settings; the implementation is not visible here — confirm.
 */
export declare function getDenseTransformersRuntime(cacheDir: string): Promise<{
|
|
22
|
-
env:
|
|
23
|
-
cacheDir: string;
|
|
24
|
-
allowLocalModels: boolean;
|
|
25
|
-
};
|
|
22
|
+
env: typeof import("@huggingface/transformers").env;
|
|
26
23
|
pipeline: typeof import("@huggingface/transformers").pipeline;
|
|
27
24
|
}>;
|
|
28
25
|
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tryformation/querylight-cli",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"description": "Querylight CLI for building and querying local knowledge bases.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/formation-res/querylight-cli#readme",
|
|
@@ -40,14 +40,15 @@
|
|
|
40
40
|
"verify:release-version": "node scripts/assert-release-version.mjs"
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
|
-
"@huggingface/transformers": "^
|
|
43
|
+
"@huggingface/transformers": "^4.2.0",
|
|
44
44
|
"@tryformation/querylight-ts": "^0.11.0",
|
|
45
45
|
"cheerio": "^1.2.0",
|
|
46
46
|
"cli-table3": "^0.6.5",
|
|
47
|
-
"commander": "^
|
|
47
|
+
"commander": "^15.0.0",
|
|
48
48
|
"fast-glob": "^3.3.3",
|
|
49
|
-
"feedparser": "^2.
|
|
49
|
+
"feedparser": "^2.6.0",
|
|
50
50
|
"feedsmith": "^2.9.4",
|
|
51
|
+
"fflate": "^0.8.3",
|
|
51
52
|
"gray-matter": "^4.0.3",
|
|
52
53
|
"mammoth": "^1.12.0",
|
|
53
54
|
"pdf-parse": "^2.4.5",
|
|
@@ -56,12 +57,14 @@
|
|
|
56
57
|
"yaml": "^2.9.0"
|
|
57
58
|
},
|
|
58
59
|
"devDependencies": {
|
|
59
|
-
"@types/
|
|
60
|
-
"@types/node": "^25.8.0",
|
|
60
|
+
"@types/node": "^26.0.1",
|
|
61
61
|
"@types/pdf-parse": "^1.1.5",
|
|
62
62
|
"@types/turndown": "^5.0.6",
|
|
63
63
|
"tsup": "^8.5.1",
|
|
64
64
|
"typescript": "^6.0.3",
|
|
65
|
-
"vitest": "^4.1.
|
|
65
|
+
"vitest": "^4.1.9"
|
|
66
|
+
},
|
|
67
|
+
"overrides": {
|
|
68
|
+
"esbuild": "^0.28.1"
|
|
66
69
|
}
|
|
67
70
|
}
|