@tryformation/querylight-cli 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +5 -0
- package/README.md +39 -2
- package/dist/cli/main.js +297 -158
- package/dist/core/archive.d.ts +18 -0
- package/dist/index.js +65 -15
- package/dist/vector/runtime.d.ts +1 -4
- package/package.json +10 -7
package/dist/cli/main.js
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
// src/cli/run-cli.ts
|
|
4
4
|
import { Command, Option } from "commander";
|
|
5
|
-
import { readFile as
|
|
6
|
-
import
|
|
5
|
+
import { readFile as readFile12, stat as stat6 } from "fs/promises";
|
|
6
|
+
import path23 from "path";
|
|
7
7
|
|
|
8
8
|
// src/chunk/chunker.ts
|
|
9
9
|
import { readFile as readFile3 } from "fs/promises";
|
|
@@ -384,27 +384,138 @@ async function assertWorkspaceExists(workspacePath) {
|
|
|
384
384
|
}
|
|
385
385
|
}
|
|
386
386
|
|
|
387
|
+
// src/core/archive.ts
|
|
388
|
+
import { mkdir as mkdir3, readdir, readFile as readFile4, rm, stat as stat2, writeFile as writeFile3 } from "fs/promises";
|
|
389
|
+
import os from "os";
|
|
390
|
+
import path6 from "path";
|
|
391
|
+
import { unzipSync, zipSync } from "fflate";
|
|
392
|
+
function isWorkspaceArchivePath(workspacePath) {
|
|
393
|
+
return workspacePath.toLowerCase().endsWith(".zip");
|
|
394
|
+
}
|
|
395
|
+
async function collectFiles(root, outputPath) {
|
|
396
|
+
const files = {};
|
|
397
|
+
const resolvedOutput = path6.resolve(outputPath);
|
|
398
|
+
async function visit(dir) {
|
|
399
|
+
const entries = await readdir(dir, { withFileTypes: true });
|
|
400
|
+
for (const entry of entries) {
|
|
401
|
+
const absolute = path6.join(dir, entry.name);
|
|
402
|
+
if (path6.resolve(absolute) === resolvedOutput) {
|
|
403
|
+
continue;
|
|
404
|
+
}
|
|
405
|
+
if (entry.isDirectory()) {
|
|
406
|
+
await visit(absolute);
|
|
407
|
+
continue;
|
|
408
|
+
}
|
|
409
|
+
if (!entry.isFile()) {
|
|
410
|
+
continue;
|
|
411
|
+
}
|
|
412
|
+
const relative = path6.relative(root, absolute).split(path6.sep).join("/");
|
|
413
|
+
files[relative] = new Uint8Array(await readFile4(absolute));
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
await visit(root);
|
|
417
|
+
return files;
|
|
418
|
+
}
|
|
419
|
+
async function packageWorkspaceArchive({
|
|
420
|
+
workspacePath,
|
|
421
|
+
outputPath,
|
|
422
|
+
force = false
|
|
423
|
+
}) {
|
|
424
|
+
const workspace = await assertWorkspaceExists(workspacePath);
|
|
425
|
+
const archivePath = path6.resolve(outputPath);
|
|
426
|
+
try {
|
|
427
|
+
await stat2(archivePath);
|
|
428
|
+
if (!force) {
|
|
429
|
+
throw new CliError(`archive already exists: ${archivePath}`, "INVALID_ARGUMENT", 2 /* InvalidArguments */);
|
|
430
|
+
}
|
|
431
|
+
} catch (error) {
|
|
432
|
+
if (error instanceof CliError) {
|
|
433
|
+
throw error;
|
|
434
|
+
}
|
|
435
|
+
if (error.code !== "ENOENT") {
|
|
436
|
+
throw error;
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
const files = await collectFiles(workspace, archivePath);
|
|
440
|
+
const archive = zipSync(files, { level: 6 });
|
|
441
|
+
await mkdir3(path6.dirname(archivePath), { recursive: true });
|
|
442
|
+
await writeFile3(archivePath, archive);
|
|
443
|
+
const archiveStat = await stat2(archivePath);
|
|
444
|
+
return {
|
|
445
|
+
workspacePath: workspace,
|
|
446
|
+
archivePath,
|
|
447
|
+
fileCount: Object.keys(files).length,
|
|
448
|
+
sizeBytes: archiveStat.size
|
|
449
|
+
};
|
|
450
|
+
}
|
|
451
|
+
function assertSafeArchiveEntry(name) {
|
|
452
|
+
const normalized = path6.posix.normalize(name);
|
|
453
|
+
if (name.startsWith("/") || normalized === "." || normalized.startsWith("../") || normalized.includes("/../")) {
|
|
454
|
+
throw new CliError(`unsafe archive entry: ${name}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
async function archiveCachePath(archivePath) {
|
|
458
|
+
const info = await stat2(archivePath);
|
|
459
|
+
const key = sha256(`${path6.resolve(archivePath)}:${info.size}:${info.mtimeMs}`).slice(0, 24);
|
|
460
|
+
return path6.join(os.tmpdir(), "qli-workspace-archives", key);
|
|
461
|
+
}
|
|
462
|
+
async function resolveReadableWorkspace(workspacePath) {
|
|
463
|
+
const resolved = path6.resolve(workspacePath);
|
|
464
|
+
if (!isWorkspaceArchivePath(resolved)) {
|
|
465
|
+
return { workspacePath: await assertWorkspaceExists(resolved) };
|
|
466
|
+
}
|
|
467
|
+
const archive = await readFile4(resolved);
|
|
468
|
+
const extractRoot = await archiveCachePath(resolved);
|
|
469
|
+
const workspaceRoot = path6.join(extractRoot, "workspace");
|
|
470
|
+
try {
|
|
471
|
+
await assertWorkspaceExists(workspaceRoot);
|
|
472
|
+
return { workspacePath: workspaceRoot, archivePath: resolved };
|
|
473
|
+
} catch {
|
|
474
|
+
}
|
|
475
|
+
await rm(extractRoot, { recursive: true, force: true });
|
|
476
|
+
await mkdir3(workspaceRoot, { recursive: true });
|
|
477
|
+
const entries = unzipSync(new Uint8Array(archive));
|
|
478
|
+
await Promise.all(Object.entries(entries).map(async ([entryName, data]) => {
|
|
479
|
+
assertSafeArchiveEntry(entryName);
|
|
480
|
+
const target = path6.join(workspaceRoot, ...entryName.split("/"));
|
|
481
|
+
if (entryName.endsWith("/")) {
|
|
482
|
+
await mkdir3(target, { recursive: true });
|
|
483
|
+
return;
|
|
484
|
+
}
|
|
485
|
+
await mkdir3(path6.dirname(target), { recursive: true });
|
|
486
|
+
await writeFile3(target, Buffer.from(data));
|
|
487
|
+
}));
|
|
488
|
+
return { workspacePath: await assertWorkspaceExists(workspaceRoot), archivePath: resolved };
|
|
489
|
+
}
|
|
490
|
+
async function assertWritableWorkspacePath(workspacePath) {
|
|
491
|
+
const resolved = path6.resolve(workspacePath);
|
|
492
|
+
if (isWorkspaceArchivePath(resolved)) {
|
|
493
|
+
throw new CliError("zip workspaces are read-only; package a rebuilt directory workspace instead", "WORKSPACE_ERROR", 3 /* WorkspaceError */);
|
|
494
|
+
}
|
|
495
|
+
return resolved;
|
|
496
|
+
}
|
|
497
|
+
|
|
387
498
|
// src/index/querylight-indexer.ts
|
|
388
499
|
import { Analyzer, DateFieldIndex, DocumentIndex, KeywordTokenizer, LowerCaseTextFilter, RankingAlgorithm, StoredSourceIndex, TextFieldIndex } from "@tryformation/querylight-ts";
|
|
389
|
-
import
|
|
500
|
+
import path12 from "path";
|
|
390
501
|
|
|
391
502
|
// src/vector/dense.ts
|
|
392
503
|
import { VectorFieldIndex, cosineSimilarity, createSeededRandom } from "@tryformation/querylight-ts";
|
|
393
|
-
import { mkdir as
|
|
394
|
-
import
|
|
504
|
+
import { mkdir as mkdir5 } from "fs/promises";
|
|
505
|
+
import path9 from "path";
|
|
395
506
|
|
|
396
507
|
// src/vector/runtime.ts
|
|
397
|
-
import
|
|
398
|
-
import
|
|
508
|
+
import os2 from "os";
|
|
509
|
+
import path7 from "path";
|
|
399
510
|
import { fileURLToPath } from "url";
|
|
400
511
|
import { execFile, execFileSync } from "child_process";
|
|
401
|
-
import { mkdtemp, rm, writeFile as
|
|
512
|
+
import { mkdtemp, rm as rm2, writeFile as writeFile4 } from "fs/promises";
|
|
402
513
|
|
|
403
514
|
// src/core/files.ts
|
|
404
|
-
import { stat as
|
|
515
|
+
import { stat as stat3 } from "fs/promises";
|
|
405
516
|
async function fileExists(filePath) {
|
|
406
517
|
try {
|
|
407
|
-
await
|
|
518
|
+
await stat3(filePath);
|
|
408
519
|
return true;
|
|
409
520
|
} catch {
|
|
410
521
|
return false;
|
|
@@ -414,35 +525,35 @@ async function fileExists(filePath) {
|
|
|
414
525
|
// src/vector/runtime.ts
|
|
415
526
|
var sparseExecFileSync = execFileSync;
|
|
416
527
|
function resolveQliHomeDir() {
|
|
417
|
-
return
|
|
528
|
+
return path7.resolve(process.env.QLI_HOME ?? path7.join(os2.homedir(), ".qli"));
|
|
418
529
|
}
|
|
419
530
|
function resolveCacheDir(workspacePath, configuredPath) {
|
|
420
531
|
if (configuredPath === "~/.qli") {
|
|
421
532
|
return resolveQliHomeDir();
|
|
422
533
|
}
|
|
423
534
|
if (configuredPath.startsWith("~/.qli/")) {
|
|
424
|
-
return
|
|
535
|
+
return path7.join(resolveQliHomeDir(), configuredPath.slice("~/.qli/".length));
|
|
425
536
|
}
|
|
426
537
|
if (configuredPath === "~") {
|
|
427
|
-
return
|
|
538
|
+
return os2.homedir();
|
|
428
539
|
}
|
|
429
540
|
if (configuredPath.startsWith("~/")) {
|
|
430
|
-
return
|
|
541
|
+
return path7.join(os2.homedir(), configuredPath.slice(2));
|
|
431
542
|
}
|
|
432
|
-
return
|
|
543
|
+
return path7.isAbsolute(configuredPath) ? configuredPath : path7.resolve(workspacePath, configuredPath.replace(/^\.kb\//, ""));
|
|
433
544
|
}
|
|
434
545
|
function packageRootFromImportMeta(importMetaUrl) {
|
|
435
|
-
return
|
|
546
|
+
return path7.resolve(path7.dirname(fileURLToPath(importMetaUrl)), "..");
|
|
436
547
|
}
|
|
437
548
|
async function sparseScriptPath(importMetaUrl) {
|
|
438
549
|
const base = packageRootFromImportMeta(importMetaUrl);
|
|
439
550
|
const candidates = [
|
|
440
|
-
|
|
441
|
-
|
|
551
|
+
path7.join(base, "scripts", "sparse-encode.py"),
|
|
552
|
+
path7.join(base, "..", "scripts", "sparse-encode.py")
|
|
442
553
|
];
|
|
443
554
|
for (const candidate of candidates) {
|
|
444
555
|
if (await fileExists(candidate)) {
|
|
445
|
-
return
|
|
556
|
+
return path7.resolve(candidate);
|
|
446
557
|
}
|
|
447
558
|
}
|
|
448
559
|
throw new Error(`sparse helper script not found; checked ${candidates.join(", ")}`);
|
|
@@ -468,9 +579,9 @@ async function runSparsePython({
|
|
|
468
579
|
}) {
|
|
469
580
|
const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
|
|
470
581
|
const scriptPath = await sparseScriptPath(importMetaUrl);
|
|
471
|
-
const payloadDir = await mkdtemp(
|
|
472
|
-
const payloadPath =
|
|
473
|
-
await
|
|
582
|
+
const payloadDir = await mkdtemp(path7.join(os2.tmpdir(), "qli-sparse-"));
|
|
583
|
+
const payloadPath = path7.join(payloadDir, "payload.json");
|
|
584
|
+
await writeFile4(payloadPath, JSON.stringify(payload), "utf8");
|
|
474
585
|
try {
|
|
475
586
|
return sparseExecFileSync(
|
|
476
587
|
"uv",
|
|
@@ -496,7 +607,7 @@ async function runSparsePython({
|
|
|
496
607
|
}
|
|
497
608
|
);
|
|
498
609
|
} finally {
|
|
499
|
-
await
|
|
610
|
+
await rm2(payloadDir, { recursive: true, force: true });
|
|
500
611
|
}
|
|
501
612
|
}
|
|
502
613
|
async function getDenseTransformersRuntime(cacheDir) {
|
|
@@ -510,28 +621,28 @@ async function getDenseTransformersRuntime(cacheDir) {
|
|
|
510
621
|
}
|
|
511
622
|
|
|
512
623
|
// src/vector/store.ts
|
|
513
|
-
import { mkdir as
|
|
514
|
-
import
|
|
624
|
+
import { mkdir as mkdir4, rm as rm3, writeFile as writeFile6 } from "fs/promises";
|
|
625
|
+
import path8 from "path";
|
|
515
626
|
|
|
516
627
|
// src/core/gzip-json.ts
|
|
517
|
-
import { readFile as
|
|
628
|
+
import { readFile as readFile5, writeFile as writeFile5 } from "fs/promises";
|
|
518
629
|
import { promisify } from "util";
|
|
519
630
|
import { gunzip, gzip } from "zlib";
|
|
520
631
|
var gzipAsync = promisify(gzip);
|
|
521
632
|
var gunzipAsync = promisify(gunzip);
|
|
522
633
|
async function writeGzipJson(filePath, value) {
|
|
523
634
|
const payload = JSON.stringify(value, null, 2);
|
|
524
|
-
await
|
|
635
|
+
await writeFile5(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
|
|
525
636
|
}
|
|
526
637
|
async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
|
|
527
638
|
if (await fileExists(gzipPath)) {
|
|
528
|
-
const payload = await
|
|
639
|
+
const payload = await readFile5(gzipPath);
|
|
529
640
|
return JSON.parse((await gunzipAsync(payload)).toString("utf8"));
|
|
530
641
|
}
|
|
531
642
|
if (legacyPath && await fileExists(legacyPath)) {
|
|
532
|
-
return JSON.parse(await
|
|
643
|
+
return JSON.parse(await readFile5(legacyPath, "utf8"));
|
|
533
644
|
}
|
|
534
|
-
return JSON.parse(await
|
|
645
|
+
return JSON.parse(await readFile5(gzipPath, "utf8"));
|
|
535
646
|
}
|
|
536
647
|
async function resolveExistingGzipOrFilePath(gzipPath, legacyPath) {
|
|
537
648
|
if (await fileExists(gzipPath)) {
|
|
@@ -545,39 +656,39 @@ async function resolveExistingGzipOrFilePath(gzipPath, legacyPath) {
|
|
|
545
656
|
|
|
546
657
|
// src/vector/store.ts
|
|
547
658
|
function vectorsDir(workspacePath) {
|
|
548
|
-
return
|
|
659
|
+
return path8.join(workspacePath, "vectors");
|
|
549
660
|
}
|
|
550
661
|
function sharedModelStateDir() {
|
|
551
|
-
return
|
|
662
|
+
return path8.join(resolveQliHomeDir(), "models", "status");
|
|
552
663
|
}
|
|
553
664
|
function denseVectorPath(workspacePath) {
|
|
554
|
-
return
|
|
665
|
+
return path8.join(vectorsDir(workspacePath), "dense.latest.json.gz");
|
|
555
666
|
}
|
|
556
667
|
function denseMetaPath(workspacePath) {
|
|
557
|
-
return
|
|
668
|
+
return path8.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
|
|
558
669
|
}
|
|
559
670
|
function sparseVectorPath(workspacePath) {
|
|
560
|
-
return
|
|
671
|
+
return path8.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
|
|
561
672
|
}
|
|
562
673
|
function sparseMetaPath(workspacePath) {
|
|
563
|
-
return
|
|
674
|
+
return path8.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
|
|
564
675
|
}
|
|
565
676
|
function legacyDenseVectorPath(workspacePath) {
|
|
566
|
-
return
|
|
677
|
+
return path8.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
567
678
|
}
|
|
568
679
|
function legacyDenseMetaPath(workspacePath) {
|
|
569
|
-
return
|
|
680
|
+
return path8.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
570
681
|
}
|
|
571
682
|
function legacySparseVectorPath(workspacePath) {
|
|
572
|
-
return
|
|
683
|
+
return path8.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
573
684
|
}
|
|
574
685
|
function legacySparseMetaPath(workspacePath) {
|
|
575
|
-
return
|
|
686
|
+
return path8.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
|
|
576
687
|
}
|
|
577
688
|
function pullMarkerPath(type, workspacePath, modelId, cacheDir) {
|
|
578
689
|
const resolvedCacheDir = resolveCacheDir(workspacePath, cacheDir);
|
|
579
690
|
const cacheKey = sha256(resolvedCacheDir).slice(0, 16);
|
|
580
|
-
return
|
|
691
|
+
return path8.join(sharedModelStateDir(), type, `${encodeURIComponent(modelId)}.${cacheKey}.json`);
|
|
581
692
|
}
|
|
582
693
|
function densePullMarker(workspacePath, modelId, cacheDir) {
|
|
583
694
|
return pullMarkerPath("dense", workspacePath, modelId, cacheDir);
|
|
@@ -586,24 +697,24 @@ function sparsePullMarker(workspacePath, modelId, cacheDir) {
|
|
|
586
697
|
return pullMarkerPath("sparse", workspacePath, modelId, cacheDir);
|
|
587
698
|
}
|
|
588
699
|
async function writeDensePayload(workspacePath, payload) {
|
|
589
|
-
await
|
|
700
|
+
await mkdir4(vectorsDir(workspacePath), { recursive: true });
|
|
590
701
|
await writeGzipJson(denseVectorPath(workspacePath), payload);
|
|
591
702
|
await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
|
|
592
703
|
await Promise.all([
|
|
593
|
-
|
|
594
|
-
|
|
704
|
+
rm3(legacyDenseVectorPath(workspacePath), { force: true }),
|
|
705
|
+
rm3(legacyDenseMetaPath(workspacePath), { force: true })
|
|
595
706
|
]);
|
|
596
707
|
}
|
|
597
708
|
async function readDensePayload(workspacePath) {
|
|
598
709
|
return readJsonFromGzipOrFile(denseVectorPath(workspacePath), legacyDenseVectorPath(workspacePath));
|
|
599
710
|
}
|
|
600
711
|
async function writeSparsePayload(workspacePath, payload) {
|
|
601
|
-
await
|
|
712
|
+
await mkdir4(vectorsDir(workspacePath), { recursive: true });
|
|
602
713
|
await writeGzipJson(sparseVectorPath(workspacePath), payload);
|
|
603
714
|
await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
|
|
604
715
|
await Promise.all([
|
|
605
|
-
|
|
606
|
-
|
|
716
|
+
rm3(legacySparseVectorPath(workspacePath), { force: true }),
|
|
717
|
+
rm3(legacySparseMetaPath(workspacePath), { force: true })
|
|
607
718
|
]);
|
|
608
719
|
}
|
|
609
720
|
async function readSparsePayload(workspacePath) {
|
|
@@ -611,13 +722,13 @@ async function readSparsePayload(workspacePath) {
|
|
|
611
722
|
}
|
|
612
723
|
async function writeDensePullMarker(workspacePath, model, value) {
|
|
613
724
|
const markerPath = densePullMarker(workspacePath, model.modelId, model.cacheDir);
|
|
614
|
-
await
|
|
615
|
-
await
|
|
725
|
+
await mkdir4(path8.dirname(markerPath), { recursive: true });
|
|
726
|
+
await writeFile6(markerPath, JSON.stringify(value, null, 2), "utf8");
|
|
616
727
|
}
|
|
617
728
|
async function writeSparsePullMarker(workspacePath, model, value) {
|
|
618
729
|
const markerPath = sparsePullMarker(workspacePath, model.modelId, model.cacheDir);
|
|
619
|
-
await
|
|
620
|
-
await
|
|
730
|
+
await mkdir4(path8.dirname(markerPath), { recursive: true });
|
|
731
|
+
await writeFile6(markerPath, JSON.stringify(value, null, 2), "utf8");
|
|
621
732
|
}
|
|
622
733
|
async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
623
734
|
const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
|
|
@@ -720,7 +831,7 @@ function exactDenseQuery(payload, vector, topK) {
|
|
|
720
831
|
}
|
|
721
832
|
async function pullDenseModel(workspacePath, config) {
|
|
722
833
|
const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
|
|
723
|
-
await
|
|
834
|
+
await mkdir5(cacheDir, { recursive: true });
|
|
724
835
|
const embedder = await createEmbedder(cacheDir, config.modelId);
|
|
725
836
|
try {
|
|
726
837
|
await embedder.embed("warm dense model cache");
|
|
@@ -733,9 +844,9 @@ async function buildDenseVectors({
|
|
|
733
844
|
config,
|
|
734
845
|
progress
|
|
735
846
|
}) {
|
|
736
|
-
const chunks = await readJsonl(
|
|
847
|
+
const chunks = await readJsonl(path9.join(workspacePath, "chunks", "chunks.jsonl"));
|
|
737
848
|
const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
|
|
738
|
-
await
|
|
849
|
+
await mkdir5(cacheDir, { recursive: true });
|
|
739
850
|
const embedder = await createEmbedder(cacheDir, config.modelId);
|
|
740
851
|
try {
|
|
741
852
|
const records = [];
|
|
@@ -819,8 +930,8 @@ async function denseQuery({
|
|
|
819
930
|
|
|
820
931
|
// src/vector/sparse.ts
|
|
821
932
|
import { SparseVectorFieldIndex } from "@tryformation/querylight-ts";
|
|
822
|
-
import { mkdir as
|
|
823
|
-
import
|
|
933
|
+
import { mkdir as mkdir6 } from "fs/promises";
|
|
934
|
+
import path10 from "path";
|
|
824
935
|
var sparseQueryEncoderFactory = null;
|
|
825
936
|
var sparseDocumentBuilderFactory = null;
|
|
826
937
|
function buildSparseQueryVector(tokenIds, tokenWeights) {
|
|
@@ -865,7 +976,6 @@ async function createSparseQueryEncoder(cacheDir, modelId, queryTokenWeights) {
|
|
|
865
976
|
return async (text) => {
|
|
866
977
|
const features = await tokenizer([text], {
|
|
867
978
|
truncation: true,
|
|
868
|
-
return_attention_mask: false,
|
|
869
979
|
return_token_type_ids: false
|
|
870
980
|
});
|
|
871
981
|
return buildSparseQueryVector(normalizeTokenIds(features.input_ids), queryTokenWeights);
|
|
@@ -874,7 +984,7 @@ async function createSparseQueryEncoder(cacheDir, modelId, queryTokenWeights) {
|
|
|
874
984
|
async function pullSparseModel(workspacePath, config) {
|
|
875
985
|
await ensureUvAvailable();
|
|
876
986
|
const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
|
|
877
|
-
await
|
|
987
|
+
await mkdir6(cacheDir, { recursive: true });
|
|
878
988
|
await runSparsePython({
|
|
879
989
|
workspacePath,
|
|
880
990
|
config,
|
|
@@ -925,7 +1035,7 @@ async function buildSparseVectors({
|
|
|
925
1035
|
config,
|
|
926
1036
|
progress
|
|
927
1037
|
}) {
|
|
928
|
-
const chunks = await readJsonl(
|
|
1038
|
+
const chunks = await readJsonl(path10.join(workspacePath, "chunks", "chunks.jsonl"));
|
|
929
1039
|
reportProgress(progress, `Encoding ${chunks.length} chunk${chunks.length === 1 ? "" : "s"} for sparse retrieval`);
|
|
930
1040
|
const built = await buildSparseDocuments(workspacePath, config, chunks);
|
|
931
1041
|
reportProgress(progress, "Building sparse vector index");
|
|
@@ -1058,31 +1168,31 @@ async function getModelStatus(workspacePath, config) {
|
|
|
1058
1168
|
}
|
|
1059
1169
|
|
|
1060
1170
|
// src/index/index-store.ts
|
|
1061
|
-
import { mkdir as
|
|
1062
|
-
import
|
|
1171
|
+
import { mkdir as mkdir7, rm as rm4 } from "fs/promises";
|
|
1172
|
+
import path11 from "path";
|
|
1063
1173
|
function versionedIndexPath(workspacePath, stamp) {
|
|
1064
|
-
return
|
|
1174
|
+
return path11.join(workspacePath, "indexes", `${stamp}.json.gz`);
|
|
1065
1175
|
}
|
|
1066
1176
|
function versionedLegacyIndexPath(workspacePath, stamp) {
|
|
1067
|
-
return
|
|
1177
|
+
return path11.join(workspacePath, "indexes", `${stamp}.json`);
|
|
1068
1178
|
}
|
|
1069
1179
|
function versionedMetaPath(workspacePath, stamp) {
|
|
1070
|
-
return
|
|
1180
|
+
return path11.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
|
|
1071
1181
|
}
|
|
1072
1182
|
function versionedLegacyMetaPath(workspacePath, stamp) {
|
|
1073
|
-
return
|
|
1183
|
+
return path11.join(workspacePath, "indexes", `${stamp}.meta.json`);
|
|
1074
1184
|
}
|
|
1075
1185
|
function latestIndexPath(workspacePath) {
|
|
1076
|
-
return
|
|
1186
|
+
return path11.join(workspacePath, "indexes", "latest.json.gz");
|
|
1077
1187
|
}
|
|
1078
1188
|
function legacyLatestIndexPath(workspacePath) {
|
|
1079
|
-
return
|
|
1189
|
+
return path11.join(workspacePath, "indexes", "latest.json");
|
|
1080
1190
|
}
|
|
1081
1191
|
function latestMetaPath(workspacePath) {
|
|
1082
|
-
return
|
|
1192
|
+
return path11.join(workspacePath, "indexes", "latest.meta.json.gz");
|
|
1083
1193
|
}
|
|
1084
1194
|
function legacyLatestMetaPath(workspacePath) {
|
|
1085
|
-
return
|
|
1195
|
+
return path11.join(workspacePath, "indexes", "latest.meta.json");
|
|
1086
1196
|
}
|
|
1087
1197
|
async function writeIndexArtifacts({
|
|
1088
1198
|
workspacePath,
|
|
@@ -1094,16 +1204,16 @@ async function writeIndexArtifacts({
|
|
|
1094
1204
|
const metaPath = versionedMetaPath(workspacePath, stamp);
|
|
1095
1205
|
const latestIndexArtifactPath = latestIndexPath(workspacePath);
|
|
1096
1206
|
const latestMetadataArtifactPath = latestMetaPath(workspacePath);
|
|
1097
|
-
await
|
|
1207
|
+
await mkdir7(path11.join(workspacePath, "indexes"), { recursive: true });
|
|
1098
1208
|
await writeGzipJson(indexPath, indexState);
|
|
1099
1209
|
await writeGzipJson(metaPath, metadata);
|
|
1100
1210
|
await writeGzipJson(latestIndexArtifactPath, indexState);
|
|
1101
1211
|
await writeGzipJson(latestMetadataArtifactPath, metadata);
|
|
1102
1212
|
await Promise.all([
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1213
|
+
rm4(legacyLatestIndexPath(workspacePath), { force: true }),
|
|
1214
|
+
rm4(legacyLatestMetaPath(workspacePath), { force: true }),
|
|
1215
|
+
rm4(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
|
|
1216
|
+
rm4(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
|
|
1107
1217
|
]);
|
|
1108
1218
|
return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
|
|
1109
1219
|
}
|
|
@@ -1168,9 +1278,9 @@ async function buildIndex({
|
|
|
1168
1278
|
}) {
|
|
1169
1279
|
const config = await loadConfig(workspacePath);
|
|
1170
1280
|
reportProgress(progress, "Loading documents, chunks, and sources");
|
|
1171
|
-
const chunks = await readJsonl(
|
|
1172
|
-
const documents = await readJsonl(
|
|
1173
|
-
const sources = await readJsonl(
|
|
1281
|
+
const chunks = await readJsonl(path12.join(workspacePath, "chunks", "chunks.jsonl"));
|
|
1282
|
+
const documents = await readJsonl(path12.join(workspacePath, "documents", "documents.jsonl"));
|
|
1283
|
+
const sources = await readJsonl(path12.join(workspacePath, "sources", "sources.jsonl"));
|
|
1174
1284
|
const metadataFields = [...new Set(chunks.flatMap((chunk) => Object.keys(chunk.metadata).map((key) => `metadata.${key}`)))];
|
|
1175
1285
|
const index = new DocumentIndex(createIndexMapping(metadataFields));
|
|
1176
1286
|
const documentsById = new Map(documents.map((document) => [document.id, document]));
|
|
@@ -1249,7 +1359,7 @@ async function buildIndex({
|
|
|
1249
1359
|
}
|
|
1250
1360
|
|
|
1251
1361
|
// src/ingest/ingest-service.ts
|
|
1252
|
-
import
|
|
1362
|
+
import path18 from "path";
|
|
1253
1363
|
|
|
1254
1364
|
// src/core/concurrency.ts
|
|
1255
1365
|
async function mapWithConcurrency(items, limit, worker) {
|
|
@@ -1273,17 +1383,17 @@ async function mapWithConcurrency(items, limit, worker) {
|
|
|
1273
1383
|
}
|
|
1274
1384
|
|
|
1275
1385
|
// src/core/runs.ts
|
|
1276
|
-
import
|
|
1386
|
+
import path13 from "path";
|
|
1277
1387
|
async function writeRun(workspacePath, run) {
|
|
1278
|
-
await writeJsonl(
|
|
1388
|
+
await writeJsonl(path13.join(workspacePath, "runs", `${run.id}.json`), [run]);
|
|
1279
1389
|
}
|
|
1280
1390
|
async function listRuns(workspacePath) {
|
|
1281
1391
|
const fs = await import("fs/promises");
|
|
1282
|
-
const dir =
|
|
1392
|
+
const dir = path13.join(workspacePath, "runs");
|
|
1283
1393
|
try {
|
|
1284
1394
|
const entries = await fs.readdir(dir);
|
|
1285
1395
|
const records = await Promise.all(entries.filter((name) => name.endsWith(".json")).map(async (name) => {
|
|
1286
|
-
const runs = await readJsonl(
|
|
1396
|
+
const runs = await readJsonl(path13.join(dir, name));
|
|
1287
1397
|
return runs[0];
|
|
1288
1398
|
}));
|
|
1289
1399
|
return records.filter((record) => record != null).sort((a, b) => a.createdAt.localeCompare(b.createdAt));
|
|
@@ -1293,8 +1403,8 @@ async function listRuns(workspacePath) {
|
|
|
1293
1403
|
}
|
|
1294
1404
|
|
|
1295
1405
|
// src/sources/source-store.ts
|
|
1296
|
-
import
|
|
1297
|
-
var sourcesFile = (workspacePath) =>
|
|
1406
|
+
import path14 from "path";
|
|
1407
|
+
var sourcesFile = (workspacePath) => path14.join(workspacePath, "sources", "sources.jsonl");
|
|
1298
1408
|
async function listSources(workspacePath) {
|
|
1299
1409
|
return readJsonl(sourcesFile(workspacePath));
|
|
1300
1410
|
}
|
|
@@ -1340,8 +1450,8 @@ async function removeSource(workspacePath, sourceId) {
|
|
|
1340
1450
|
}
|
|
1341
1451
|
|
|
1342
1452
|
// src/ingest/document-utils.ts
|
|
1343
|
-
import { mkdir as
|
|
1344
|
-
import
|
|
1453
|
+
import { mkdir as mkdir8, rm as rm5, writeFile as writeFile7 } from "fs/promises";
|
|
1454
|
+
import path15 from "path";
|
|
1345
1455
|
|
|
1346
1456
|
// src/normalize/normalize-markdown.ts
|
|
1347
1457
|
import matter2 from "gray-matter";
|
|
@@ -1393,8 +1503,8 @@ async function writeNormalizedDocument({
|
|
|
1393
1503
|
normalizedPath,
|
|
1394
1504
|
markdown
|
|
1395
1505
|
}) {
|
|
1396
|
-
await
|
|
1397
|
-
await
|
|
1506
|
+
await mkdir8(path15.dirname(normalizedPath), { recursive: true });
|
|
1507
|
+
await writeFile7(
|
|
1398
1508
|
normalizedPath,
|
|
1399
1509
|
withFrontmatter(
|
|
1400
1510
|
{
|
|
@@ -1416,14 +1526,14 @@ async function writeNormalizedDocument({
|
|
|
1416
1526
|
}
|
|
1417
1527
|
async function deleteDocumentArtifacts(document) {
|
|
1418
1528
|
await Promise.all([
|
|
1419
|
-
document.rawPath ?
|
|
1420
|
-
|
|
1529
|
+
document.rawPath ? rm5(document.rawPath, { force: true }) : Promise.resolve(),
|
|
1530
|
+
rm5(document.normalizedPath, { force: true })
|
|
1421
1531
|
]);
|
|
1422
1532
|
}
|
|
1423
1533
|
|
|
1424
1534
|
// src/ingest/adapters/directory-adapter.ts
|
|
1425
1535
|
import fg from "fast-glob";
|
|
1426
|
-
import
|
|
1536
|
+
import path16 from "path";
|
|
1427
1537
|
async function listDirectoryFiles(source) {
|
|
1428
1538
|
const include = source.crawl?.includePatterns?.length ? source.crawl.includePatterns : ["**/*.md", "**/*.txt", "**/*.html", "**/*.htm", "**/*.pdf", "**/*.docx"];
|
|
1429
1539
|
const exclude = source.crawl?.excludePatterns ?? [];
|
|
@@ -1436,12 +1546,12 @@ async function listDirectoryFiles(source) {
|
|
|
1436
1546
|
ignore: exclude,
|
|
1437
1547
|
followSymbolicLinks: false
|
|
1438
1548
|
});
|
|
1439
|
-
return matches.map((match) =>
|
|
1549
|
+
return matches.map((match) => path16.resolve(match)).sort();
|
|
1440
1550
|
}
|
|
1441
1551
|
|
|
1442
1552
|
// src/ingest/adapters/file-adapter.ts
|
|
1443
1553
|
import { basename, extname, resolve } from "path";
|
|
1444
|
-
import { mkdir as
|
|
1554
|
+
import { mkdir as mkdir9, readFile as readFile9, stat as stat4, writeFile as writeFile8 } from "fs/promises";
|
|
1445
1555
|
|
|
1446
1556
|
// src/ingest/extractors/docx-extractor.ts
|
|
1447
1557
|
import mammoth from "mammoth";
|
|
@@ -1615,16 +1725,16 @@ function extractPublicationDateFromHtml(html) {
|
|
|
1615
1725
|
}
|
|
1616
1726
|
|
|
1617
1727
|
// src/ingest/extractors/markdown-extractor.ts
|
|
1618
|
-
import { readFile as
|
|
1728
|
+
import { readFile as readFile6 } from "fs/promises";
|
|
1619
1729
|
async function extractMarkdown(filePath) {
|
|
1620
|
-
return
|
|
1730
|
+
return readFile6(filePath, "utf8");
|
|
1621
1731
|
}
|
|
1622
1732
|
|
|
1623
1733
|
// src/ingest/extractors/pdf-extractor.ts
|
|
1624
|
-
import { readFile as
|
|
1734
|
+
import { readFile as readFile7 } from "fs/promises";
|
|
1625
1735
|
import { PDFParse } from "pdf-parse";
|
|
1626
1736
|
async function extractPdf(filePath) {
|
|
1627
|
-
const buffer = await
|
|
1737
|
+
const buffer = await readFile7(filePath);
|
|
1628
1738
|
const parser = new PDFParse({ data: buffer });
|
|
1629
1739
|
try {
|
|
1630
1740
|
const parsed = await parser.getText();
|
|
@@ -1635,9 +1745,9 @@ async function extractPdf(filePath) {
|
|
|
1635
1745
|
}
|
|
1636
1746
|
|
|
1637
1747
|
// src/ingest/extractors/text-extractor.ts
|
|
1638
|
-
import { readFile as
|
|
1748
|
+
import { readFile as readFile8 } from "fs/promises";
|
|
1639
1749
|
async function extractText(filePath) {
|
|
1640
|
-
return
|
|
1750
|
+
return readFile8(filePath, "utf8");
|
|
1641
1751
|
}
|
|
1642
1752
|
|
|
1643
1753
|
// src/ingest/adapters/file-adapter.ts
|
|
@@ -1672,7 +1782,7 @@ async function extractFileContent(filePath, mimeType) {
|
|
|
1672
1782
|
${text}`, raw: text };
|
|
1673
1783
|
}
|
|
1674
1784
|
if (mimeType === "text/html") {
|
|
1675
|
-
const raw = await
|
|
1785
|
+
const raw = await readFile9(filePath, "utf8");
|
|
1676
1786
|
const extracted = extractHtmlToMarkdown(raw);
|
|
1677
1787
|
return { title: extracted.title, markdown: `# ${extracted.title}
|
|
1678
1788
|
|
|
@@ -1717,7 +1827,7 @@ async function ingestFile({
|
|
|
1717
1827
|
previous
|
|
1718
1828
|
}) {
|
|
1719
1829
|
const resolved = resolve(filePath);
|
|
1720
|
-
const fileStat = await
|
|
1830
|
+
const fileStat = await stat4(resolved);
|
|
1721
1831
|
const mimeType = mimeTypeFor(resolved);
|
|
1722
1832
|
const extracted = await extractFileContent(resolved, mimeType);
|
|
1723
1833
|
const documentId = stableId("doc", source.id, resolved);
|
|
@@ -1728,10 +1838,10 @@ async function ingestFile({
|
|
|
1728
1838
|
const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
|
|
1729
1839
|
const indexedAt = now;
|
|
1730
1840
|
const crawledAt = now;
|
|
1731
|
-
await
|
|
1732
|
-
await
|
|
1841
|
+
await mkdir9(resolve(workspacePath, "normalized"), { recursive: true });
|
|
1842
|
+
await mkdir9(resolve(workspacePath, "raw", source.id), { recursive: true });
|
|
1733
1843
|
if (extracted.raw) {
|
|
1734
|
-
await
|
|
1844
|
+
await writeFile8(rawPath, extracted.raw, "utf8");
|
|
1735
1845
|
}
|
|
1736
1846
|
await writeNormalizedDocument({
|
|
1737
1847
|
documentId,
|
|
@@ -1794,7 +1904,7 @@ ${content}`;
|
|
|
1794
1904
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1795
1905
|
const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
|
|
1796
1906
|
const indexedAt = now;
|
|
1797
|
-
await
|
|
1907
|
+
await mkdir9(resolve(workspacePath, "normalized"), { recursive: true });
|
|
1798
1908
|
await writeNormalizedDocument({
|
|
1799
1909
|
documentId,
|
|
1800
1910
|
sourceId: source.id,
|
|
@@ -1838,7 +1948,7 @@ async function reprocessStoredDocument(document, source) {
|
|
|
1838
1948
|
if (!document.rawPath) {
|
|
1839
1949
|
return null;
|
|
1840
1950
|
}
|
|
1841
|
-
const raw = await
|
|
1951
|
+
const raw = await readFile9(document.rawPath, "utf8");
|
|
1842
1952
|
const fallbackTitle = document.title || basename(document.uri);
|
|
1843
1953
|
const extracted = await extractRawContent(raw, document.mimeType, fallbackTitle);
|
|
1844
1954
|
const contentHash = sha256(extracted.markdown);
|
|
@@ -1955,8 +2065,8 @@ async function parseRssFeedDocument(xml, source) {
|
|
|
1955
2065
|
}
|
|
1956
2066
|
|
|
1957
2067
|
// src/ingest/adapters/url-adapter.ts
|
|
1958
|
-
import { mkdir as
|
|
1959
|
-
import
|
|
2068
|
+
import { mkdir as mkdir10, readFile as readFile10, writeFile as writeFile9 } from "fs/promises";
|
|
2069
|
+
import path17 from "path";
|
|
1960
2070
|
|
|
1961
2071
|
// src/core/urls.ts
|
|
1962
2072
|
function normalizeRemoteUrl(uri) {
|
|
@@ -1999,16 +2109,16 @@ async function normalizeRemoteDocument({
|
|
|
1999
2109
|
|
|
2000
2110
|
${extracted.markdown}`;
|
|
2001
2111
|
const documentId = stableId("doc", source.id, canonicalUri);
|
|
2002
|
-
const normalizedPath =
|
|
2003
|
-
const rawPath =
|
|
2112
|
+
const normalizedPath = path17.resolve(workspacePath, "normalized", `${documentId}.md`);
|
|
2113
|
+
const rawPath = path17.resolve(workspacePath, "raw", source.id, `${sha256(canonicalUri).slice(0, 12)}.html`);
|
|
2004
2114
|
const contentHash = sha256(markdown);
|
|
2005
2115
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
2006
2116
|
const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
|
|
2007
2117
|
const indexedAt = now;
|
|
2008
2118
|
const crawledAt = now;
|
|
2009
2119
|
const resolvedPublicationDate = choosePublicationDate(publicationDate, extractPublicationDateFromHtml(body), previous?.publicationDate);
|
|
2010
|
-
await
|
|
2011
|
-
await
|
|
2120
|
+
await mkdir10(path17.resolve(workspacePath, "raw", source.id), { recursive: true });
|
|
2121
|
+
await writeFile9(rawPath, body, "utf8");
|
|
2012
2122
|
await writeNormalizedDocument({
|
|
2013
2123
|
documentId,
|
|
2014
2124
|
sourceId: source.id,
|
|
@@ -2128,7 +2238,7 @@ async function reprocessRemoteDocument(document, source) {
|
|
|
2128
2238
|
if (!document.rawPath || !await fileExists(document.rawPath)) {
|
|
2129
2239
|
return null;
|
|
2130
2240
|
}
|
|
2131
|
-
const raw = await
|
|
2241
|
+
const raw = await readFile10(document.rawPath, "utf8");
|
|
2132
2242
|
const extracted = extractHtmlToMarkdown(raw);
|
|
2133
2243
|
const markdown = `# ${extracted.title}
|
|
2134
2244
|
|
|
@@ -2307,7 +2417,7 @@ async function crawlWebsite(source, defaults, progress) {
|
|
|
2307
2417
|
|
|
2308
2418
|
// src/ingest/ingest-service.ts
|
|
2309
2419
|
function documentsFile(workspacePath) {
|
|
2310
|
-
return
|
|
2420
|
+
return path18.join(workspacePath, "documents", "documents.jsonl");
|
|
2311
2421
|
}
|
|
2312
2422
|
async function loadDocuments(workspacePath) {
|
|
2313
2423
|
return readJsonl(documentsFile(workspacePath));
|
|
@@ -2850,9 +2960,9 @@ async function discoverWebsiteFeed(websiteUrl, userAgent) {
|
|
|
2850
2960
|
}
|
|
2851
2961
|
|
|
2852
2962
|
// src/query/search-service.ts
|
|
2853
|
-
import { readFile as
|
|
2963
|
+
import { readFile as readFile11 } from "fs/promises";
|
|
2854
2964
|
import { reciprocalRankFusion, searchJsonDsl } from "@tryformation/querylight-ts";
|
|
2855
|
-
import
|
|
2965
|
+
import path19 from "path";
|
|
2856
2966
|
async function loadHydratedIndex(workspacePath) {
|
|
2857
2967
|
let state;
|
|
2858
2968
|
try {
|
|
@@ -3062,7 +3172,7 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
|
|
|
3062
3172
|
if (!await fileExists(document.normalizedPath)) {
|
|
3063
3173
|
return buildSnippet(chunk.text, query);
|
|
3064
3174
|
}
|
|
3065
|
-
const raw = await
|
|
3175
|
+
const raw = await readFile11(document.normalizedPath, "utf8");
|
|
3066
3176
|
orderedChunks = buildChunksForDocument(document, raw, config);
|
|
3067
3177
|
orderedChunkCache.set(document.id, orderedChunks);
|
|
3068
3178
|
}
|
|
@@ -3397,9 +3507,9 @@ async function searchIndex({
|
|
|
3397
3507
|
const config = await loadConfig(workspacePath);
|
|
3398
3508
|
const mode = retrievalMode ?? config.retrieval.defaultMode;
|
|
3399
3509
|
const candidateLimit = Math.max(topK * 5, 50);
|
|
3400
|
-
const chunks = new Map((await readJsonl(
|
|
3401
|
-
const documents = new Map((await readJsonl(
|
|
3402
|
-
const sources = new Map((await readJsonl(
|
|
3510
|
+
const chunks = new Map((await readJsonl(path19.join(workspacePath, "chunks", "chunks.jsonl"))).map((chunk) => [chunk.id, chunk]));
|
|
3511
|
+
const documents = new Map((await readJsonl(path19.join(workspacePath, "documents", "documents.jsonl"))).map((document) => [document.id, document]));
|
|
3512
|
+
const sources = new Map((await readJsonl(path19.join(workspacePath, "sources", "sources.jsonl"))).map((source) => [source.id, source]));
|
|
3403
3513
|
const orderedChunkCache = /* @__PURE__ */ new Map();
|
|
3404
3514
|
const normalizedQuery = query.trim();
|
|
3405
3515
|
const filterIds = [...chunks.values()].filter((chunk) => filterChunk(chunk, documents.get(chunk.documentId), sources.get(chunk.sourceId), { sourceId, sourceIds, sourceName, sourceNames, sourceType, sourceTypes, uriPrefix, uriPrefixes, hasPublicationDate, tag, tags, metadata, dateRanges })).map((chunk) => chunk.id);
|
|
@@ -3572,18 +3682,18 @@ async function searchIndex({
|
|
|
3572
3682
|
|
|
3573
3683
|
// src/server/search-api.ts
|
|
3574
3684
|
import { createServer } from "http";
|
|
3575
|
-
import { readdir, stat as
|
|
3576
|
-
import
|
|
3685
|
+
import { readdir as readdir2, stat as stat5 } from "fs/promises";
|
|
3686
|
+
import path20 from "path";
|
|
3577
3687
|
async function pathIsDirectory(candidatePath) {
|
|
3578
3688
|
try {
|
|
3579
|
-
return (await
|
|
3689
|
+
return (await stat5(candidatePath)).isDirectory();
|
|
3580
3690
|
} catch {
|
|
3581
3691
|
return false;
|
|
3582
3692
|
}
|
|
3583
3693
|
}
|
|
3584
3694
|
async function discoverKnowledgeBases(workspacePath) {
|
|
3585
3695
|
try {
|
|
3586
|
-
const singleWorkspace = await
|
|
3696
|
+
const singleWorkspace = (await resolveReadableWorkspace(workspacePath)).workspacePath;
|
|
3587
3697
|
const config = await loadConfig(singleWorkspace);
|
|
3588
3698
|
const index = await loadHydratedIndex(singleWorkspace);
|
|
3589
3699
|
return {
|
|
@@ -3600,19 +3710,20 @@ async function discoverKnowledgeBases(workspacePath) {
|
|
|
3600
3710
|
throw error;
|
|
3601
3711
|
}
|
|
3602
3712
|
}
|
|
3603
|
-
const resolvedRoot =
|
|
3713
|
+
const resolvedRoot = path20.resolve(workspacePath);
|
|
3604
3714
|
if (!await pathIsDirectory(resolvedRoot)) {
|
|
3605
3715
|
throw new CliError(`workspace path does not exist: ${resolvedRoot}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
|
|
3606
3716
|
}
|
|
3607
|
-
const entries = await
|
|
3608
|
-
const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory()).map(async (entry) => {
|
|
3609
|
-
const candidateWorkspace =
|
|
3717
|
+
const entries = await readdir2(resolvedRoot, { withFileTypes: true });
|
|
3718
|
+
const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory() || entry.isFile() && isWorkspaceArchivePath(entry.name)).map(async (entry) => {
|
|
3719
|
+
const candidateWorkspace = entry.isDirectory() ? path20.join(resolvedRoot, entry.name, ".kb") : path20.join(resolvedRoot, entry.name);
|
|
3720
|
+
const knowledgeBaseName = entry.isDirectory() ? entry.name : entry.name.replace(/\.zip$/i, "");
|
|
3610
3721
|
try {
|
|
3611
|
-
const workspace = await assertWorkspaceExists(candidateWorkspace);
|
|
3722
|
+
const workspace = entry.isDirectory() ? await assertWorkspaceExists(candidateWorkspace) : (await resolveReadableWorkspace(candidateWorkspace)).workspacePath;
|
|
3612
3723
|
const config = await loadConfig(workspace);
|
|
3613
3724
|
const index = await loadHydratedIndex(workspace);
|
|
3614
3725
|
return {
|
|
3615
|
-
name:
|
|
3726
|
+
name: knowledgeBaseName,
|
|
3616
3727
|
workspacePath: workspace,
|
|
3617
3728
|
configuredIndexName: config.index.name,
|
|
3618
3729
|
index
|
|
@@ -3626,7 +3737,7 @@ async function discoverKnowledgeBases(workspacePath) {
|
|
|
3626
3737
|
}))).filter((knowledgeBase) => knowledgeBase != null);
|
|
3627
3738
|
if (knowledgeBases.length === 0) {
|
|
3628
3739
|
throw new CliError(
|
|
3629
|
-
`no knowledge bases found at ${resolvedRoot}; use a .kb workspace or a directory of named subdirectories that each contain .kb`,
|
|
3740
|
+
`no knowledge bases found at ${resolvedRoot}; use a .kb workspace, a .zip workspace, or a directory of .zip files or named subdirectories that each contain .kb`,
|
|
3630
3741
|
"WORKSPACE_ERROR",
|
|
3631
3742
|
3 /* WorkspaceError */
|
|
3632
3743
|
);
|
|
@@ -3760,7 +3871,7 @@ async function startSearchApiServer({
|
|
|
3760
3871
|
}
|
|
3761
3872
|
|
|
3762
3873
|
// src/query/related-service.ts
|
|
3763
|
-
import
|
|
3874
|
+
import path21 from "path";
|
|
3764
3875
|
function cosineSimilarity2(left, right) {
|
|
3765
3876
|
let dot = 0;
|
|
3766
3877
|
let leftNorm = 0;
|
|
@@ -3836,7 +3947,7 @@ async function findRelatedDocuments({
|
|
|
3836
3947
|
if (!await fileExists(denseVectorPath(workspacePath))) {
|
|
3837
3948
|
throw new CliError("dense vector index is not built; run `qli models pull --dense` and `qli rebuild`", "DENSE_INDEX_MISSING", 7 /* QueryError */);
|
|
3838
3949
|
}
|
|
3839
|
-
const documents = await readJsonl(
|
|
3950
|
+
const documents = await readJsonl(path21.join(workspacePath, "documents", "documents.jsonl"));
|
|
3840
3951
|
const selected = resolveDocumentSelector(documents, document);
|
|
3841
3952
|
const densePayload = await readDensePayload(workspacePath);
|
|
3842
3953
|
const vectors = buildDocumentVectors(documents, densePayload.chunks, densePayload.metadata.dimensions);
|
|
@@ -3909,7 +4020,7 @@ async function createContext({
|
|
|
3909
4020
|
}
|
|
3910
4021
|
|
|
3911
4022
|
// src/report/diff-service.ts
|
|
3912
|
-
import
|
|
4023
|
+
import path22 from "path";
|
|
3913
4024
|
function chooseBaselineRun(runs, since) {
|
|
3914
4025
|
if (since === "last-run") {
|
|
3915
4026
|
return runs.at(-1);
|
|
@@ -3925,7 +4036,7 @@ async function diffWorkspace({
|
|
|
3925
4036
|
documentId,
|
|
3926
4037
|
since
|
|
3927
4038
|
}) {
|
|
3928
|
-
const current = await readJsonl(
|
|
4039
|
+
const current = await readJsonl(path22.join(workspacePath, "documents", "documents.jsonl"));
|
|
3929
4040
|
const baseline = chooseBaselineRun(await listRuns(workspacePath), since);
|
|
3930
4041
|
const previous = new Map((baseline?.documentsSnapshot ?? []).map((document) => [document.id, document]));
|
|
3931
4042
|
const changedDocuments = current.filter((document) => (!sourceId || document.sourceId === sourceId) && (!documentId || document.id === documentId)).filter((document) => {
|
|
@@ -4284,7 +4395,7 @@ function parseDateValue(input, optionName) {
|
|
|
4284
4395
|
return parsed.toISOString();
|
|
4285
4396
|
}
|
|
4286
4397
|
async function parseJsonArgument(input) {
|
|
4287
|
-
const raw = input.startsWith("@") ? await
|
|
4398
|
+
const raw = input.startsWith("@") ? await readFile12(path23.resolve(input.slice(1)), "utf8") : input;
|
|
4288
4399
|
try {
|
|
4289
4400
|
const parsed = JSON.parse(raw);
|
|
4290
4401
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
|
|
@@ -4338,20 +4449,24 @@ function resolveSearchTopK(optionsTopK, sourceTypes, dateRanges, defaultTopK) {
|
|
|
4338
4449
|
}
|
|
4339
4450
|
return defaultTopK;
|
|
4340
4451
|
}
|
|
4341
|
-
async function resolveWorkspace(options) {
|
|
4342
|
-
|
|
4452
|
+
async function resolveWorkspace(options, mode = {}) {
|
|
4453
|
+
const workspace = options.workspace ?? DEFAULT_WORKSPACE;
|
|
4454
|
+
if (mode.writable) {
|
|
4455
|
+
return assertWritableWorkspacePath(workspace);
|
|
4456
|
+
}
|
|
4457
|
+
return (await resolveReadableWorkspace(workspace)).workspacePath;
|
|
4343
4458
|
}
|
|
4344
4459
|
function workspaceFromArgv(argv) {
|
|
4345
4460
|
const index = argv.findIndex((arg) => arg === "--workspace");
|
|
4346
4461
|
if (index >= 0 && argv[index + 1]) {
|
|
4347
|
-
return
|
|
4462
|
+
return path23.resolve(argv[index + 1]);
|
|
4348
4463
|
}
|
|
4349
|
-
return
|
|
4464
|
+
return path23.resolve(DEFAULT_WORKSPACE);
|
|
4350
4465
|
}
|
|
4351
4466
|
async function runCli(argv, io = {}) {
|
|
4352
4467
|
const capture = { stdout: [], stderr: [], ...io };
|
|
4353
4468
|
const program = new Command();
|
|
4354
|
-
program.name("qli").description("Build and query a local Querylight workspace from files, directories, URLs, websites, and feeds.").showHelpAfterError().option("--workspace <path>", "Workspace directory. Defaults to .kb in the current directory.", DEFAULT_WORKSPACE).option("--config <path>", "Optional config file override. Useful for testing alternate retrieval settings.").option("--json", "Return a stable JSON envelope for automation and agents.").option("--silent", "Suppress progress logging for long-running commands.").option("--verbose", "Print more operational detail when a command supports it.").addOption(new Option("--quiet", "Deprecated alias for --silent.").hideHelp());
|
|
4469
|
+
program.name("qli").description("Build and query a local Querylight workspace from files, directories, URLs, websites, and feeds.").showHelpAfterError().option("--workspace <path>", "Workspace directory, or a packaged .zip workspace for read-only commands. Defaults to .kb in the current directory.", DEFAULT_WORKSPACE).option("--config <path>", "Optional config file override. Useful for testing alternate retrieval settings.").option("--json", "Return a stable JSON envelope for automation and agents.").option("--silent", "Suppress progress logging for long-running commands.").option("--verbose", "Print more operational detail when a command supports it.").addOption(new Option("--quiet", "Deprecated alias for --silent.").hideHelp());
|
|
4355
4470
|
program.addHelpText("after", `
|
|
4356
4471
|
Workflow:
|
|
4357
4472
|
1. Initialize a workspace with qli init
|
|
@@ -4363,12 +4478,15 @@ Examples:
|
|
|
4363
4478
|
qli init
|
|
4364
4479
|
qli source add directory ./docs --name "Product Docs" --tag docs
|
|
4365
4480
|
qli ingest
|
|
4481
|
+
qli package ./docs-kb.zip
|
|
4366
4482
|
qli rebuild --silent
|
|
4367
4483
|
qli search "api authentication" --top-k 8
|
|
4484
|
+
qli search --workspace ./docs-kb.zip "api authentication"
|
|
4368
4485
|
qli context "How do API keys work?" --top-k 8 --max-chars 8000
|
|
4369
4486
|
|
|
4370
4487
|
Long-running commands print progress to stderr by default. Use --silent to suppress it.
|
|
4371
4488
|
Use --json when another tool needs stable structured output.
|
|
4489
|
+
Read-only commands can use --workspace with a packaged .zip workspace.
|
|
4372
4490
|
|
|
4373
4491
|
Use qli <command> --help for command-specific options and examples.`);
|
|
4374
4492
|
program.command("init").description("Create a new workspace with the default directory layout and config, then pull missing retrieval models.").option("--force").addHelpText("after", `
|
|
@@ -4382,7 +4500,7 @@ Notes:
|
|
|
4382
4500
|
init pulls missing model assets for enabled retrieval modes.
|
|
4383
4501
|
Sparse model downloads require uv. If uv is not available, init skips the sparse pull.`).action(async function command(options) {
|
|
4384
4502
|
const global = this.optsWithGlobals();
|
|
4385
|
-
const workspace = await resolveWorkspace({ workspace: this.optsWithGlobals().workspace });
|
|
4503
|
+
const workspace = await resolveWorkspace({ workspace: this.optsWithGlobals().workspace }, { writable: true });
|
|
4386
4504
|
const result = await ensureWorkspace({ workspacePath: workspace, force: Boolean(options.force) });
|
|
4387
4505
|
const config = await loadConfig(workspace, global.config);
|
|
4388
4506
|
const status = await getModelStatus(workspace, config);
|
|
@@ -4423,7 +4541,7 @@ Notes:
|
|
|
4423
4541
|
}
|
|
4424
4542
|
validateSourceAddOptions(type, options);
|
|
4425
4543
|
const global = this.optsWithGlobals();
|
|
4426
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4544
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4427
4545
|
const config = await loadConfig(workspace, global.config);
|
|
4428
4546
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
4429
4547
|
const initialCrawl = createSourceCrawlConfig(type, options, { retentionDays: config.crawler.retentionDays });
|
|
@@ -4440,7 +4558,7 @@ Notes:
|
|
|
4440
4558
|
}
|
|
4441
4559
|
const stored = await addSource(workspace, {
|
|
4442
4560
|
type,
|
|
4443
|
-
uri: ["file", "directory"].includes(type) ?
|
|
4561
|
+
uri: ["file", "directory"].includes(type) ? path23.resolve(uri) : uri,
|
|
4444
4562
|
name: options.name,
|
|
4445
4563
|
enabled: true,
|
|
4446
4564
|
tags: options.tag ?? [],
|
|
@@ -4501,7 +4619,7 @@ Notes:
|
|
|
4501
4619
|
qli only exposes settings that the current source type uses at runtime.
|
|
4502
4620
|
URI, source type, and source id do not change here.`).action(async function command(sourceId, options) {
|
|
4503
4621
|
const global = this.optsWithGlobals();
|
|
4504
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4622
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4505
4623
|
const sources = await listSources(workspace);
|
|
4506
4624
|
const current = sources.find((source2) => source2.id === sourceId);
|
|
4507
4625
|
if (!current) {
|
|
@@ -4529,7 +4647,7 @@ Examples:
|
|
|
4529
4647
|
qli source remove src_123
|
|
4530
4648
|
qli source list --json`).action(async function command(sourceId) {
|
|
4531
4649
|
const global = this.optsWithGlobals();
|
|
4532
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4650
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4533
4651
|
await removeSource(workspace, sourceId);
|
|
4534
4652
|
emit(global.json, capture, response("source remove", workspace, { sourceId }), `Removed source ${sourceId}`);
|
|
4535
4653
|
});
|
|
@@ -4538,7 +4656,7 @@ Examples:
|
|
|
4538
4656
|
qli source disable src_123
|
|
4539
4657
|
qli source enable src_123`).action(async function command(sourceId) {
|
|
4540
4658
|
const global = this.optsWithGlobals();
|
|
4541
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4659
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4542
4660
|
const updated = await updateSource(workspace, sourceId, { enabled: false, updatedAt: (/* @__PURE__ */ new Date()).toISOString() });
|
|
4543
4661
|
emit(global.json, capture, response("source disable", workspace, updated), `Disabled source ${sourceId}`);
|
|
4544
4662
|
});
|
|
@@ -4547,7 +4665,7 @@ Examples:
|
|
|
4547
4665
|
qli source enable src_123
|
|
4548
4666
|
qli source list`).action(async function command(sourceId) {
|
|
4549
4667
|
const global = this.optsWithGlobals();
|
|
4550
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4668
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4551
4669
|
const updated = await updateSource(workspace, sourceId, { enabled: true, updatedAt: (/* @__PURE__ */ new Date()).toISOString() });
|
|
4552
4670
|
emit(global.json, capture, response("source enable", workspace, updated), `Enabled source ${sourceId}`);
|
|
4553
4671
|
});
|
|
@@ -4559,7 +4677,7 @@ Examples:
|
|
|
4559
4677
|
qli ingest --dense --sparse
|
|
4560
4678
|
qli ingest --silent`).action(async function command(options) {
|
|
4561
4679
|
const global = this.optsWithGlobals();
|
|
4562
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4680
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4563
4681
|
const result = await runIngestCommand({
|
|
4564
4682
|
workspace,
|
|
4565
4683
|
sourceId: options.source,
|
|
@@ -4577,7 +4695,7 @@ Examples:
|
|
|
4577
4695
|
qli chunk --document doc_123
|
|
4578
4696
|
qli chunk --silent`).action(async function command(options) {
|
|
4579
4697
|
const global = this.optsWithGlobals();
|
|
4580
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4698
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4581
4699
|
const result = await chunkDocuments({
|
|
4582
4700
|
workspacePath: workspace,
|
|
4583
4701
|
sourceId: options.source,
|
|
@@ -4593,7 +4711,7 @@ Examples:
|
|
|
4593
4711
|
qli reprocess --document doc_123
|
|
4594
4712
|
qli reprocess --silent`).action(async function command(options) {
|
|
4595
4713
|
const global = this.optsWithGlobals();
|
|
4596
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4714
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4597
4715
|
const result = await reprocessDocuments({
|
|
4598
4716
|
workspacePath: workspace,
|
|
4599
4717
|
sourceId: options.source,
|
|
@@ -4611,7 +4729,7 @@ Examples:
|
|
|
4611
4729
|
qli index build --dense --sparse
|
|
4612
4730
|
qli index build --silent`).action(async function command(options) {
|
|
4613
4731
|
const global = this.optsWithGlobals();
|
|
4614
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4732
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4615
4733
|
const result = await buildIndex({
|
|
4616
4734
|
workspacePath: workspace,
|
|
4617
4735
|
denseOverride: options.dense ? true : void 0,
|
|
@@ -4628,7 +4746,7 @@ Examples:
|
|
|
4628
4746
|
qli rebuild --dense --sparse
|
|
4629
4747
|
qli rebuild --silent`).action(async function command(options) {
|
|
4630
4748
|
const global = this.optsWithGlobals();
|
|
4631
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4749
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4632
4750
|
const progress = createProgressHandler(capture, global);
|
|
4633
4751
|
progress?.("info", "Rebuild step 1/3: ingest");
|
|
4634
4752
|
const ingest = await ingestSources({
|
|
@@ -4651,6 +4769,25 @@ Examples:
|
|
|
4651
4769
|
progress?.("info", "Rebuild complete");
|
|
4652
4770
|
emit(global.json, capture, response("rebuild", workspace, data), `Processed ${ingest.processedSources} sources, wrote ${chunk.chunksWritten} chunks`);
|
|
4653
4771
|
});
|
|
4772
|
+
program.command("package").description("Write the current workspace to a zip archive that read-only commands can use directly.").argument("<archive>", "Output .zip file.").option("--force", "Replace the output archive if it already exists.").addHelpText("after", `
|
|
4773
|
+
Examples:
|
|
4774
|
+
qli package ./docs-kb.zip
|
|
4775
|
+
qli package ./deploy/docs-kb.zip --workspace ./docs/.kb
|
|
4776
|
+
qli package ./docs-kb.zip --force --json
|
|
4777
|
+
|
|
4778
|
+
Notes:
|
|
4779
|
+
The archive stores the workspace contents at the zip root.
|
|
4780
|
+
Use the zip with read-only commands such as search, search-json, related, context, status, doctor, and serve.
|
|
4781
|
+
Rebuild the directory workspace and package it again when source content changes.`).action(async function command(archive, options) {
|
|
4782
|
+
const global = this.optsWithGlobals();
|
|
4783
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4784
|
+
const result = await packageWorkspaceArchive({
|
|
4785
|
+
workspacePath: workspace,
|
|
4786
|
+
outputPath: archive,
|
|
4787
|
+
force: Boolean(options.force)
|
|
4788
|
+
});
|
|
4789
|
+
emit(global.json, capture, response("package", workspace, result), `Packaged ${result.fileCount} files to ${result.archivePath}`);
|
|
4790
|
+
});
|
|
4654
4791
|
program.command("search").description("Search the built index and return ranked matching documents or chunks. Use search-json for raw JSON DSL queries.").argument("[query]", "Text query. Omit it to list the latest matching documents.").option("--top-k <n>", "Maximum number of results to return. Defaults to search.defaultTopK in config.yaml. RSS searches with a time window use 500 when omitted.").option("--source <sourceIds>", "Restrict results to one or more source ids. Use comma-separated values.").option("--source-name <names>", "Restrict results to one or more source names. Use comma-separated values.").option("--source-type <types>", `Restrict results to one or more source types. Use comma-separated values: ${SOURCE_TYPE_LIST.join(", ")}`).option("--uri-prefix <prefixes>", "Restrict results to one or more URI prefixes. Use comma-separated values.").option("--tag <tags>", "Restrict results to one or more source tags. Use comma-separated values.").option("--metadata <key=value...>", "Restrict results to sources with matching metadata.").option("--since <date>", "Shortcut for --publication-date-from.").option("--until <date>", "Shortcut for --publication-date-to.").option("--changed-since <date>", "Only include documents changed on or after this date.").option("--has-publication-date", "Only include documents with a publication date.").option("--publication-date-from <date>", "Only include documents published on or after this date.").option("--publication-date-to <date>", "Only include documents published on or before this date.").option("--first-seen-at-from <date>", "Only include documents first seen on or after this date.").option("--first-seen-at-to <date>", "Only include documents first seen on or before this date.").option("--last-seen-at-from <date>", "Only include documents last seen on or after this date.").option("--last-seen-at-to <date>", "Only include documents last seen on or before this date.").option("--last-changed-at-from <date>", "Only include documents changed on or after this date.").option("--last-changed-at-to <date>", "Only include documents changed on or before this date.").option("--crawled-at-from <date>", "Only include documents crawled on or after this date.").option("--crawled-at-to <date>", "Only include documents crawled on or before this date.").option("--retrieval <mode>", `Retrieval mode: ${RETRIEVAL_MODE_LIST.join(", ")}`).option("--show-chunks", "Return chunk-level matches when available.").addHelpText("after", `
|
|
4655
4792
|
Examples:
|
|
4656
4793
|
qli search "pricing api limits"
|
|
@@ -4659,6 +4796,7 @@ Examples:
|
|
|
4659
4796
|
qli search --source-name "Release Feed,Company Blog" --uri-prefix https://example.com/news,https://example.com/blog
|
|
4660
4797
|
qli search "billing" --metadata team=support
|
|
4661
4798
|
qli search "embedding model" --retrieval hybrid --show-chunks
|
|
4799
|
+
qli search --workspace ./docs-kb.zip "authentication"
|
|
4662
4800
|
qli search --source-type rss,page --top-k 25 --json
|
|
4663
4801
|
|
|
4664
4802
|
Notes:
|
|
@@ -4713,6 +4851,7 @@ Notes:
|
|
|
4713
4851
|
Examples:
|
|
4714
4852
|
qli serve
|
|
4715
4853
|
qli serve --workspace ./docs/.kb --port 4000
|
|
4854
|
+
qli serve --workspace ./docs-kb.zip --port 4000
|
|
4716
4855
|
qli serve --workspace ./kbs --host 0.0.0.0 --port 4000
|
|
4717
4856
|
|
|
4718
4857
|
Routes:
|
|
@@ -4723,10 +4862,10 @@ Routes:
|
|
|
4723
4862
|
Notes:
|
|
4724
4863
|
The request body must be a Querylight JSON DSL object.
|
|
4725
4864
|
serve only exposes lexical _search for now.
|
|
4726
|
-
When --workspace points to a directory of knowledge bases,
|
|
4865
|
+
When --workspace points to a directory of knowledge bases, qli serves child .zip files and child directories that contain .kb.
|
|
4727
4866
|
Index files are loaded once at startup and reused across requests.`).action(async function command(options) {
|
|
4728
4867
|
const global = this.optsWithGlobals();
|
|
4729
|
-
const workspace =
|
|
4868
|
+
const workspace = path23.resolve(global.workspace ?? DEFAULT_WORKSPACE);
|
|
4730
4869
|
const port = Number(options.port);
|
|
4731
4870
|
if (!Number.isInteger(port) || port < 0 || port > 65535) {
|
|
4732
4871
|
throw new CliError(`invalid port: ${options.port}`, "INVALID_ARGUMENT", 2 /* InvalidArguments */);
|
|
@@ -4803,7 +4942,7 @@ Use --json when another tool needs structured access to the raw passages and met
|
|
|
4803
4942
|
});
|
|
4804
4943
|
const models = program.command("models");
|
|
4805
4944
|
models.description("Inspect and download retrieval model assets.");
|
|
4806
|
-
models.command("pull").description("Download dense
|
|
4945
|
+
models.command("pull").description("Download dense or sparse retrieval assets required by vector search.").option("--dense", "Only pull dense retrieval assets.").option("--sparse", "Only pull sparse retrieval assets.").addHelpText("after", `
|
|
4807
4946
|
Examples:
|
|
4808
4947
|
qli models pull
|
|
4809
4948
|
qli models pull --dense
|
|
@@ -4813,7 +4952,7 @@ Examples:
|
|
|
4813
4952
|
Pulled model assets are shared under ~/.qli by default.
|
|
4814
4953
|
If you plan to use related, dense search, or hybrid retrieval, pull the models and rebuild the index first.`).action(async function command(options) {
|
|
4815
4954
|
const global = this.optsWithGlobals();
|
|
4816
|
-
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4955
|
+
const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
|
|
4817
4956
|
const config = await loadConfig(workspace, global.config);
|
|
4818
4957
|
const status = await getModelStatus(workspace, config);
|
|
4819
4958
|
const { pullDense, pullSparse } = resolveModelPullPlan({
|
|
@@ -4889,7 +5028,7 @@ Examples:
|
|
|
4889
5028
|
try {
|
|
4890
5029
|
const meta = await readLatestIndexMetadata(workspace);
|
|
4891
5030
|
latestIndex = meta.createdAt;
|
|
4892
|
-
indexSize = (await
|
|
5031
|
+
indexSize = (await stat6(await resolveLatestIndexArtifactPath(workspace))).size;
|
|
4893
5032
|
} catch {
|
|
4894
5033
|
latestIndex = void 0;
|
|
4895
5034
|
}
|