@tryformation/querylight-cli 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  // src/cli/run-cli.ts
4
4
  import { Command, Option } from "commander";
5
- import { readFile as readFile11, stat as stat5 } from "fs/promises";
6
- import path22 from "path";
5
+ import { readFile as readFile12, stat as stat6 } from "fs/promises";
6
+ import path23 from "path";
7
7
 
8
8
  // src/chunk/chunker.ts
9
9
  import { readFile as readFile3 } from "fs/promises";
@@ -384,27 +384,138 @@ async function assertWorkspaceExists(workspacePath) {
384
384
  }
385
385
  }
386
386
 
387
+ // src/core/archive.ts
388
+ import { mkdir as mkdir3, readdir, readFile as readFile4, rm, stat as stat2, writeFile as writeFile3 } from "fs/promises";
389
+ import os from "os";
390
+ import path6 from "path";
391
+ import { unzipSync, zipSync } from "fflate";
392
+ function isWorkspaceArchivePath(workspacePath) {
393
+ return workspacePath.toLowerCase().endsWith(".zip");
394
+ }
395
+ async function collectFiles(root, outputPath) {
396
+ const files = {};
397
+ const resolvedOutput = path6.resolve(outputPath);
398
+ async function visit(dir) {
399
+ const entries = await readdir(dir, { withFileTypes: true });
400
+ for (const entry of entries) {
401
+ const absolute = path6.join(dir, entry.name);
402
+ if (path6.resolve(absolute) === resolvedOutput) {
403
+ continue;
404
+ }
405
+ if (entry.isDirectory()) {
406
+ await visit(absolute);
407
+ continue;
408
+ }
409
+ if (!entry.isFile()) {
410
+ continue;
411
+ }
412
+ const relative = path6.relative(root, absolute).split(path6.sep).join("/");
413
+ files[relative] = new Uint8Array(await readFile4(absolute));
414
+ }
415
+ }
416
+ await visit(root);
417
+ return files;
418
+ }
419
+ async function packageWorkspaceArchive({
420
+ workspacePath,
421
+ outputPath,
422
+ force = false
423
+ }) {
424
+ const workspace = await assertWorkspaceExists(workspacePath);
425
+ const archivePath = path6.resolve(outputPath);
426
+ try {
427
+ await stat2(archivePath);
428
+ if (!force) {
429
+ throw new CliError(`archive already exists: ${archivePath}`, "INVALID_ARGUMENT", 2 /* InvalidArguments */);
430
+ }
431
+ } catch (error) {
432
+ if (error instanceof CliError) {
433
+ throw error;
434
+ }
435
+ if (error.code !== "ENOENT") {
436
+ throw error;
437
+ }
438
+ }
439
+ const files = await collectFiles(workspace, archivePath);
440
+ const archive = zipSync(files, { level: 6 });
441
+ await mkdir3(path6.dirname(archivePath), { recursive: true });
442
+ await writeFile3(archivePath, archive);
443
+ const archiveStat = await stat2(archivePath);
444
+ return {
445
+ workspacePath: workspace,
446
+ archivePath,
447
+ fileCount: Object.keys(files).length,
448
+ sizeBytes: archiveStat.size
449
+ };
450
+ }
451
+ function assertSafeArchiveEntry(name) {
452
+ const normalized = path6.posix.normalize(name);
453
+ if (name.startsWith("/") || normalized === "." || normalized.startsWith("../") || normalized.includes("/../")) {
454
+ throw new CliError(`unsafe archive entry: ${name}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
455
+ }
456
+ }
457
+ async function archiveCachePath(archivePath) {
458
+ const info = await stat2(archivePath);
459
+ const key = sha256(`${path6.resolve(archivePath)}:${info.size}:${info.mtimeMs}`).slice(0, 24);
460
+ return path6.join(os.tmpdir(), "qli-workspace-archives", key);
461
+ }
462
+ async function resolveReadableWorkspace(workspacePath) {
463
+ const resolved = path6.resolve(workspacePath);
464
+ if (!isWorkspaceArchivePath(resolved)) {
465
+ return { workspacePath: await assertWorkspaceExists(resolved) };
466
+ }
467
+ const archive = await readFile4(resolved);
468
+ const extractRoot = await archiveCachePath(resolved);
469
+ const workspaceRoot = path6.join(extractRoot, "workspace");
470
+ try {
471
+ await assertWorkspaceExists(workspaceRoot);
472
+ return { workspacePath: workspaceRoot, archivePath: resolved };
473
+ } catch {
474
+ }
475
+ await rm(extractRoot, { recursive: true, force: true });
476
+ await mkdir3(workspaceRoot, { recursive: true });
477
+ const entries = unzipSync(new Uint8Array(archive));
478
+ await Promise.all(Object.entries(entries).map(async ([entryName, data]) => {
479
+ assertSafeArchiveEntry(entryName);
480
+ const target = path6.join(workspaceRoot, ...entryName.split("/"));
481
+ if (entryName.endsWith("/")) {
482
+ await mkdir3(target, { recursive: true });
483
+ return;
484
+ }
485
+ await mkdir3(path6.dirname(target), { recursive: true });
486
+ await writeFile3(target, Buffer.from(data));
487
+ }));
488
+ return { workspacePath: await assertWorkspaceExists(workspaceRoot), archivePath: resolved };
489
+ }
490
+ async function assertWritableWorkspacePath(workspacePath) {
491
+ const resolved = path6.resolve(workspacePath);
492
+ if (isWorkspaceArchivePath(resolved)) {
493
+ throw new CliError("zip workspaces are read-only; package a rebuilt directory workspace instead", "WORKSPACE_ERROR", 3 /* WorkspaceError */);
494
+ }
495
+ return resolved;
496
+ }
497
+
387
498
  // src/index/querylight-indexer.ts
388
499
  import { Analyzer, DateFieldIndex, DocumentIndex, KeywordTokenizer, LowerCaseTextFilter, RankingAlgorithm, StoredSourceIndex, TextFieldIndex } from "@tryformation/querylight-ts";
389
- import path11 from "path";
500
+ import path12 from "path";
390
501
 
391
502
  // src/vector/dense.ts
392
503
  import { VectorFieldIndex, cosineSimilarity, createSeededRandom } from "@tryformation/querylight-ts";
393
- import { mkdir as mkdir4 } from "fs/promises";
394
- import path8 from "path";
504
+ import { mkdir as mkdir5 } from "fs/promises";
505
+ import path9 from "path";
395
506
 
396
507
  // src/vector/runtime.ts
397
- import os from "os";
398
- import path6 from "path";
508
+ import os2 from "os";
509
+ import path7 from "path";
399
510
  import { fileURLToPath } from "url";
400
511
  import { execFile, execFileSync } from "child_process";
401
- import { mkdtemp, rm, writeFile as writeFile3 } from "fs/promises";
512
+ import { mkdtemp, rm as rm2, writeFile as writeFile4 } from "fs/promises";
402
513
 
403
514
  // src/core/files.ts
404
- import { stat as stat2 } from "fs/promises";
515
+ import { stat as stat3 } from "fs/promises";
405
516
  async function fileExists(filePath) {
406
517
  try {
407
- await stat2(filePath);
518
+ await stat3(filePath);
408
519
  return true;
409
520
  } catch {
410
521
  return false;
@@ -414,35 +525,35 @@ async function fileExists(filePath) {
414
525
  // src/vector/runtime.ts
415
526
  var sparseExecFileSync = execFileSync;
416
527
  function resolveQliHomeDir() {
417
- return path6.resolve(process.env.QLI_HOME ?? path6.join(os.homedir(), ".qli"));
528
+ return path7.resolve(process.env.QLI_HOME ?? path7.join(os2.homedir(), ".qli"));
418
529
  }
419
530
  function resolveCacheDir(workspacePath, configuredPath) {
420
531
  if (configuredPath === "~/.qli") {
421
532
  return resolveQliHomeDir();
422
533
  }
423
534
  if (configuredPath.startsWith("~/.qli/")) {
424
- return path6.join(resolveQliHomeDir(), configuredPath.slice("~/.qli/".length));
535
+ return path7.join(resolveQliHomeDir(), configuredPath.slice("~/.qli/".length));
425
536
  }
426
537
  if (configuredPath === "~") {
427
- return os.homedir();
538
+ return os2.homedir();
428
539
  }
429
540
  if (configuredPath.startsWith("~/")) {
430
- return path6.join(os.homedir(), configuredPath.slice(2));
541
+ return path7.join(os2.homedir(), configuredPath.slice(2));
431
542
  }
432
- return path6.isAbsolute(configuredPath) ? configuredPath : path6.resolve(workspacePath, configuredPath.replace(/^\.kb\//, ""));
543
+ return path7.isAbsolute(configuredPath) ? configuredPath : path7.resolve(workspacePath, configuredPath.replace(/^\.kb\//, ""));
433
544
  }
434
545
  function packageRootFromImportMeta(importMetaUrl) {
435
- return path6.resolve(path6.dirname(fileURLToPath(importMetaUrl)), "..");
546
+ return path7.resolve(path7.dirname(fileURLToPath(importMetaUrl)), "..");
436
547
  }
437
548
  async function sparseScriptPath(importMetaUrl) {
438
549
  const base = packageRootFromImportMeta(importMetaUrl);
439
550
  const candidates = [
440
- path6.join(base, "scripts", "sparse-encode.py"),
441
- path6.join(base, "..", "scripts", "sparse-encode.py")
551
+ path7.join(base, "scripts", "sparse-encode.py"),
552
+ path7.join(base, "..", "scripts", "sparse-encode.py")
442
553
  ];
443
554
  for (const candidate of candidates) {
444
555
  if (await fileExists(candidate)) {
445
- return path6.resolve(candidate);
556
+ return path7.resolve(candidate);
446
557
  }
447
558
  }
448
559
  throw new Error(`sparse helper script not found; checked ${candidates.join(", ")}`);
@@ -468,9 +579,9 @@ async function runSparsePython({
468
579
  }) {
469
580
  const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
470
581
  const scriptPath = await sparseScriptPath(importMetaUrl);
471
- const payloadDir = await mkdtemp(path6.join(os.tmpdir(), "qli-sparse-"));
472
- const payloadPath = path6.join(payloadDir, "payload.json");
473
- await writeFile3(payloadPath, JSON.stringify(payload), "utf8");
582
+ const payloadDir = await mkdtemp(path7.join(os2.tmpdir(), "qli-sparse-"));
583
+ const payloadPath = path7.join(payloadDir, "payload.json");
584
+ await writeFile4(payloadPath, JSON.stringify(payload), "utf8");
474
585
  try {
475
586
  return sparseExecFileSync(
476
587
  "uv",
@@ -496,7 +607,7 @@ async function runSparsePython({
496
607
  }
497
608
  );
498
609
  } finally {
499
- await rm(payloadDir, { recursive: true, force: true });
610
+ await rm2(payloadDir, { recursive: true, force: true });
500
611
  }
501
612
  }
502
613
  async function getDenseTransformersRuntime(cacheDir) {
@@ -510,28 +621,28 @@ async function getDenseTransformersRuntime(cacheDir) {
510
621
  }
511
622
 
512
623
  // src/vector/store.ts
513
- import { mkdir as mkdir3, rm as rm2, writeFile as writeFile5 } from "fs/promises";
514
- import path7 from "path";
624
+ import { mkdir as mkdir4, rm as rm3, writeFile as writeFile6 } from "fs/promises";
625
+ import path8 from "path";
515
626
 
516
627
  // src/core/gzip-json.ts
517
- import { readFile as readFile4, writeFile as writeFile4 } from "fs/promises";
628
+ import { readFile as readFile5, writeFile as writeFile5 } from "fs/promises";
518
629
  import { promisify } from "util";
519
630
  import { gunzip, gzip } from "zlib";
520
631
  var gzipAsync = promisify(gzip);
521
632
  var gunzipAsync = promisify(gunzip);
522
633
  async function writeGzipJson(filePath, value) {
523
634
  const payload = JSON.stringify(value, null, 2);
524
- await writeFile4(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
635
+ await writeFile5(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
525
636
  }
526
637
  async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
527
638
  if (await fileExists(gzipPath)) {
528
- const payload = await readFile4(gzipPath);
639
+ const payload = await readFile5(gzipPath);
529
640
  return JSON.parse((await gunzipAsync(payload)).toString("utf8"));
530
641
  }
531
642
  if (legacyPath && await fileExists(legacyPath)) {
532
- return JSON.parse(await readFile4(legacyPath, "utf8"));
643
+ return JSON.parse(await readFile5(legacyPath, "utf8"));
533
644
  }
534
- return JSON.parse(await readFile4(gzipPath, "utf8"));
645
+ return JSON.parse(await readFile5(gzipPath, "utf8"));
535
646
  }
536
647
  async function resolveExistingGzipOrFilePath(gzipPath, legacyPath) {
537
648
  if (await fileExists(gzipPath)) {
@@ -545,39 +656,39 @@ async function resolveExistingGzipOrFilePath(gzipPath, legacyPath) {
545
656
 
546
657
  // src/vector/store.ts
547
658
  function vectorsDir(workspacePath) {
548
- return path7.join(workspacePath, "vectors");
659
+ return path8.join(workspacePath, "vectors");
549
660
  }
550
661
  function sharedModelStateDir() {
551
- return path7.join(resolveQliHomeDir(), "models", "status");
662
+ return path8.join(resolveQliHomeDir(), "models", "status");
552
663
  }
553
664
  function denseVectorPath(workspacePath) {
554
- return path7.join(vectorsDir(workspacePath), "dense.latest.json.gz");
665
+ return path8.join(vectorsDir(workspacePath), "dense.latest.json.gz");
555
666
  }
556
667
  function denseMetaPath(workspacePath) {
557
- return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
668
+ return path8.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
558
669
  }
559
670
  function sparseVectorPath(workspacePath) {
560
- return path7.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
671
+ return path8.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
561
672
  }
562
673
  function sparseMetaPath(workspacePath) {
563
- return path7.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
674
+ return path8.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
564
675
  }
565
676
  function legacyDenseVectorPath(workspacePath) {
566
- return path7.join(vectorsDir(workspacePath), "dense.latest.json");
677
+ return path8.join(vectorsDir(workspacePath), "dense.latest.json");
567
678
  }
568
679
  function legacyDenseMetaPath(workspacePath) {
569
- return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json");
680
+ return path8.join(vectorsDir(workspacePath), "dense.latest.meta.json");
570
681
  }
571
682
  function legacySparseVectorPath(workspacePath) {
572
- return path7.join(vectorsDir(workspacePath), "sparse.latest.json");
683
+ return path8.join(vectorsDir(workspacePath), "sparse.latest.json");
573
684
  }
574
685
  function legacySparseMetaPath(workspacePath) {
575
- return path7.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
686
+ return path8.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
576
687
  }
577
688
  function pullMarkerPath(type, workspacePath, modelId, cacheDir) {
578
689
  const resolvedCacheDir = resolveCacheDir(workspacePath, cacheDir);
579
690
  const cacheKey = sha256(resolvedCacheDir).slice(0, 16);
580
- return path7.join(sharedModelStateDir(), type, `${encodeURIComponent(modelId)}.${cacheKey}.json`);
691
+ return path8.join(sharedModelStateDir(), type, `${encodeURIComponent(modelId)}.${cacheKey}.json`);
581
692
  }
582
693
  function densePullMarker(workspacePath, modelId, cacheDir) {
583
694
  return pullMarkerPath("dense", workspacePath, modelId, cacheDir);
@@ -586,24 +697,24 @@ function sparsePullMarker(workspacePath, modelId, cacheDir) {
586
697
  return pullMarkerPath("sparse", workspacePath, modelId, cacheDir);
587
698
  }
588
699
  async function writeDensePayload(workspacePath, payload) {
589
- await mkdir3(vectorsDir(workspacePath), { recursive: true });
700
+ await mkdir4(vectorsDir(workspacePath), { recursive: true });
590
701
  await writeGzipJson(denseVectorPath(workspacePath), payload);
591
702
  await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
592
703
  await Promise.all([
593
- rm2(legacyDenseVectorPath(workspacePath), { force: true }),
594
- rm2(legacyDenseMetaPath(workspacePath), { force: true })
704
+ rm3(legacyDenseVectorPath(workspacePath), { force: true }),
705
+ rm3(legacyDenseMetaPath(workspacePath), { force: true })
595
706
  ]);
596
707
  }
597
708
  async function readDensePayload(workspacePath) {
598
709
  return readJsonFromGzipOrFile(denseVectorPath(workspacePath), legacyDenseVectorPath(workspacePath));
599
710
  }
600
711
  async function writeSparsePayload(workspacePath, payload) {
601
- await mkdir3(vectorsDir(workspacePath), { recursive: true });
712
+ await mkdir4(vectorsDir(workspacePath), { recursive: true });
602
713
  await writeGzipJson(sparseVectorPath(workspacePath), payload);
603
714
  await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
604
715
  await Promise.all([
605
- rm2(legacySparseVectorPath(workspacePath), { force: true }),
606
- rm2(legacySparseMetaPath(workspacePath), { force: true })
716
+ rm3(legacySparseVectorPath(workspacePath), { force: true }),
717
+ rm3(legacySparseMetaPath(workspacePath), { force: true })
607
718
  ]);
608
719
  }
609
720
  async function readSparsePayload(workspacePath) {
@@ -611,13 +722,13 @@ async function readSparsePayload(workspacePath) {
611
722
  }
612
723
  async function writeDensePullMarker(workspacePath, model, value) {
613
724
  const markerPath = densePullMarker(workspacePath, model.modelId, model.cacheDir);
614
- await mkdir3(path7.dirname(markerPath), { recursive: true });
615
- await writeFile5(markerPath, JSON.stringify(value, null, 2), "utf8");
725
+ await mkdir4(path8.dirname(markerPath), { recursive: true });
726
+ await writeFile6(markerPath, JSON.stringify(value, null, 2), "utf8");
616
727
  }
617
728
  async function writeSparsePullMarker(workspacePath, model, value) {
618
729
  const markerPath = sparsePullMarker(workspacePath, model.modelId, model.cacheDir);
619
- await mkdir3(path7.dirname(markerPath), { recursive: true });
620
- await writeFile5(markerPath, JSON.stringify(value, null, 2), "utf8");
730
+ await mkdir4(path8.dirname(markerPath), { recursive: true });
731
+ await writeFile6(markerPath, JSON.stringify(value, null, 2), "utf8");
621
732
  }
622
733
  async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
623
734
  const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
@@ -720,7 +831,7 @@ function exactDenseQuery(payload, vector, topK) {
720
831
  }
721
832
  async function pullDenseModel(workspacePath, config) {
722
833
  const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
723
- await mkdir4(cacheDir, { recursive: true });
834
+ await mkdir5(cacheDir, { recursive: true });
724
835
  const embedder = await createEmbedder(cacheDir, config.modelId);
725
836
  try {
726
837
  await embedder.embed("warm dense model cache");
@@ -733,9 +844,9 @@ async function buildDenseVectors({
733
844
  config,
734
845
  progress
735
846
  }) {
736
- const chunks = await readJsonl(path8.join(workspacePath, "chunks", "chunks.jsonl"));
847
+ const chunks = await readJsonl(path9.join(workspacePath, "chunks", "chunks.jsonl"));
737
848
  const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
738
- await mkdir4(cacheDir, { recursive: true });
849
+ await mkdir5(cacheDir, { recursive: true });
739
850
  const embedder = await createEmbedder(cacheDir, config.modelId);
740
851
  try {
741
852
  const records = [];
@@ -819,8 +930,8 @@ async function denseQuery({
819
930
 
820
931
  // src/vector/sparse.ts
821
932
  import { SparseVectorFieldIndex } from "@tryformation/querylight-ts";
822
- import { mkdir as mkdir5 } from "fs/promises";
823
- import path9 from "path";
933
+ import { mkdir as mkdir6 } from "fs/promises";
934
+ import path10 from "path";
824
935
  var sparseQueryEncoderFactory = null;
825
936
  var sparseDocumentBuilderFactory = null;
826
937
  function buildSparseQueryVector(tokenIds, tokenWeights) {
@@ -865,7 +976,6 @@ async function createSparseQueryEncoder(cacheDir, modelId, queryTokenWeights) {
865
976
  return async (text) => {
866
977
  const features = await tokenizer([text], {
867
978
  truncation: true,
868
- return_attention_mask: false,
869
979
  return_token_type_ids: false
870
980
  });
871
981
  return buildSparseQueryVector(normalizeTokenIds(features.input_ids), queryTokenWeights);
@@ -874,7 +984,7 @@ async function createSparseQueryEncoder(cacheDir, modelId, queryTokenWeights) {
874
984
  async function pullSparseModel(workspacePath, config) {
875
985
  await ensureUvAvailable();
876
986
  const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
877
- await mkdir5(cacheDir, { recursive: true });
987
+ await mkdir6(cacheDir, { recursive: true });
878
988
  await runSparsePython({
879
989
  workspacePath,
880
990
  config,
@@ -925,7 +1035,7 @@ async function buildSparseVectors({
925
1035
  config,
926
1036
  progress
927
1037
  }) {
928
- const chunks = await readJsonl(path9.join(workspacePath, "chunks", "chunks.jsonl"));
1038
+ const chunks = await readJsonl(path10.join(workspacePath, "chunks", "chunks.jsonl"));
929
1039
  reportProgress(progress, `Encoding ${chunks.length} chunk${chunks.length === 1 ? "" : "s"} for sparse retrieval`);
930
1040
  const built = await buildSparseDocuments(workspacePath, config, chunks);
931
1041
  reportProgress(progress, "Building sparse vector index");
@@ -1058,31 +1168,31 @@ async function getModelStatus(workspacePath, config) {
1058
1168
  }
1059
1169
 
1060
1170
  // src/index/index-store.ts
1061
- import { mkdir as mkdir6, rm as rm3 } from "fs/promises";
1062
- import path10 from "path";
1171
+ import { mkdir as mkdir7, rm as rm4 } from "fs/promises";
1172
+ import path11 from "path";
1063
1173
  function versionedIndexPath(workspacePath, stamp) {
1064
- return path10.join(workspacePath, "indexes", `${stamp}.json.gz`);
1174
+ return path11.join(workspacePath, "indexes", `${stamp}.json.gz`);
1065
1175
  }
1066
1176
  function versionedLegacyIndexPath(workspacePath, stamp) {
1067
- return path10.join(workspacePath, "indexes", `${stamp}.json`);
1177
+ return path11.join(workspacePath, "indexes", `${stamp}.json`);
1068
1178
  }
1069
1179
  function versionedMetaPath(workspacePath, stamp) {
1070
- return path10.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
1180
+ return path11.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
1071
1181
  }
1072
1182
  function versionedLegacyMetaPath(workspacePath, stamp) {
1073
- return path10.join(workspacePath, "indexes", `${stamp}.meta.json`);
1183
+ return path11.join(workspacePath, "indexes", `${stamp}.meta.json`);
1074
1184
  }
1075
1185
  function latestIndexPath(workspacePath) {
1076
- return path10.join(workspacePath, "indexes", "latest.json.gz");
1186
+ return path11.join(workspacePath, "indexes", "latest.json.gz");
1077
1187
  }
1078
1188
  function legacyLatestIndexPath(workspacePath) {
1079
- return path10.join(workspacePath, "indexes", "latest.json");
1189
+ return path11.join(workspacePath, "indexes", "latest.json");
1080
1190
  }
1081
1191
  function latestMetaPath(workspacePath) {
1082
- return path10.join(workspacePath, "indexes", "latest.meta.json.gz");
1192
+ return path11.join(workspacePath, "indexes", "latest.meta.json.gz");
1083
1193
  }
1084
1194
  function legacyLatestMetaPath(workspacePath) {
1085
- return path10.join(workspacePath, "indexes", "latest.meta.json");
1195
+ return path11.join(workspacePath, "indexes", "latest.meta.json");
1086
1196
  }
1087
1197
  async function writeIndexArtifacts({
1088
1198
  workspacePath,
@@ -1094,16 +1204,16 @@ async function writeIndexArtifacts({
1094
1204
  const metaPath = versionedMetaPath(workspacePath, stamp);
1095
1205
  const latestIndexArtifactPath = latestIndexPath(workspacePath);
1096
1206
  const latestMetadataArtifactPath = latestMetaPath(workspacePath);
1097
- await mkdir6(path10.join(workspacePath, "indexes"), { recursive: true });
1207
+ await mkdir7(path11.join(workspacePath, "indexes"), { recursive: true });
1098
1208
  await writeGzipJson(indexPath, indexState);
1099
1209
  await writeGzipJson(metaPath, metadata);
1100
1210
  await writeGzipJson(latestIndexArtifactPath, indexState);
1101
1211
  await writeGzipJson(latestMetadataArtifactPath, metadata);
1102
1212
  await Promise.all([
1103
- rm3(legacyLatestIndexPath(workspacePath), { force: true }),
1104
- rm3(legacyLatestMetaPath(workspacePath), { force: true }),
1105
- rm3(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
1106
- rm3(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
1213
+ rm4(legacyLatestIndexPath(workspacePath), { force: true }),
1214
+ rm4(legacyLatestMetaPath(workspacePath), { force: true }),
1215
+ rm4(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
1216
+ rm4(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
1107
1217
  ]);
1108
1218
  return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
1109
1219
  }
@@ -1168,9 +1278,9 @@ async function buildIndex({
1168
1278
  }) {
1169
1279
  const config = await loadConfig(workspacePath);
1170
1280
  reportProgress(progress, "Loading documents, chunks, and sources");
1171
- const chunks = await readJsonl(path11.join(workspacePath, "chunks", "chunks.jsonl"));
1172
- const documents = await readJsonl(path11.join(workspacePath, "documents", "documents.jsonl"));
1173
- const sources = await readJsonl(path11.join(workspacePath, "sources", "sources.jsonl"));
1281
+ const chunks = await readJsonl(path12.join(workspacePath, "chunks", "chunks.jsonl"));
1282
+ const documents = await readJsonl(path12.join(workspacePath, "documents", "documents.jsonl"));
1283
+ const sources = await readJsonl(path12.join(workspacePath, "sources", "sources.jsonl"));
1174
1284
  const metadataFields = [...new Set(chunks.flatMap((chunk) => Object.keys(chunk.metadata).map((key) => `metadata.${key}`)))];
1175
1285
  const index = new DocumentIndex(createIndexMapping(metadataFields));
1176
1286
  const documentsById = new Map(documents.map((document) => [document.id, document]));
@@ -1249,7 +1359,7 @@ async function buildIndex({
1249
1359
  }
1250
1360
 
1251
1361
  // src/ingest/ingest-service.ts
1252
- import path17 from "path";
1362
+ import path18 from "path";
1253
1363
 
1254
1364
  // src/core/concurrency.ts
1255
1365
  async function mapWithConcurrency(items, limit, worker) {
@@ -1273,17 +1383,17 @@ async function mapWithConcurrency(items, limit, worker) {
1273
1383
  }
1274
1384
 
1275
1385
  // src/core/runs.ts
1276
- import path12 from "path";
1386
+ import path13 from "path";
1277
1387
  async function writeRun(workspacePath, run) {
1278
- await writeJsonl(path12.join(workspacePath, "runs", `${run.id}.json`), [run]);
1388
+ await writeJsonl(path13.join(workspacePath, "runs", `${run.id}.json`), [run]);
1279
1389
  }
1280
1390
  async function listRuns(workspacePath) {
1281
1391
  const fs = await import("fs/promises");
1282
- const dir = path12.join(workspacePath, "runs");
1392
+ const dir = path13.join(workspacePath, "runs");
1283
1393
  try {
1284
1394
  const entries = await fs.readdir(dir);
1285
1395
  const records = await Promise.all(entries.filter((name) => name.endsWith(".json")).map(async (name) => {
1286
- const runs = await readJsonl(path12.join(dir, name));
1396
+ const runs = await readJsonl(path13.join(dir, name));
1287
1397
  return runs[0];
1288
1398
  }));
1289
1399
  return records.filter((record) => record != null).sort((a, b) => a.createdAt.localeCompare(b.createdAt));
@@ -1293,8 +1403,8 @@ async function listRuns(workspacePath) {
1293
1403
  }
1294
1404
 
1295
1405
  // src/sources/source-store.ts
1296
- import path13 from "path";
1297
- var sourcesFile = (workspacePath) => path13.join(workspacePath, "sources", "sources.jsonl");
1406
+ import path14 from "path";
1407
+ var sourcesFile = (workspacePath) => path14.join(workspacePath, "sources", "sources.jsonl");
1298
1408
  async function listSources(workspacePath) {
1299
1409
  return readJsonl(sourcesFile(workspacePath));
1300
1410
  }
@@ -1340,8 +1450,8 @@ async function removeSource(workspacePath, sourceId) {
1340
1450
  }
1341
1451
 
1342
1452
  // src/ingest/document-utils.ts
1343
- import { mkdir as mkdir7, rm as rm4, writeFile as writeFile6 } from "fs/promises";
1344
- import path14 from "path";
1453
+ import { mkdir as mkdir8, rm as rm5, writeFile as writeFile7 } from "fs/promises";
1454
+ import path15 from "path";
1345
1455
 
1346
1456
  // src/normalize/normalize-markdown.ts
1347
1457
  import matter2 from "gray-matter";
@@ -1393,8 +1503,8 @@ async function writeNormalizedDocument({
1393
1503
  normalizedPath,
1394
1504
  markdown
1395
1505
  }) {
1396
- await mkdir7(path14.dirname(normalizedPath), { recursive: true });
1397
- await writeFile6(
1506
+ await mkdir8(path15.dirname(normalizedPath), { recursive: true });
1507
+ await writeFile7(
1398
1508
  normalizedPath,
1399
1509
  withFrontmatter(
1400
1510
  {
@@ -1416,14 +1526,14 @@ async function writeNormalizedDocument({
1416
1526
  }
1417
1527
  async function deleteDocumentArtifacts(document) {
1418
1528
  await Promise.all([
1419
- document.rawPath ? rm4(document.rawPath, { force: true }) : Promise.resolve(),
1420
- rm4(document.normalizedPath, { force: true })
1529
+ document.rawPath ? rm5(document.rawPath, { force: true }) : Promise.resolve(),
1530
+ rm5(document.normalizedPath, { force: true })
1421
1531
  ]);
1422
1532
  }
1423
1533
 
1424
1534
  // src/ingest/adapters/directory-adapter.ts
1425
1535
  import fg from "fast-glob";
1426
- import path15 from "path";
1536
+ import path16 from "path";
1427
1537
  async function listDirectoryFiles(source) {
1428
1538
  const include = source.crawl?.includePatterns?.length ? source.crawl.includePatterns : ["**/*.md", "**/*.txt", "**/*.html", "**/*.htm", "**/*.pdf", "**/*.docx"];
1429
1539
  const exclude = source.crawl?.excludePatterns ?? [];
@@ -1436,12 +1546,12 @@ async function listDirectoryFiles(source) {
1436
1546
  ignore: exclude,
1437
1547
  followSymbolicLinks: false
1438
1548
  });
1439
- return matches.map((match) => path15.resolve(match)).sort();
1549
+ return matches.map((match) => path16.resolve(match)).sort();
1440
1550
  }
1441
1551
 
1442
1552
  // src/ingest/adapters/file-adapter.ts
1443
1553
  import { basename, extname, resolve } from "path";
1444
- import { mkdir as mkdir8, readFile as readFile8, stat as stat3, writeFile as writeFile7 } from "fs/promises";
1554
+ import { mkdir as mkdir9, readFile as readFile9, stat as stat4, writeFile as writeFile8 } from "fs/promises";
1445
1555
 
1446
1556
  // src/ingest/extractors/docx-extractor.ts
1447
1557
  import mammoth from "mammoth";
@@ -1615,16 +1725,16 @@ function extractPublicationDateFromHtml(html) {
1615
1725
  }
1616
1726
 
1617
1727
  // src/ingest/extractors/markdown-extractor.ts
1618
- import { readFile as readFile5 } from "fs/promises";
1728
+ import { readFile as readFile6 } from "fs/promises";
1619
1729
  async function extractMarkdown(filePath) {
1620
- return readFile5(filePath, "utf8");
1730
+ return readFile6(filePath, "utf8");
1621
1731
  }
1622
1732
 
1623
1733
  // src/ingest/extractors/pdf-extractor.ts
1624
- import { readFile as readFile6 } from "fs/promises";
1734
+ import { readFile as readFile7 } from "fs/promises";
1625
1735
  import { PDFParse } from "pdf-parse";
1626
1736
  async function extractPdf(filePath) {
1627
- const buffer = await readFile6(filePath);
1737
+ const buffer = await readFile7(filePath);
1628
1738
  const parser = new PDFParse({ data: buffer });
1629
1739
  try {
1630
1740
  const parsed = await parser.getText();
@@ -1635,9 +1745,9 @@ async function extractPdf(filePath) {
1635
1745
  }
1636
1746
 
1637
1747
  // src/ingest/extractors/text-extractor.ts
1638
- import { readFile as readFile7 } from "fs/promises";
1748
+ import { readFile as readFile8 } from "fs/promises";
1639
1749
  async function extractText(filePath) {
1640
- return readFile7(filePath, "utf8");
1750
+ return readFile8(filePath, "utf8");
1641
1751
  }
1642
1752
 
1643
1753
  // src/ingest/adapters/file-adapter.ts
@@ -1672,7 +1782,7 @@ async function extractFileContent(filePath, mimeType) {
1672
1782
  ${text}`, raw: text };
1673
1783
  }
1674
1784
  if (mimeType === "text/html") {
1675
- const raw = await readFile8(filePath, "utf8");
1785
+ const raw = await readFile9(filePath, "utf8");
1676
1786
  const extracted = extractHtmlToMarkdown(raw);
1677
1787
  return { title: extracted.title, markdown: `# ${extracted.title}
1678
1788
 
@@ -1717,7 +1827,7 @@ async function ingestFile({
1717
1827
  previous
1718
1828
  }) {
1719
1829
  const resolved = resolve(filePath);
1720
- const fileStat = await stat3(resolved);
1830
+ const fileStat = await stat4(resolved);
1721
1831
  const mimeType = mimeTypeFor(resolved);
1722
1832
  const extracted = await extractFileContent(resolved, mimeType);
1723
1833
  const documentId = stableId("doc", source.id, resolved);
@@ -1728,10 +1838,10 @@ async function ingestFile({
1728
1838
  const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
1729
1839
  const indexedAt = now;
1730
1840
  const crawledAt = now;
1731
- await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
1732
- await mkdir8(resolve(workspacePath, "raw", source.id), { recursive: true });
1841
+ await mkdir9(resolve(workspacePath, "normalized"), { recursive: true });
1842
+ await mkdir9(resolve(workspacePath, "raw", source.id), { recursive: true });
1733
1843
  if (extracted.raw) {
1734
- await writeFile7(rawPath, extracted.raw, "utf8");
1844
+ await writeFile8(rawPath, extracted.raw, "utf8");
1735
1845
  }
1736
1846
  await writeNormalizedDocument({
1737
1847
  documentId,
@@ -1794,7 +1904,7 @@ ${content}`;
1794
1904
  const now = (/* @__PURE__ */ new Date()).toISOString();
1795
1905
  const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
1796
1906
  const indexedAt = now;
1797
- await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
1907
+ await mkdir9(resolve(workspacePath, "normalized"), { recursive: true });
1798
1908
  await writeNormalizedDocument({
1799
1909
  documentId,
1800
1910
  sourceId: source.id,
@@ -1838,7 +1948,7 @@ async function reprocessStoredDocument(document, source) {
1838
1948
  if (!document.rawPath) {
1839
1949
  return null;
1840
1950
  }
1841
- const raw = await readFile8(document.rawPath, "utf8");
1951
+ const raw = await readFile9(document.rawPath, "utf8");
1842
1952
  const fallbackTitle = document.title || basename(document.uri);
1843
1953
  const extracted = await extractRawContent(raw, document.mimeType, fallbackTitle);
1844
1954
  const contentHash = sha256(extracted.markdown);
@@ -1955,8 +2065,8 @@ async function parseRssFeedDocument(xml, source) {
1955
2065
  }
1956
2066
 
1957
2067
  // src/ingest/adapters/url-adapter.ts
1958
- import { mkdir as mkdir9, readFile as readFile9, writeFile as writeFile8 } from "fs/promises";
1959
- import path16 from "path";
2068
+ import { mkdir as mkdir10, readFile as readFile10, writeFile as writeFile9 } from "fs/promises";
2069
+ import path17 from "path";
1960
2070
 
1961
2071
  // src/core/urls.ts
1962
2072
  function normalizeRemoteUrl(uri) {
@@ -1999,16 +2109,16 @@ async function normalizeRemoteDocument({
1999
2109
 
2000
2110
  ${extracted.markdown}`;
2001
2111
  const documentId = stableId("doc", source.id, canonicalUri);
2002
- const normalizedPath = path16.resolve(workspacePath, "normalized", `${documentId}.md`);
2003
- const rawPath = path16.resolve(workspacePath, "raw", source.id, `${sha256(canonicalUri).slice(0, 12)}.html`);
2112
+ const normalizedPath = path17.resolve(workspacePath, "normalized", `${documentId}.md`);
2113
+ const rawPath = path17.resolve(workspacePath, "raw", source.id, `${sha256(canonicalUri).slice(0, 12)}.html`);
2004
2114
  const contentHash = sha256(markdown);
2005
2115
  const now = (/* @__PURE__ */ new Date()).toISOString();
2006
2116
  const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
2007
2117
  const indexedAt = now;
2008
2118
  const crawledAt = now;
2009
2119
  const resolvedPublicationDate = choosePublicationDate(publicationDate, extractPublicationDateFromHtml(body), previous?.publicationDate);
2010
- await mkdir9(path16.resolve(workspacePath, "raw", source.id), { recursive: true });
2011
- await writeFile8(rawPath, body, "utf8");
2120
+ await mkdir10(path17.resolve(workspacePath, "raw", source.id), { recursive: true });
2121
+ await writeFile9(rawPath, body, "utf8");
2012
2122
  await writeNormalizedDocument({
2013
2123
  documentId,
2014
2124
  sourceId: source.id,
@@ -2128,7 +2238,7 @@ async function reprocessRemoteDocument(document, source) {
2128
2238
  if (!document.rawPath || !await fileExists(document.rawPath)) {
2129
2239
  return null;
2130
2240
  }
2131
- const raw = await readFile9(document.rawPath, "utf8");
2241
+ const raw = await readFile10(document.rawPath, "utf8");
2132
2242
  const extracted = extractHtmlToMarkdown(raw);
2133
2243
  const markdown = `# ${extracted.title}
2134
2244
 
@@ -2307,7 +2417,7 @@ async function crawlWebsite(source, defaults, progress) {
2307
2417
 
2308
2418
  // src/ingest/ingest-service.ts
2309
2419
  function documentsFile(workspacePath) {
2310
- return path17.join(workspacePath, "documents", "documents.jsonl");
2420
+ return path18.join(workspacePath, "documents", "documents.jsonl");
2311
2421
  }
2312
2422
  async function loadDocuments(workspacePath) {
2313
2423
  return readJsonl(documentsFile(workspacePath));
@@ -2850,9 +2960,9 @@ async function discoverWebsiteFeed(websiteUrl, userAgent) {
2850
2960
  }
2851
2961
 
2852
2962
  // src/query/search-service.ts
2853
- import { readFile as readFile10 } from "fs/promises";
2963
+ import { readFile as readFile11 } from "fs/promises";
2854
2964
  import { reciprocalRankFusion, searchJsonDsl } from "@tryformation/querylight-ts";
2855
- import path18 from "path";
2965
+ import path19 from "path";
2856
2966
  async function loadHydratedIndex(workspacePath) {
2857
2967
  let state;
2858
2968
  try {
@@ -3062,7 +3172,7 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
3062
3172
  if (!await fileExists(document.normalizedPath)) {
3063
3173
  return buildSnippet(chunk.text, query);
3064
3174
  }
3065
- const raw = await readFile10(document.normalizedPath, "utf8");
3175
+ const raw = await readFile11(document.normalizedPath, "utf8");
3066
3176
  orderedChunks = buildChunksForDocument(document, raw, config);
3067
3177
  orderedChunkCache.set(document.id, orderedChunks);
3068
3178
  }
@@ -3397,9 +3507,9 @@ async function searchIndex({
3397
3507
  const config = await loadConfig(workspacePath);
3398
3508
  const mode = retrievalMode ?? config.retrieval.defaultMode;
3399
3509
  const candidateLimit = Math.max(topK * 5, 50);
3400
- const chunks = new Map((await readJsonl(path18.join(workspacePath, "chunks", "chunks.jsonl"))).map((chunk) => [chunk.id, chunk]));
3401
- const documents = new Map((await readJsonl(path18.join(workspacePath, "documents", "documents.jsonl"))).map((document) => [document.id, document]));
3402
- const sources = new Map((await readJsonl(path18.join(workspacePath, "sources", "sources.jsonl"))).map((source) => [source.id, source]));
3510
+ const chunks = new Map((await readJsonl(path19.join(workspacePath, "chunks", "chunks.jsonl"))).map((chunk) => [chunk.id, chunk]));
3511
+ const documents = new Map((await readJsonl(path19.join(workspacePath, "documents", "documents.jsonl"))).map((document) => [document.id, document]));
3512
+ const sources = new Map((await readJsonl(path19.join(workspacePath, "sources", "sources.jsonl"))).map((source) => [source.id, source]));
3403
3513
  const orderedChunkCache = /* @__PURE__ */ new Map();
3404
3514
  const normalizedQuery = query.trim();
3405
3515
  const filterIds = [...chunks.values()].filter((chunk) => filterChunk(chunk, documents.get(chunk.documentId), sources.get(chunk.sourceId), { sourceId, sourceIds, sourceName, sourceNames, sourceType, sourceTypes, uriPrefix, uriPrefixes, hasPublicationDate, tag, tags, metadata, dateRanges })).map((chunk) => chunk.id);
@@ -3572,18 +3682,18 @@ async function searchIndex({
3572
3682
 
3573
3683
  // src/server/search-api.ts
3574
3684
  import { createServer } from "http";
3575
- import { readdir, stat as stat4 } from "fs/promises";
3576
- import path19 from "path";
3685
+ import { readdir as readdir2, stat as stat5 } from "fs/promises";
3686
+ import path20 from "path";
3577
3687
  async function pathIsDirectory(candidatePath) {
3578
3688
  try {
3579
- return (await stat4(candidatePath)).isDirectory();
3689
+ return (await stat5(candidatePath)).isDirectory();
3580
3690
  } catch {
3581
3691
  return false;
3582
3692
  }
3583
3693
  }
3584
3694
  async function discoverKnowledgeBases(workspacePath) {
3585
3695
  try {
3586
- const singleWorkspace = await assertWorkspaceExists(workspacePath);
3696
+ const singleWorkspace = (await resolveReadableWorkspace(workspacePath)).workspacePath;
3587
3697
  const config = await loadConfig(singleWorkspace);
3588
3698
  const index = await loadHydratedIndex(singleWorkspace);
3589
3699
  return {
@@ -3600,19 +3710,20 @@ async function discoverKnowledgeBases(workspacePath) {
3600
3710
  throw error;
3601
3711
  }
3602
3712
  }
3603
- const resolvedRoot = path19.resolve(workspacePath);
3713
+ const resolvedRoot = path20.resolve(workspacePath);
3604
3714
  if (!await pathIsDirectory(resolvedRoot)) {
3605
3715
  throw new CliError(`workspace path does not exist: ${resolvedRoot}`, "WORKSPACE_ERROR", 3 /* WorkspaceError */);
3606
3716
  }
3607
- const entries = await readdir(resolvedRoot, { withFileTypes: true });
3608
- const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory()).map(async (entry) => {
3609
- const candidateWorkspace = path19.join(resolvedRoot, entry.name, ".kb");
3717
+ const entries = await readdir2(resolvedRoot, { withFileTypes: true });
3718
+ const knowledgeBases = (await Promise.all(entries.filter((entry) => entry.isDirectory() || entry.isFile() && isWorkspaceArchivePath(entry.name)).map(async (entry) => {
3719
+ const candidateWorkspace = entry.isDirectory() ? path20.join(resolvedRoot, entry.name, ".kb") : path20.join(resolvedRoot, entry.name);
3720
+ const knowledgeBaseName = entry.isDirectory() ? entry.name : entry.name.replace(/\.zip$/i, "");
3610
3721
  try {
3611
- const workspace = await assertWorkspaceExists(candidateWorkspace);
3722
+ const workspace = entry.isDirectory() ? await assertWorkspaceExists(candidateWorkspace) : (await resolveReadableWorkspace(candidateWorkspace)).workspacePath;
3612
3723
  const config = await loadConfig(workspace);
3613
3724
  const index = await loadHydratedIndex(workspace);
3614
3725
  return {
3615
- name: entry.name,
3726
+ name: knowledgeBaseName,
3616
3727
  workspacePath: workspace,
3617
3728
  configuredIndexName: config.index.name,
3618
3729
  index
@@ -3626,7 +3737,7 @@ async function discoverKnowledgeBases(workspacePath) {
3626
3737
  }))).filter((knowledgeBase) => knowledgeBase != null);
3627
3738
  if (knowledgeBases.length === 0) {
3628
3739
  throw new CliError(
3629
- `no knowledge bases found at ${resolvedRoot}; use a .kb workspace or a directory of named subdirectories that each contain .kb`,
3740
+ `no knowledge bases found at ${resolvedRoot}; use a .kb workspace, a .zip workspace, or a directory of .zip files or named subdirectories that each contain .kb`,
3630
3741
  "WORKSPACE_ERROR",
3631
3742
  3 /* WorkspaceError */
3632
3743
  );
@@ -3760,7 +3871,7 @@ async function startSearchApiServer({
3760
3871
  }
3761
3872
 
3762
3873
  // src/query/related-service.ts
3763
- import path20 from "path";
3874
+ import path21 from "path";
3764
3875
  function cosineSimilarity2(left, right) {
3765
3876
  let dot = 0;
3766
3877
  let leftNorm = 0;
@@ -3836,7 +3947,7 @@ async function findRelatedDocuments({
3836
3947
  if (!await fileExists(denseVectorPath(workspacePath))) {
3837
3948
  throw new CliError("dense vector index is not built; run `qli models pull --dense` and `qli rebuild`", "DENSE_INDEX_MISSING", 7 /* QueryError */);
3838
3949
  }
3839
- const documents = await readJsonl(path20.join(workspacePath, "documents", "documents.jsonl"));
3950
+ const documents = await readJsonl(path21.join(workspacePath, "documents", "documents.jsonl"));
3840
3951
  const selected = resolveDocumentSelector(documents, document);
3841
3952
  const densePayload = await readDensePayload(workspacePath);
3842
3953
  const vectors = buildDocumentVectors(documents, densePayload.chunks, densePayload.metadata.dimensions);
@@ -3909,7 +4020,7 @@ async function createContext({
3909
4020
  }
3910
4021
 
3911
4022
  // src/report/diff-service.ts
3912
- import path21 from "path";
4023
+ import path22 from "path";
3913
4024
  function chooseBaselineRun(runs, since) {
3914
4025
  if (since === "last-run") {
3915
4026
  return runs.at(-1);
@@ -3925,7 +4036,7 @@ async function diffWorkspace({
3925
4036
  documentId,
3926
4037
  since
3927
4038
  }) {
3928
- const current = await readJsonl(path21.join(workspacePath, "documents", "documents.jsonl"));
4039
+ const current = await readJsonl(path22.join(workspacePath, "documents", "documents.jsonl"));
3929
4040
  const baseline = chooseBaselineRun(await listRuns(workspacePath), since);
3930
4041
  const previous = new Map((baseline?.documentsSnapshot ?? []).map((document) => [document.id, document]));
3931
4042
  const changedDocuments = current.filter((document) => (!sourceId || document.sourceId === sourceId) && (!documentId || document.id === documentId)).filter((document) => {
@@ -4284,7 +4395,7 @@ function parseDateValue(input, optionName) {
4284
4395
  return parsed.toISOString();
4285
4396
  }
4286
4397
  async function parseJsonArgument(input) {
4287
- const raw = input.startsWith("@") ? await readFile11(path22.resolve(input.slice(1)), "utf8") : input;
4398
+ const raw = input.startsWith("@") ? await readFile12(path23.resolve(input.slice(1)), "utf8") : input;
4288
4399
  try {
4289
4400
  const parsed = JSON.parse(raw);
4290
4401
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
@@ -4338,20 +4449,24 @@ function resolveSearchTopK(optionsTopK, sourceTypes, dateRanges, defaultTopK) {
4338
4449
  }
4339
4450
  return defaultTopK;
4340
4451
  }
4341
- async function resolveWorkspace(options) {
4342
- return path22.resolve(options.workspace ?? DEFAULT_WORKSPACE);
4452
+ async function resolveWorkspace(options, mode = {}) {
4453
+ const workspace = options.workspace ?? DEFAULT_WORKSPACE;
4454
+ if (mode.writable) {
4455
+ return assertWritableWorkspacePath(workspace);
4456
+ }
4457
+ return (await resolveReadableWorkspace(workspace)).workspacePath;
4343
4458
  }
4344
4459
  function workspaceFromArgv(argv) {
4345
4460
  const index = argv.findIndex((arg) => arg === "--workspace");
4346
4461
  if (index >= 0 && argv[index + 1]) {
4347
- return path22.resolve(argv[index + 1]);
4462
+ return path23.resolve(argv[index + 1]);
4348
4463
  }
4349
- return path22.resolve(DEFAULT_WORKSPACE);
4464
+ return path23.resolve(DEFAULT_WORKSPACE);
4350
4465
  }
4351
4466
  async function runCli(argv, io = {}) {
4352
4467
  const capture = { stdout: [], stderr: [], ...io };
4353
4468
  const program = new Command();
4354
- program.name("qli").description("Build and query a local Querylight workspace from files, directories, URLs, websites, and feeds.").showHelpAfterError().option("--workspace <path>", "Workspace directory. Defaults to .kb in the current directory.", DEFAULT_WORKSPACE).option("--config <path>", "Optional config file override. Useful for testing alternate retrieval settings.").option("--json", "Return a stable JSON envelope for automation and agents.").option("--silent", "Suppress progress logging for long-running commands.").option("--verbose", "Print more operational detail when a command supports it.").addOption(new Option("--quiet", "Deprecated alias for --silent.").hideHelp());
4469
+ program.name("qli").description("Build and query a local Querylight workspace from files, directories, URLs, websites, and feeds.").showHelpAfterError().option("--workspace <path>", "Workspace directory, or a packaged .zip workspace for read-only commands. Defaults to .kb in the current directory.", DEFAULT_WORKSPACE).option("--config <path>", "Optional config file override. Useful for testing alternate retrieval settings.").option("--json", "Return a stable JSON envelope for automation and agents.").option("--silent", "Suppress progress logging for long-running commands.").option("--verbose", "Print more operational detail when a command supports it.").addOption(new Option("--quiet", "Deprecated alias for --silent.").hideHelp());
4355
4470
  program.addHelpText("after", `
4356
4471
  Workflow:
4357
4472
  1. Initialize a workspace with qli init
@@ -4363,12 +4478,15 @@ Examples:
4363
4478
  qli init
4364
4479
  qli source add directory ./docs --name "Product Docs" --tag docs
4365
4480
  qli ingest
4481
+ qli package ./docs-kb.zip
4366
4482
  qli rebuild --silent
4367
4483
  qli search "api authentication" --top-k 8
4484
+ qli search --workspace ./docs-kb.zip "api authentication"
4368
4485
  qli context "How do API keys work?" --top-k 8 --max-chars 8000
4369
4486
 
4370
4487
  Long-running commands print progress to stderr by default. Use --silent to suppress it.
4371
4488
  Use --json when another tool needs stable structured output.
4489
+ Read-only commands can use --workspace with a packaged .zip workspace.
4372
4490
 
4373
4491
  Use qli <command> --help for command-specific options and examples.`);
4374
4492
  program.command("init").description("Create a new workspace with the default directory layout and config, then pull missing retrieval models.").option("--force").addHelpText("after", `
@@ -4382,7 +4500,7 @@ Notes:
4382
4500
  init pulls missing model assets for enabled retrieval modes.
4383
4501
  Sparse model downloads require uv. If uv is not available, init skips the sparse pull.`).action(async function command(options) {
4384
4502
  const global = this.optsWithGlobals();
4385
- const workspace = await resolveWorkspace({ workspace: this.optsWithGlobals().workspace });
4503
+ const workspace = await resolveWorkspace({ workspace: this.optsWithGlobals().workspace }, { writable: true });
4386
4504
  const result = await ensureWorkspace({ workspacePath: workspace, force: Boolean(options.force) });
4387
4505
  const config = await loadConfig(workspace, global.config);
4388
4506
  const status = await getModelStatus(workspace, config);
@@ -4423,7 +4541,7 @@ Notes:
4423
4541
  }
4424
4542
  validateSourceAddOptions(type, options);
4425
4543
  const global = this.optsWithGlobals();
4426
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4544
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4427
4545
  const config = await loadConfig(workspace, global.config);
4428
4546
  const now = (/* @__PURE__ */ new Date()).toISOString();
4429
4547
  const initialCrawl = createSourceCrawlConfig(type, options, { retentionDays: config.crawler.retentionDays });
@@ -4440,7 +4558,7 @@ Notes:
4440
4558
  }
4441
4559
  const stored = await addSource(workspace, {
4442
4560
  type,
4443
- uri: ["file", "directory"].includes(type) ? path22.resolve(uri) : uri,
4561
+ uri: ["file", "directory"].includes(type) ? path23.resolve(uri) : uri,
4444
4562
  name: options.name,
4445
4563
  enabled: true,
4446
4564
  tags: options.tag ?? [],
@@ -4501,7 +4619,7 @@ Notes:
4501
4619
  qli only exposes settings that the current source type uses at runtime.
4502
4620
  URI, source type, and source id do not change here.`).action(async function command(sourceId, options) {
4503
4621
  const global = this.optsWithGlobals();
4504
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4622
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4505
4623
  const sources = await listSources(workspace);
4506
4624
  const current = sources.find((source2) => source2.id === sourceId);
4507
4625
  if (!current) {
@@ -4529,7 +4647,7 @@ Examples:
4529
4647
  qli source remove src_123
4530
4648
  qli source list --json`).action(async function command(sourceId) {
4531
4649
  const global = this.optsWithGlobals();
4532
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4650
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4533
4651
  await removeSource(workspace, sourceId);
4534
4652
  emit(global.json, capture, response("source remove", workspace, { sourceId }), `Removed source ${sourceId}`);
4535
4653
  });
@@ -4538,7 +4656,7 @@ Examples:
4538
4656
  qli source disable src_123
4539
4657
  qli source enable src_123`).action(async function command(sourceId) {
4540
4658
  const global = this.optsWithGlobals();
4541
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4659
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4542
4660
  const updated = await updateSource(workspace, sourceId, { enabled: false, updatedAt: (/* @__PURE__ */ new Date()).toISOString() });
4543
4661
  emit(global.json, capture, response("source disable", workspace, updated), `Disabled source ${sourceId}`);
4544
4662
  });
@@ -4547,7 +4665,7 @@ Examples:
4547
4665
  qli source enable src_123
4548
4666
  qli source list`).action(async function command(sourceId) {
4549
4667
  const global = this.optsWithGlobals();
4550
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4668
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4551
4669
  const updated = await updateSource(workspace, sourceId, { enabled: true, updatedAt: (/* @__PURE__ */ new Date()).toISOString() });
4552
4670
  emit(global.json, capture, response("source enable", workspace, updated), `Enabled source ${sourceId}`);
4553
4671
  });
@@ -4559,7 +4677,7 @@ Examples:
4559
4677
  qli ingest --dense --sparse
4560
4678
  qli ingest --silent`).action(async function command(options) {
4561
4679
  const global = this.optsWithGlobals();
4562
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4680
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4563
4681
  const result = await runIngestCommand({
4564
4682
  workspace,
4565
4683
  sourceId: options.source,
@@ -4577,7 +4695,7 @@ Examples:
4577
4695
  qli chunk --document doc_123
4578
4696
  qli chunk --silent`).action(async function command(options) {
4579
4697
  const global = this.optsWithGlobals();
4580
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4698
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4581
4699
  const result = await chunkDocuments({
4582
4700
  workspacePath: workspace,
4583
4701
  sourceId: options.source,
@@ -4593,7 +4711,7 @@ Examples:
4593
4711
  qli reprocess --document doc_123
4594
4712
  qli reprocess --silent`).action(async function command(options) {
4595
4713
  const global = this.optsWithGlobals();
4596
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4714
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4597
4715
  const result = await reprocessDocuments({
4598
4716
  workspacePath: workspace,
4599
4717
  sourceId: options.source,
@@ -4611,7 +4729,7 @@ Examples:
4611
4729
  qli index build --dense --sparse
4612
4730
  qli index build --silent`).action(async function command(options) {
4613
4731
  const global = this.optsWithGlobals();
4614
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4732
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4615
4733
  const result = await buildIndex({
4616
4734
  workspacePath: workspace,
4617
4735
  denseOverride: options.dense ? true : void 0,
@@ -4628,7 +4746,7 @@ Examples:
4628
4746
  qli rebuild --dense --sparse
4629
4747
  qli rebuild --silent`).action(async function command(options) {
4630
4748
  const global = this.optsWithGlobals();
4631
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4749
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4632
4750
  const progress = createProgressHandler(capture, global);
4633
4751
  progress?.("info", "Rebuild step 1/3: ingest");
4634
4752
  const ingest = await ingestSources({
@@ -4651,6 +4769,25 @@ Examples:
4651
4769
  progress?.("info", "Rebuild complete");
4652
4770
  emit(global.json, capture, response("rebuild", workspace, data), `Processed ${ingest.processedSources} sources, wrote ${chunk.chunksWritten} chunks`);
4653
4771
  });
4772
+ program.command("package").description("Write the current workspace to a zip archive that read-only commands can use directly.").argument("<archive>", "Output .zip file.").option("--force", "Replace the output archive if it already exists.").addHelpText("after", `
4773
+ Examples:
4774
+ qli package ./docs-kb.zip
4775
+ qli package ./deploy/docs-kb.zip --workspace ./docs/.kb
4776
+ qli package ./docs-kb.zip --force --json
4777
+
4778
+ Notes:
4779
+ The archive stores the workspace contents at the zip root.
4780
+ Use the zip with read-only commands such as search, search-json, related, context, status, doctor, and serve.
4781
+ Rebuild the directory workspace and package it again when source content changes.`).action(async function command(archive, options) {
4782
+ const global = this.optsWithGlobals();
4783
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4784
+ const result = await packageWorkspaceArchive({
4785
+ workspacePath: workspace,
4786
+ outputPath: archive,
4787
+ force: Boolean(options.force)
4788
+ });
4789
+ emit(global.json, capture, response("package", workspace, result), `Packaged ${result.fileCount} files to ${result.archivePath}`);
4790
+ });
4654
4791
  program.command("search").description("Search the built index and return ranked matching documents or chunks. Use search-json for raw JSON DSL queries.").argument("[query]", "Text query. Omit it to list the latest matching documents.").option("--top-k <n>", "Maximum number of results to return. Defaults to search.defaultTopK in config.yaml. RSS searches with a time window use 500 when omitted.").option("--source <sourceIds>", "Restrict results to one or more source ids. Use comma-separated values.").option("--source-name <names>", "Restrict results to one or more source names. Use comma-separated values.").option("--source-type <types>", `Restrict results to one or more source types. Use comma-separated values: ${SOURCE_TYPE_LIST.join(", ")}`).option("--uri-prefix <prefixes>", "Restrict results to one or more URI prefixes. Use comma-separated values.").option("--tag <tags>", "Restrict results to one or more source tags. Use comma-separated values.").option("--metadata <key=value...>", "Restrict results to sources with matching metadata.").option("--since <date>", "Shortcut for --publication-date-from.").option("--until <date>", "Shortcut for --publication-date-to.").option("--changed-since <date>", "Only include documents changed on or after this date.").option("--has-publication-date", "Only include documents with a publication date.").option("--publication-date-from <date>", "Only include documents published on or after this date.").option("--publication-date-to <date>", "Only include documents published on or before this date.").option("--first-seen-at-from <date>", "Only include documents first seen on or after this date.").option("--first-seen-at-to <date>", "Only include documents first seen on or before this date.").option("--last-seen-at-from <date>", "Only include documents last seen on or after this date.").option("--last-seen-at-to <date>", "Only include documents last seen on or before this date.").option("--last-changed-at-from <date>", "Only include documents changed on or after this date.").option("--last-changed-at-to <date>", "Only include documents changed on or before this date.").option("--crawled-at-from <date>", "Only include documents crawled on or after this date.").option("--crawled-at-to <date>", "Only include documents crawled on or before this date.").option("--retrieval <mode>", `Retrieval mode: ${RETRIEVAL_MODE_LIST.join(", ")}`).option("--show-chunks", "Return chunk-level matches when available.").addHelpText("after", `
4655
4792
  Examples:
4656
4793
  qli search "pricing api limits"
@@ -4659,6 +4796,7 @@ Examples:
4659
4796
  qli search --source-name "Release Feed,Company Blog" --uri-prefix https://example.com/news,https://example.com/blog
4660
4797
  qli search "billing" --metadata team=support
4661
4798
  qli search "embedding model" --retrieval hybrid --show-chunks
4799
+ qli search --workspace ./docs-kb.zip "authentication"
4662
4800
  qli search --source-type rss,page --top-k 25 --json
4663
4801
 
4664
4802
  Notes:
@@ -4713,6 +4851,7 @@ Notes:
4713
4851
  Examples:
4714
4852
  qli serve
4715
4853
  qli serve --workspace ./docs/.kb --port 4000
4854
+ qli serve --workspace ./docs-kb.zip --port 4000
4716
4855
  qli serve --workspace ./kbs --host 0.0.0.0 --port 4000
4717
4856
 
4718
4857
  Routes:
@@ -4723,10 +4862,10 @@ Routes:
4723
4862
  Notes:
4724
4863
  The request body must be a Querylight JSON DSL object.
4725
4864
  serve only exposes lexical _search for now.
4726
- When --workspace points to a directory of knowledge bases, each child directory must contain its own .kb workspace.
4865
+ When --workspace points to a directory of knowledge bases, qli serves child .zip files and child directories that contain .kb.
4727
4866
  Index files are loaded once at startup and reused across requests.`).action(async function command(options) {
4728
4867
  const global = this.optsWithGlobals();
4729
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4868
+ const workspace = path23.resolve(global.workspace ?? DEFAULT_WORKSPACE);
4730
4869
  const port = Number(options.port);
4731
4870
  if (!Number.isInteger(port) || port < 0 || port > 65535) {
4732
4871
  throw new CliError(`invalid port: ${options.port}`, "INVALID_ARGUMENT", 2 /* InvalidArguments */);
@@ -4803,7 +4942,7 @@ Use --json when another tool needs structured access to the raw passages and met
4803
4942
  });
4804
4943
  const models = program.command("models");
4805
4944
  models.description("Inspect and download retrieval model assets.");
4806
- models.command("pull").description("Download dense and or sparse retrieval assets required by vector search.").option("--dense", "Only pull dense retrieval assets.").option("--sparse", "Only pull sparse retrieval assets.").addHelpText("after", `
4945
+ models.command("pull").description("Download dense or sparse retrieval assets required by vector search.").option("--dense", "Only pull dense retrieval assets.").option("--sparse", "Only pull sparse retrieval assets.").addHelpText("after", `
4807
4946
  Examples:
4808
4947
  qli models pull
4809
4948
  qli models pull --dense
@@ -4813,7 +4952,7 @@ Examples:
4813
4952
  Pulled model assets are shared under ~/.qli by default.
4814
4953
  If you plan to use related, dense search, or hybrid retrieval, pull the models and rebuild the index first.`).action(async function command(options) {
4815
4954
  const global = this.optsWithGlobals();
4816
- const workspace = await resolveWorkspace({ workspace: global.workspace });
4955
+ const workspace = await resolveWorkspace({ workspace: global.workspace }, { writable: true });
4817
4956
  const config = await loadConfig(workspace, global.config);
4818
4957
  const status = await getModelStatus(workspace, config);
4819
4958
  const { pullDense, pullSparse } = resolveModelPullPlan({
@@ -4889,7 +5028,7 @@ Examples:
4889
5028
  try {
4890
5029
  const meta = await readLatestIndexMetadata(workspace);
4891
5030
  latestIndex = meta.createdAt;
4892
- indexSize = (await stat5(await resolveLatestIndexArtifactPath(workspace))).size;
5031
+ indexSize = (await stat6(await resolveLatestIndexArtifactPath(workspace))).size;
4893
5032
  } catch {
4894
5033
  latestIndex = void 0;
4895
5034
  }