@tryformation/querylight-cli 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -208,8 +208,8 @@ The default workspace is `.kb/`.
208
208
  raw/
209
209
  normalized/
210
210
  indexes/
211
- latest.json
212
- latest.meta.json
211
+ latest.json.gz
212
+ latest.meta.json.gz
213
213
  runs/
214
214
  logs/
215
215
  ```
package/dist/cli/main.js CHANGED
@@ -16,7 +16,7 @@ import path from "path";
16
16
  import YAML from "yaml";
17
17
 
18
18
  // src/core/constants.ts
19
- var PACKAGE_VERSION = "0.2.0";
19
+ var PACKAGE_VERSION = "0.2.1";
20
20
  var DEFAULT_WORKSPACE = ".kb";
21
21
  var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
22
22
  var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
@@ -490,8 +490,40 @@ async function getDenseTransformersRuntime(cacheDir) {
490
490
  }
491
491
 
492
492
  // src/vector/store.ts
493
- import { mkdir as mkdir3, readFile as readFile4, writeFile as writeFile3 } from "fs/promises";
493
+ import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
494
494
  import path7 from "path";
495
+
496
+ // src/core/gzip-json.ts
497
+ import { readFile as readFile4, writeFile as writeFile3 } from "fs/promises";
498
+ import { promisify } from "util";
499
+ import { gunzip, gzip } from "zlib";
500
+ var gzipAsync = promisify(gzip);
501
+ var gunzipAsync = promisify(gunzip);
502
+ async function writeGzipJson(filePath, value) {
503
+ const payload = JSON.stringify(value, null, 2);
504
+ await writeFile3(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
505
+ }
506
+ async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
507
+ if (await fileExists(gzipPath)) {
508
+ const payload = await readFile4(gzipPath);
509
+ return JSON.parse((await gunzipAsync(payload)).toString("utf8"));
510
+ }
511
+ if (legacyPath && await fileExists(legacyPath)) {
512
+ return JSON.parse(await readFile4(legacyPath, "utf8"));
513
+ }
514
+ return JSON.parse(await readFile4(gzipPath, "utf8"));
515
+ }
516
+ async function resolveExistingGzipOrFilePath(gzipPath, legacyPath) {
517
+ if (await fileExists(gzipPath)) {
518
+ return gzipPath;
519
+ }
520
+ if (legacyPath && await fileExists(legacyPath)) {
521
+ return legacyPath;
522
+ }
523
+ return gzipPath;
524
+ }
525
+
526
+ // src/vector/store.ts
495
527
  function vectorsDir(workspacePath) {
496
528
  return path7.join(workspacePath, "vectors");
497
529
  }
@@ -499,15 +531,27 @@ function sharedModelStateDir() {
499
531
  return path7.join(resolveQliHomeDir(), "models", "status");
500
532
  }
501
533
  function denseVectorPath(workspacePath) {
502
- return path7.join(vectorsDir(workspacePath), "dense.latest.json");
534
+ return path7.join(vectorsDir(workspacePath), "dense.latest.json.gz");
503
535
  }
504
536
  function denseMetaPath(workspacePath) {
505
- return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json");
537
+ return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
506
538
  }
507
539
  function sparseVectorPath(workspacePath) {
508
- return path7.join(vectorsDir(workspacePath), "sparse.latest.json");
540
+ return path7.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
509
541
  }
510
542
  function sparseMetaPath(workspacePath) {
543
+ return path7.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
544
+ }
545
+ function legacyDenseVectorPath(workspacePath) {
546
+ return path7.join(vectorsDir(workspacePath), "dense.latest.json");
547
+ }
548
+ function legacyDenseMetaPath(workspacePath) {
549
+ return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json");
550
+ }
551
+ function legacySparseVectorPath(workspacePath) {
552
+ return path7.join(vectorsDir(workspacePath), "sparse.latest.json");
553
+ }
554
+ function legacySparseMetaPath(workspacePath) {
511
555
  return path7.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
512
556
  }
513
557
  function pullMarkerPath(type, workspacePath, modelId, cacheDir) {
@@ -523,29 +567,37 @@ function sparsePullMarker(workspacePath, modelId, cacheDir) {
523
567
  }
524
568
  async function writeDensePayload(workspacePath, payload) {
525
569
  await mkdir3(vectorsDir(workspacePath), { recursive: true });
526
- await writeFile3(denseVectorPath(workspacePath), JSON.stringify(payload, null, 2), "utf8");
527
- await writeFile3(denseMetaPath(workspacePath), JSON.stringify(payload.metadata, null, 2), "utf8");
570
+ await writeGzipJson(denseVectorPath(workspacePath), payload);
571
+ await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
572
+ await Promise.all([
573
+ rm(legacyDenseVectorPath(workspacePath), { force: true }),
574
+ rm(legacyDenseMetaPath(workspacePath), { force: true })
575
+ ]);
528
576
  }
529
577
  async function readDensePayload(workspacePath) {
530
- return JSON.parse(await readFile4(denseVectorPath(workspacePath), "utf8"));
578
+ return readJsonFromGzipOrFile(denseVectorPath(workspacePath), legacyDenseVectorPath(workspacePath));
531
579
  }
532
580
  async function writeSparsePayload(workspacePath, payload) {
533
581
  await mkdir3(vectorsDir(workspacePath), { recursive: true });
534
- await writeFile3(sparseVectorPath(workspacePath), JSON.stringify(payload, null, 2), "utf8");
535
- await writeFile3(sparseMetaPath(workspacePath), JSON.stringify(payload.metadata, null, 2), "utf8");
582
+ await writeGzipJson(sparseVectorPath(workspacePath), payload);
583
+ await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
584
+ await Promise.all([
585
+ rm(legacySparseVectorPath(workspacePath), { force: true }),
586
+ rm(legacySparseMetaPath(workspacePath), { force: true })
587
+ ]);
536
588
  }
537
589
  async function readSparsePayload(workspacePath) {
538
- return JSON.parse(await readFile4(sparseVectorPath(workspacePath), "utf8"));
590
+ return readJsonFromGzipOrFile(sparseVectorPath(workspacePath), legacySparseVectorPath(workspacePath));
539
591
  }
540
592
  async function writeDensePullMarker(workspacePath, model, value) {
541
593
  const markerPath = densePullMarker(workspacePath, model.modelId, model.cacheDir);
542
594
  await mkdir3(path7.dirname(markerPath), { recursive: true });
543
- await writeFile3(markerPath, JSON.stringify(value, null, 2), "utf8");
595
+ await writeFile4(markerPath, JSON.stringify(value, null, 2), "utf8");
544
596
  }
545
597
  async function writeSparsePullMarker(workspacePath, model, value) {
546
598
  const markerPath = sparsePullMarker(workspacePath, model.modelId, model.cacheDir);
547
599
  await mkdir3(path7.dirname(markerPath), { recursive: true });
548
- await writeFile3(markerPath, JSON.stringify(value, null, 2), "utf8");
600
+ await writeFile4(markerPath, JSON.stringify(value, null, 2), "utf8");
549
601
  }
550
602
  async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
551
603
  const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
@@ -556,7 +608,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
556
608
  modelId: dense.modelId,
557
609
  cacheDir: denseCacheDir,
558
610
  available: await fileExists(densePullMarker(workspacePath, dense.modelId, dense.cacheDir)),
559
- artifactExists: await fileExists(denseVectorPath(workspacePath))
611
+ artifactExists: await fileExists(denseVectorPath(workspacePath)) || await fileExists(legacyDenseVectorPath(workspacePath))
560
612
  },
561
613
  sparse: {
562
614
  configured: sparse.enabled,
@@ -564,7 +616,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
564
616
  cacheDir: sparseCacheDir,
565
617
  uvAvailable,
566
618
  available: await fileExists(sparsePullMarker(workspacePath, sparse.modelId, sparse.cacheDir)),
567
- artifactExists: await fileExists(sparseVectorPath(workspacePath))
619
+ artifactExists: await fileExists(sparseVectorPath(workspacePath)) || await fileExists(legacySparseVectorPath(workspacePath))
568
620
  }
569
621
  };
570
622
  }
@@ -963,31 +1015,63 @@ async function getModelStatus(workspacePath, config) {
963
1015
  }
964
1016
 
965
1017
  // src/index/index-store.ts
966
- import { readFile as readFile5, writeFile as writeFile4 } from "fs/promises";
1018
+ import { mkdir as mkdir6, rm as rm2 } from "fs/promises";
967
1019
  import path10 from "path";
1020
+ function versionedIndexPath(workspacePath, stamp) {
1021
+ return path10.join(workspacePath, "indexes", `${stamp}.json.gz`);
1022
+ }
1023
+ function versionedLegacyIndexPath(workspacePath, stamp) {
1024
+ return path10.join(workspacePath, "indexes", `${stamp}.json`);
1025
+ }
1026
+ function versionedMetaPath(workspacePath, stamp) {
1027
+ return path10.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
1028
+ }
1029
+ function versionedLegacyMetaPath(workspacePath, stamp) {
1030
+ return path10.join(workspacePath, "indexes", `${stamp}.meta.json`);
1031
+ }
1032
+ function latestIndexPath(workspacePath) {
1033
+ return path10.join(workspacePath, "indexes", "latest.json.gz");
1034
+ }
1035
+ function legacyLatestIndexPath(workspacePath) {
1036
+ return path10.join(workspacePath, "indexes", "latest.json");
1037
+ }
1038
+ function latestMetaPath(workspacePath) {
1039
+ return path10.join(workspacePath, "indexes", "latest.meta.json.gz");
1040
+ }
1041
+ function legacyLatestMetaPath(workspacePath) {
1042
+ return path10.join(workspacePath, "indexes", "latest.meta.json");
1043
+ }
968
1044
  async function writeIndexArtifacts({
969
1045
  workspacePath,
970
1046
  indexState,
971
1047
  metadata
972
1048
  }) {
973
1049
  const stamp = metadata.createdAt.replace(/[:.]/g, "-");
974
- const indexPath = path10.join(workspacePath, "indexes", `${stamp}.json`);
975
- const metaPath = path10.join(workspacePath, "indexes", `${stamp}.meta.json`);
976
- const latestIndexPath = path10.join(workspacePath, "indexes", "latest.json");
977
- const latestMetaPath = path10.join(workspacePath, "indexes", "latest.meta.json");
978
- const indexPayload = JSON.stringify(indexState, null, 2);
979
- const metaPayload = JSON.stringify(metadata, null, 2);
980
- await writeFile4(indexPath, indexPayload, "utf8");
981
- await writeFile4(metaPath, metaPayload, "utf8");
982
- await writeFile4(latestIndexPath, indexPayload, "utf8");
983
- await writeFile4(latestMetaPath, metaPayload, "utf8");
984
- return { indexPath: latestIndexPath, metadataPath: latestMetaPath };
1050
+ const indexPath = versionedIndexPath(workspacePath, stamp);
1051
+ const metaPath = versionedMetaPath(workspacePath, stamp);
1052
+ const latestIndexArtifactPath = latestIndexPath(workspacePath);
1053
+ const latestMetadataArtifactPath = latestMetaPath(workspacePath);
1054
+ await mkdir6(path10.join(workspacePath, "indexes"), { recursive: true });
1055
+ await writeGzipJson(indexPath, indexState);
1056
+ await writeGzipJson(metaPath, metadata);
1057
+ await writeGzipJson(latestIndexArtifactPath, indexState);
1058
+ await writeGzipJson(latestMetadataArtifactPath, metadata);
1059
+ await Promise.all([
1060
+ rm2(legacyLatestIndexPath(workspacePath), { force: true }),
1061
+ rm2(legacyLatestMetaPath(workspacePath), { force: true }),
1062
+ rm2(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
1063
+ rm2(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
1064
+ ]);
1065
+ return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
985
1066
  }
986
1067
  async function readLatestIndexState(workspacePath) {
987
- return JSON.parse(await readFile5(path10.join(workspacePath, "indexes", "latest.json"), "utf8"));
1068
+ return readJsonFromGzipOrFile(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
988
1069
  }
989
1070
  async function readLatestIndexMetadata(workspacePath) {
990
- return JSON.parse(await readFile5(path10.join(workspacePath, "indexes", "latest.meta.json"), "utf8"));
1071
+ return readJsonFromGzipOrFile(latestMetaPath(workspacePath), legacyLatestMetaPath(workspacePath));
1072
+ }
1073
+ async function resolveLatestIndexArtifactPath(workspacePath) {
1074
+ return resolveExistingGzipOrFilePath(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
991
1075
  }
992
1076
 
993
1077
  // src/index/querylight-indexer.ts
@@ -1178,7 +1262,7 @@ async function removeSource(workspacePath, sourceId) {
1178
1262
  }
1179
1263
 
1180
1264
  // src/ingest/document-utils.ts
1181
- import { mkdir as mkdir6, rm, writeFile as writeFile5 } from "fs/promises";
1265
+ import { mkdir as mkdir7, rm as rm3, writeFile as writeFile5 } from "fs/promises";
1182
1266
  import path14 from "path";
1183
1267
 
1184
1268
  // src/normalize/normalize-markdown.ts
@@ -1231,7 +1315,7 @@ async function writeNormalizedDocument({
1231
1315
  normalizedPath,
1232
1316
  markdown
1233
1317
  }) {
1234
- await mkdir6(path14.dirname(normalizedPath), { recursive: true });
1318
+ await mkdir7(path14.dirname(normalizedPath), { recursive: true });
1235
1319
  await writeFile5(
1236
1320
  normalizedPath,
1237
1321
  withFrontmatter(
@@ -1254,8 +1338,8 @@ async function writeNormalizedDocument({
1254
1338
  }
1255
1339
  async function deleteDocumentArtifacts(document) {
1256
1340
  await Promise.all([
1257
- document.rawPath ? rm(document.rawPath, { force: true }) : Promise.resolve(),
1258
- rm(document.normalizedPath, { force: true })
1341
+ document.rawPath ? rm3(document.rawPath, { force: true }) : Promise.resolve(),
1342
+ rm3(document.normalizedPath, { force: true })
1259
1343
  ]);
1260
1344
  }
1261
1345
 
@@ -1279,7 +1363,7 @@ async function listDirectoryFiles(source) {
1279
1363
 
1280
1364
  // src/ingest/adapters/file-adapter.ts
1281
1365
  import { basename, extname, resolve } from "path";
1282
- import { mkdir as mkdir7, readFile as readFile9, stat as stat3, writeFile as writeFile6 } from "fs/promises";
1366
+ import { mkdir as mkdir8, readFile as readFile8, stat as stat3, writeFile as writeFile6 } from "fs/promises";
1283
1367
 
1284
1368
  // src/ingest/extractors/docx-extractor.ts
1285
1369
  import mammoth from "mammoth";
@@ -1453,16 +1537,16 @@ function extractPublicationDateFromHtml(html) {
1453
1537
  }
1454
1538
 
1455
1539
  // src/ingest/extractors/markdown-extractor.ts
1456
- import { readFile as readFile6 } from "fs/promises";
1540
+ import { readFile as readFile5 } from "fs/promises";
1457
1541
  async function extractMarkdown(filePath) {
1458
- return readFile6(filePath, "utf8");
1542
+ return readFile5(filePath, "utf8");
1459
1543
  }
1460
1544
 
1461
1545
  // src/ingest/extractors/pdf-extractor.ts
1462
- import { readFile as readFile7 } from "fs/promises";
1546
+ import { readFile as readFile6 } from "fs/promises";
1463
1547
  import { PDFParse } from "pdf-parse";
1464
1548
  async function extractPdf(filePath) {
1465
- const buffer = await readFile7(filePath);
1549
+ const buffer = await readFile6(filePath);
1466
1550
  const parser = new PDFParse({ data: buffer });
1467
1551
  try {
1468
1552
  const parsed = await parser.getText();
@@ -1473,9 +1557,9 @@ async function extractPdf(filePath) {
1473
1557
  }
1474
1558
 
1475
1559
  // src/ingest/extractors/text-extractor.ts
1476
- import { readFile as readFile8 } from "fs/promises";
1560
+ import { readFile as readFile7 } from "fs/promises";
1477
1561
  async function extractText(filePath) {
1478
- return readFile8(filePath, "utf8");
1562
+ return readFile7(filePath, "utf8");
1479
1563
  }
1480
1564
 
1481
1565
  // src/ingest/adapters/file-adapter.ts
@@ -1510,7 +1594,7 @@ async function extractFileContent(filePath, mimeType) {
1510
1594
  ${text}`, raw: text };
1511
1595
  }
1512
1596
  if (mimeType === "text/html") {
1513
- const raw = await readFile9(filePath, "utf8");
1597
+ const raw = await readFile8(filePath, "utf8");
1514
1598
  const extracted = extractHtmlToMarkdown(raw);
1515
1599
  return { title: extracted.title, markdown: `# ${extracted.title}
1516
1600
 
@@ -1566,8 +1650,8 @@ async function ingestFile({
1566
1650
  const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
1567
1651
  const indexedAt = now;
1568
1652
  const crawledAt = now;
1569
- await mkdir7(resolve(workspacePath, "normalized"), { recursive: true });
1570
- await mkdir7(resolve(workspacePath, "raw", source.id), { recursive: true });
1653
+ await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
1654
+ await mkdir8(resolve(workspacePath, "raw", source.id), { recursive: true });
1571
1655
  if (extracted.raw) {
1572
1656
  await writeFile6(rawPath, extracted.raw, "utf8");
1573
1657
  }
@@ -1632,7 +1716,7 @@ ${content}`;
1632
1716
  const now = (/* @__PURE__ */ new Date()).toISOString();
1633
1717
  const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
1634
1718
  const indexedAt = now;
1635
- await mkdir7(resolve(workspacePath, "normalized"), { recursive: true });
1719
+ await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
1636
1720
  await writeNormalizedDocument({
1637
1721
  documentId,
1638
1722
  sourceId: source.id,
@@ -1676,7 +1760,7 @@ async function reprocessStoredDocument(document, source) {
1676
1760
  if (!document.rawPath) {
1677
1761
  return null;
1678
1762
  }
1679
- const raw = await readFile9(document.rawPath, "utf8");
1763
+ const raw = await readFile8(document.rawPath, "utf8");
1680
1764
  const fallbackTitle = document.title || basename(document.uri);
1681
1765
  const extracted = await extractRawContent(raw, document.mimeType, fallbackTitle);
1682
1766
  const contentHash = sha256(extracted.markdown);
@@ -1793,7 +1877,7 @@ async function parseRssFeedDocument(xml, source) {
1793
1877
  }
1794
1878
 
1795
1879
  // src/ingest/adapters/url-adapter.ts
1796
- import { mkdir as mkdir8, readFile as readFile10, writeFile as writeFile7 } from "fs/promises";
1880
+ import { mkdir as mkdir9, readFile as readFile9, writeFile as writeFile7 } from "fs/promises";
1797
1881
  import path16 from "path";
1798
1882
 
1799
1883
  // src/core/urls.ts
@@ -1845,7 +1929,7 @@ ${extracted.markdown}`;
1845
1929
  const indexedAt = now;
1846
1930
  const crawledAt = now;
1847
1931
  const resolvedPublicationDate = choosePublicationDate(publicationDate, extractPublicationDateFromHtml(body), previous?.publicationDate);
1848
- await mkdir8(path16.resolve(workspacePath, "raw", source.id), { recursive: true });
1932
+ await mkdir9(path16.resolve(workspacePath, "raw", source.id), { recursive: true });
1849
1933
  await writeFile7(rawPath, body, "utf8");
1850
1934
  await writeNormalizedDocument({
1851
1935
  documentId,
@@ -1966,7 +2050,7 @@ async function reprocessRemoteDocument(document, source) {
1966
2050
  if (!document.rawPath || !await fileExists(document.rawPath)) {
1967
2051
  return null;
1968
2052
  }
1969
- const raw = await readFile10(document.rawPath, "utf8");
2053
+ const raw = await readFile9(document.rawPath, "utf8");
1970
2054
  const extracted = extractHtmlToMarkdown(raw);
1971
2055
  const markdown = `# ${extracted.title}
1972
2056
 
@@ -2684,7 +2768,7 @@ async function discoverWebsiteFeed(websiteUrl, userAgent) {
2684
2768
  }
2685
2769
 
2686
2770
  // src/query/search-service.ts
2687
- import { readFile as readFile11 } from "fs/promises";
2771
+ import { readFile as readFile10 } from "fs/promises";
2688
2772
  import { BoolQuery, MatchQuery, OP, TermQuery, reciprocalRankFusion } from "@tryformation/querylight-ts";
2689
2773
  import path18 from "path";
2690
2774
  async function loadHydratedIndex(workspacePath) {
@@ -2914,7 +2998,7 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
2914
2998
  if (!await fileExists(document.normalizedPath)) {
2915
2999
  return buildSnippet(chunk.text, query);
2916
3000
  }
2917
- const raw = await readFile11(document.normalizedPath, "utf8");
3001
+ const raw = await readFile10(document.normalizedPath, "utf8");
2918
3002
  orderedChunks = buildChunksForDocument(document, raw, config);
2919
3003
  orderedChunkCache.set(document.id, orderedChunks);
2920
3004
  }
@@ -4210,7 +4294,7 @@ Examples:
4210
4294
  try {
4211
4295
  const meta = await readLatestIndexMetadata(workspace);
4212
4296
  latestIndex = meta.createdAt;
4213
- indexSize = (await stat4(`${workspace}/indexes/latest.json`)).size;
4297
+ indexSize = (await stat4(await resolveLatestIndexArtifactPath(workspace))).size;
4214
4298
  } catch {
4215
4299
  latestIndex = void 0;
4216
4300
  }
@@ -1,5 +1,5 @@
1
1
  export declare const PACKAGE_NAME = "@tryformation/querylight-cli";
2
- export declare const PACKAGE_VERSION = "0.2.0";
2
+ export declare const PACKAGE_VERSION = "0.2.1";
3
3
  export declare const DEFAULT_WORKSPACE = ".kb";
4
4
  export declare const DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
5
5
  export declare const LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
@@ -0,0 +1,3 @@
1
+ export declare function writeGzipJson(filePath: string, value: unknown): Promise<void>;
2
+ export declare function readJsonFromGzipOrFile<T>(gzipPath: string, legacyPath?: string): Promise<T>;
3
+ export declare function resolveExistingGzipOrFilePath(gzipPath: string, legacyPath?: string): Promise<string>;
@@ -1,4 +1,6 @@
1
1
  import type { IndexMetadata } from "../types/models.js";
2
+ export declare function latestIndexPath(workspacePath: string): string;
3
+ export declare function latestMetaPath(workspacePath: string): string;
2
4
  export declare function writeIndexArtifacts({ workspacePath, indexState, metadata }: {
3
5
  workspacePath: string;
4
6
  indexState: object;
@@ -9,3 +11,4 @@ export declare function writeIndexArtifacts({ workspacePath, indexState, metadat
9
11
  }>;
10
12
  export declare function readLatestIndexState(workspacePath: string): Promise<object>;
11
13
  export declare function readLatestIndexMetadata(workspacePath: string): Promise<IndexMetadata>;
14
+ export declare function resolveLatestIndexArtifactPath(workspacePath: string): Promise<string>;
package/dist/index.js CHANGED
@@ -1885,8 +1885,31 @@ async function getDenseTransformersRuntime(cacheDir) {
1885
1885
  }
1886
1886
 
1887
1887
  // src/vector/store.ts
1888
- import { mkdir as mkdir6, readFile as readFile9, writeFile as writeFile6 } from "fs/promises";
1888
+ import { mkdir as mkdir6, rm as rm2, writeFile as writeFile7 } from "fs/promises";
1889
1889
  import path13 from "path";
1890
+
1891
+ // src/core/gzip-json.ts
1892
+ import { readFile as readFile9, writeFile as writeFile6 } from "fs/promises";
1893
+ import { promisify } from "util";
1894
+ import { gunzip, gzip } from "zlib";
1895
+ var gzipAsync = promisify(gzip);
1896
+ var gunzipAsync = promisify(gunzip);
1897
+ async function writeGzipJson(filePath, value) {
1898
+ const payload = JSON.stringify(value, null, 2);
1899
+ await writeFile6(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
1900
+ }
1901
+ async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
1902
+ if (await fileExists(gzipPath)) {
1903
+ const payload = await readFile9(gzipPath);
1904
+ return JSON.parse((await gunzipAsync(payload)).toString("utf8"));
1905
+ }
1906
+ if (legacyPath && await fileExists(legacyPath)) {
1907
+ return JSON.parse(await readFile9(legacyPath, "utf8"));
1908
+ }
1909
+ return JSON.parse(await readFile9(gzipPath, "utf8"));
1910
+ }
1911
+
1912
+ // src/vector/store.ts
1890
1913
  function vectorsDir(workspacePath) {
1891
1914
  return path13.join(workspacePath, "vectors");
1892
1915
  }
@@ -1894,15 +1917,27 @@ function sharedModelStateDir() {
1894
1917
  return path13.join(resolveQliHomeDir(), "models", "status");
1895
1918
  }
1896
1919
  function denseVectorPath(workspacePath) {
1897
- return path13.join(vectorsDir(workspacePath), "dense.latest.json");
1920
+ return path13.join(vectorsDir(workspacePath), "dense.latest.json.gz");
1898
1921
  }
1899
1922
  function denseMetaPath(workspacePath) {
1900
- return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json");
1923
+ return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
1901
1924
  }
1902
1925
  function sparseVectorPath(workspacePath) {
1903
- return path13.join(vectorsDir(workspacePath), "sparse.latest.json");
1926
+ return path13.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
1904
1927
  }
1905
1928
  function sparseMetaPath(workspacePath) {
1929
+ return path13.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
1930
+ }
1931
+ function legacyDenseVectorPath(workspacePath) {
1932
+ return path13.join(vectorsDir(workspacePath), "dense.latest.json");
1933
+ }
1934
+ function legacyDenseMetaPath(workspacePath) {
1935
+ return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json");
1936
+ }
1937
+ function legacySparseVectorPath(workspacePath) {
1938
+ return path13.join(vectorsDir(workspacePath), "sparse.latest.json");
1939
+ }
1940
+ function legacySparseMetaPath(workspacePath) {
1906
1941
  return path13.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
1907
1942
  }
1908
1943
  function pullMarkerPath(type, workspacePath, modelId, cacheDir) {
@@ -1918,19 +1953,27 @@ function sparsePullMarker(workspacePath, modelId, cacheDir) {
1918
1953
  }
1919
1954
  async function writeDensePayload(workspacePath, payload) {
1920
1955
  await mkdir6(vectorsDir(workspacePath), { recursive: true });
1921
- await writeFile6(denseVectorPath(workspacePath), JSON.stringify(payload, null, 2), "utf8");
1922
- await writeFile6(denseMetaPath(workspacePath), JSON.stringify(payload.metadata, null, 2), "utf8");
1956
+ await writeGzipJson(denseVectorPath(workspacePath), payload);
1957
+ await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
1958
+ await Promise.all([
1959
+ rm2(legacyDenseVectorPath(workspacePath), { force: true }),
1960
+ rm2(legacyDenseMetaPath(workspacePath), { force: true })
1961
+ ]);
1923
1962
  }
1924
1963
  async function readDensePayload(workspacePath) {
1925
- return JSON.parse(await readFile9(denseVectorPath(workspacePath), "utf8"));
1964
+ return readJsonFromGzipOrFile(denseVectorPath(workspacePath), legacyDenseVectorPath(workspacePath));
1926
1965
  }
1927
1966
  async function writeSparsePayload(workspacePath, payload) {
1928
1967
  await mkdir6(vectorsDir(workspacePath), { recursive: true });
1929
- await writeFile6(sparseVectorPath(workspacePath), JSON.stringify(payload, null, 2), "utf8");
1930
- await writeFile6(sparseMetaPath(workspacePath), JSON.stringify(payload.metadata, null, 2), "utf8");
1968
+ await writeGzipJson(sparseVectorPath(workspacePath), payload);
1969
+ await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
1970
+ await Promise.all([
1971
+ rm2(legacySparseVectorPath(workspacePath), { force: true }),
1972
+ rm2(legacySparseMetaPath(workspacePath), { force: true })
1973
+ ]);
1931
1974
  }
1932
1975
  async function readSparsePayload(workspacePath) {
1933
- return JSON.parse(await readFile9(sparseVectorPath(workspacePath), "utf8"));
1976
+ return readJsonFromGzipOrFile(sparseVectorPath(workspacePath), legacySparseVectorPath(workspacePath));
1934
1977
  }
1935
1978
  async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
1936
1979
  const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
@@ -1941,7 +1984,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
1941
1984
  modelId: dense.modelId,
1942
1985
  cacheDir: denseCacheDir,
1943
1986
  available: await fileExists(densePullMarker(workspacePath, dense.modelId, dense.cacheDir)),
1944
- artifactExists: await fileExists(denseVectorPath(workspacePath))
1987
+ artifactExists: await fileExists(denseVectorPath(workspacePath)) || await fileExists(legacyDenseVectorPath(workspacePath))
1945
1988
  },
1946
1989
  sparse: {
1947
1990
  configured: sparse.enabled,
@@ -1949,7 +1992,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
1949
1992
  cacheDir: sparseCacheDir,
1950
1993
  uvAvailable,
1951
1994
  available: await fileExists(sparsePullMarker(workspacePath, sparse.modelId, sparse.cacheDir)),
1952
- artifactExists: await fileExists(sparseVectorPath(workspacePath))
1995
+ artifactExists: await fileExists(sparseVectorPath(workspacePath)) || await fileExists(legacySparseVectorPath(workspacePath))
1953
1996
  }
1954
1997
  };
1955
1998
  }
@@ -2266,28 +2309,57 @@ async function buildVectorArtifacts({
2266
2309
  }
2267
2310
 
2268
2311
  // src/index/index-store.ts
2269
- import { readFile as readFile10, writeFile as writeFile7 } from "fs/promises";
2312
+ import { mkdir as mkdir9, rm as rm3 } from "fs/promises";
2270
2313
  import path16 from "path";
2314
+ function versionedIndexPath(workspacePath, stamp) {
2315
+ return path16.join(workspacePath, "indexes", `${stamp}.json.gz`);
2316
+ }
2317
+ function versionedLegacyIndexPath(workspacePath, stamp) {
2318
+ return path16.join(workspacePath, "indexes", `${stamp}.json`);
2319
+ }
2320
+ function versionedMetaPath(workspacePath, stamp) {
2321
+ return path16.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
2322
+ }
2323
+ function versionedLegacyMetaPath(workspacePath, stamp) {
2324
+ return path16.join(workspacePath, "indexes", `${stamp}.meta.json`);
2325
+ }
2326
+ function latestIndexPath(workspacePath) {
2327
+ return path16.join(workspacePath, "indexes", "latest.json.gz");
2328
+ }
2329
+ function legacyLatestIndexPath(workspacePath) {
2330
+ return path16.join(workspacePath, "indexes", "latest.json");
2331
+ }
2332
+ function latestMetaPath(workspacePath) {
2333
+ return path16.join(workspacePath, "indexes", "latest.meta.json.gz");
2334
+ }
2335
+ function legacyLatestMetaPath(workspacePath) {
2336
+ return path16.join(workspacePath, "indexes", "latest.meta.json");
2337
+ }
2271
2338
  async function writeIndexArtifacts({
2272
2339
  workspacePath,
2273
2340
  indexState,
2274
2341
  metadata
2275
2342
  }) {
2276
2343
  const stamp = metadata.createdAt.replace(/[:.]/g, "-");
2277
- const indexPath = path16.join(workspacePath, "indexes", `${stamp}.json`);
2278
- const metaPath = path16.join(workspacePath, "indexes", `${stamp}.meta.json`);
2279
- const latestIndexPath = path16.join(workspacePath, "indexes", "latest.json");
2280
- const latestMetaPath = path16.join(workspacePath, "indexes", "latest.meta.json");
2281
- const indexPayload = JSON.stringify(indexState, null, 2);
2282
- const metaPayload = JSON.stringify(metadata, null, 2);
2283
- await writeFile7(indexPath, indexPayload, "utf8");
2284
- await writeFile7(metaPath, metaPayload, "utf8");
2285
- await writeFile7(latestIndexPath, indexPayload, "utf8");
2286
- await writeFile7(latestMetaPath, metaPayload, "utf8");
2287
- return { indexPath: latestIndexPath, metadataPath: latestMetaPath };
2344
+ const indexPath = versionedIndexPath(workspacePath, stamp);
2345
+ const metaPath = versionedMetaPath(workspacePath, stamp);
2346
+ const latestIndexArtifactPath = latestIndexPath(workspacePath);
2347
+ const latestMetadataArtifactPath = latestMetaPath(workspacePath);
2348
+ await mkdir9(path16.join(workspacePath, "indexes"), { recursive: true });
2349
+ await writeGzipJson(indexPath, indexState);
2350
+ await writeGzipJson(metaPath, metadata);
2351
+ await writeGzipJson(latestIndexArtifactPath, indexState);
2352
+ await writeGzipJson(latestMetadataArtifactPath, metadata);
2353
+ await Promise.all([
2354
+ rm3(legacyLatestIndexPath(workspacePath), { force: true }),
2355
+ rm3(legacyLatestMetaPath(workspacePath), { force: true }),
2356
+ rm3(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
2357
+ rm3(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
2358
+ ]);
2359
+ return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
2288
2360
  }
2289
2361
  async function readLatestIndexState(workspacePath) {
2290
- return JSON.parse(await readFile10(path16.join(workspacePath, "indexes", "latest.json"), "utf8"));
2362
+ return readJsonFromGzipOrFile(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
2291
2363
  }
2292
2364
 
2293
2365
  // src/index/querylight-indexer.ts
@@ -2387,7 +2459,7 @@ async function buildIndex({
2387
2459
  }
2388
2460
 
2389
2461
  // src/query/search-service.ts
2390
- import { readFile as readFile11 } from "fs/promises";
2462
+ import { readFile as readFile10 } from "fs/promises";
2391
2463
  import { BoolQuery, MatchQuery, OP, TermQuery, reciprocalRankFusion } from "@tryformation/querylight-ts";
2392
2464
  import path18 from "path";
2393
2465
  async function loadHydratedIndex(workspacePath) {
@@ -2617,7 +2689,7 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
2617
2689
  if (!await fileExists(document.normalizedPath)) {
2618
2690
  return buildSnippet(chunk.text, query);
2619
2691
  }
2620
- const raw = await readFile11(document.normalizedPath, "utf8");
2692
+ const raw = await readFile10(document.normalizedPath, "utf8");
2621
2693
  orderedChunks = buildChunksForDocument(document, raw, config);
2622
2694
  orderedChunkCache.set(document.id, orderedChunks);
2623
2695
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tryformation/querylight-cli",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "Querylight CLI for building and querying local knowledge bases.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/formation-res/querylight-cli#readme",