@tryformation/querylight-cli 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/cli/main.js +135 -51
- package/dist/core/constants.d.ts +1 -1
- package/dist/core/gzip-json.d.ts +3 -0
- package/dist/index/index-store.d.ts +3 -0
- package/dist/index.js +99 -27
- package/package.json +1 -1
package/README.md
CHANGED
package/dist/cli/main.js
CHANGED
|
@@ -16,7 +16,7 @@ import path from "path";
|
|
|
16
16
|
import YAML from "yaml";
|
|
17
17
|
|
|
18
18
|
// src/core/constants.ts
|
|
19
|
-
var PACKAGE_VERSION = "0.2.0";
|
|
19
|
+
var PACKAGE_VERSION = "0.2.1";
|
|
20
20
|
var DEFAULT_WORKSPACE = ".kb";
|
|
21
21
|
var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
22
22
|
var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
|
@@ -490,8 +490,40 @@ async function getDenseTransformersRuntime(cacheDir) {
|
|
|
490
490
|
}
|
|
491
491
|
|
|
492
492
|
// src/vector/store.ts
|
|
493
|
-
import { mkdir as mkdir3,
|
|
493
|
+
import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
|
|
494
494
|
import path7 from "path";
|
|
495
|
+
|
|
496
|
+
// src/core/gzip-json.ts
|
|
497
|
+
import { readFile as readFile4, writeFile as writeFile3 } from "fs/promises";
|
|
498
|
+
import { promisify } from "util";
|
|
499
|
+
import { gunzip, gzip } from "zlib";
|
|
500
|
+
var gzipAsync = promisify(gzip);
|
|
501
|
+
var gunzipAsync = promisify(gunzip);
|
|
502
|
+
async function writeGzipJson(filePath, value) {
|
|
503
|
+
const payload = JSON.stringify(value, null, 2);
|
|
504
|
+
await writeFile3(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
|
|
505
|
+
}
|
|
506
|
+
async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
|
|
507
|
+
if (await fileExists(gzipPath)) {
|
|
508
|
+
const payload = await readFile4(gzipPath);
|
|
509
|
+
return JSON.parse((await gunzipAsync(payload)).toString("utf8"));
|
|
510
|
+
}
|
|
511
|
+
if (legacyPath && await fileExists(legacyPath)) {
|
|
512
|
+
return JSON.parse(await readFile4(legacyPath, "utf8"));
|
|
513
|
+
}
|
|
514
|
+
return JSON.parse(await readFile4(gzipPath, "utf8"));
|
|
515
|
+
}
|
|
516
|
+
async function resolveExistingGzipOrFilePath(gzipPath, legacyPath) {
|
|
517
|
+
if (await fileExists(gzipPath)) {
|
|
518
|
+
return gzipPath;
|
|
519
|
+
}
|
|
520
|
+
if (legacyPath && await fileExists(legacyPath)) {
|
|
521
|
+
return legacyPath;
|
|
522
|
+
}
|
|
523
|
+
return gzipPath;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
// src/vector/store.ts
|
|
495
527
|
function vectorsDir(workspacePath) {
|
|
496
528
|
return path7.join(workspacePath, "vectors");
|
|
497
529
|
}
|
|
@@ -499,15 +531,27 @@ function sharedModelStateDir() {
|
|
|
499
531
|
return path7.join(resolveQliHomeDir(), "models", "status");
|
|
500
532
|
}
|
|
501
533
|
function denseVectorPath(workspacePath) {
|
|
502
|
-
return path7.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
534
|
+
return path7.join(vectorsDir(workspacePath), "dense.latest.json.gz");
|
|
503
535
|
}
|
|
504
536
|
function denseMetaPath(workspacePath) {
|
|
505
|
-
return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
537
|
+
return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
|
|
506
538
|
}
|
|
507
539
|
function sparseVectorPath(workspacePath) {
|
|
508
|
-
return path7.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
540
|
+
return path7.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
|
|
509
541
|
}
|
|
510
542
|
function sparseMetaPath(workspacePath) {
|
|
543
|
+
return path7.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
|
|
544
|
+
}
|
|
545
|
+
function legacyDenseVectorPath(workspacePath) {
|
|
546
|
+
return path7.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
547
|
+
}
|
|
548
|
+
function legacyDenseMetaPath(workspacePath) {
|
|
549
|
+
return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
550
|
+
}
|
|
551
|
+
function legacySparseVectorPath(workspacePath) {
|
|
552
|
+
return path7.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
553
|
+
}
|
|
554
|
+
function legacySparseMetaPath(workspacePath) {
|
|
511
555
|
return path7.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
|
|
512
556
|
}
|
|
513
557
|
function pullMarkerPath(type, workspacePath, modelId, cacheDir) {
|
|
@@ -523,29 +567,37 @@ function sparsePullMarker(workspacePath, modelId, cacheDir) {
|
|
|
523
567
|
}
|
|
524
568
|
async function writeDensePayload(workspacePath, payload) {
|
|
525
569
|
await mkdir3(vectorsDir(workspacePath), { recursive: true });
|
|
526
|
-
await
|
|
527
|
-
await
|
|
570
|
+
await writeGzipJson(denseVectorPath(workspacePath), payload);
|
|
571
|
+
await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
|
|
572
|
+
await Promise.all([
|
|
573
|
+
rm(legacyDenseVectorPath(workspacePath), { force: true }),
|
|
574
|
+
rm(legacyDenseMetaPath(workspacePath), { force: true })
|
|
575
|
+
]);
|
|
528
576
|
}
|
|
529
577
|
async function readDensePayload(workspacePath) {
|
|
530
|
-
return
|
|
578
|
+
return readJsonFromGzipOrFile(denseVectorPath(workspacePath), legacyDenseVectorPath(workspacePath));
|
|
531
579
|
}
|
|
532
580
|
async function writeSparsePayload(workspacePath, payload) {
|
|
533
581
|
await mkdir3(vectorsDir(workspacePath), { recursive: true });
|
|
534
|
-
await
|
|
535
|
-
await
|
|
582
|
+
await writeGzipJson(sparseVectorPath(workspacePath), payload);
|
|
583
|
+
await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
|
|
584
|
+
await Promise.all([
|
|
585
|
+
rm(legacySparseVectorPath(workspacePath), { force: true }),
|
|
586
|
+
rm(legacySparseMetaPath(workspacePath), { force: true })
|
|
587
|
+
]);
|
|
536
588
|
}
|
|
537
589
|
async function readSparsePayload(workspacePath) {
|
|
538
|
-
return
|
|
590
|
+
return readJsonFromGzipOrFile(sparseVectorPath(workspacePath), legacySparseVectorPath(workspacePath));
|
|
539
591
|
}
|
|
540
592
|
async function writeDensePullMarker(workspacePath, model, value) {
|
|
541
593
|
const markerPath = densePullMarker(workspacePath, model.modelId, model.cacheDir);
|
|
542
594
|
await mkdir3(path7.dirname(markerPath), { recursive: true });
|
|
543
|
-
await
|
|
595
|
+
await writeFile4(markerPath, JSON.stringify(value, null, 2), "utf8");
|
|
544
596
|
}
|
|
545
597
|
async function writeSparsePullMarker(workspacePath, model, value) {
|
|
546
598
|
const markerPath = sparsePullMarker(workspacePath, model.modelId, model.cacheDir);
|
|
547
599
|
await mkdir3(path7.dirname(markerPath), { recursive: true });
|
|
548
|
-
await
|
|
600
|
+
await writeFile4(markerPath, JSON.stringify(value, null, 2), "utf8");
|
|
549
601
|
}
|
|
550
602
|
async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
551
603
|
const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
|
|
@@ -556,7 +608,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
|
556
608
|
modelId: dense.modelId,
|
|
557
609
|
cacheDir: denseCacheDir,
|
|
558
610
|
available: await fileExists(densePullMarker(workspacePath, dense.modelId, dense.cacheDir)),
|
|
559
|
-
artifactExists: await fileExists(denseVectorPath(workspacePath))
|
|
611
|
+
artifactExists: await fileExists(denseVectorPath(workspacePath)) || await fileExists(legacyDenseVectorPath(workspacePath))
|
|
560
612
|
},
|
|
561
613
|
sparse: {
|
|
562
614
|
configured: sparse.enabled,
|
|
@@ -564,7 +616,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
|
564
616
|
cacheDir: sparseCacheDir,
|
|
565
617
|
uvAvailable,
|
|
566
618
|
available: await fileExists(sparsePullMarker(workspacePath, sparse.modelId, sparse.cacheDir)),
|
|
567
|
-
artifactExists: await fileExists(sparseVectorPath(workspacePath))
|
|
619
|
+
artifactExists: await fileExists(sparseVectorPath(workspacePath)) || await fileExists(legacySparseVectorPath(workspacePath))
|
|
568
620
|
}
|
|
569
621
|
};
|
|
570
622
|
}
|
|
@@ -963,31 +1015,63 @@ async function getModelStatus(workspacePath, config) {
|
|
|
963
1015
|
}
|
|
964
1016
|
|
|
965
1017
|
// src/index/index-store.ts
|
|
966
|
-
import {
|
|
1018
|
+
import { mkdir as mkdir6, rm as rm2 } from "fs/promises";
|
|
967
1019
|
import path10 from "path";
|
|
1020
|
+
function versionedIndexPath(workspacePath, stamp) {
|
|
1021
|
+
return path10.join(workspacePath, "indexes", `${stamp}.json.gz`);
|
|
1022
|
+
}
|
|
1023
|
+
function versionedLegacyIndexPath(workspacePath, stamp) {
|
|
1024
|
+
return path10.join(workspacePath, "indexes", `${stamp}.json`);
|
|
1025
|
+
}
|
|
1026
|
+
function versionedMetaPath(workspacePath, stamp) {
|
|
1027
|
+
return path10.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
|
|
1028
|
+
}
|
|
1029
|
+
function versionedLegacyMetaPath(workspacePath, stamp) {
|
|
1030
|
+
return path10.join(workspacePath, "indexes", `${stamp}.meta.json`);
|
|
1031
|
+
}
|
|
1032
|
+
function latestIndexPath(workspacePath) {
|
|
1033
|
+
return path10.join(workspacePath, "indexes", "latest.json.gz");
|
|
1034
|
+
}
|
|
1035
|
+
function legacyLatestIndexPath(workspacePath) {
|
|
1036
|
+
return path10.join(workspacePath, "indexes", "latest.json");
|
|
1037
|
+
}
|
|
1038
|
+
function latestMetaPath(workspacePath) {
|
|
1039
|
+
return path10.join(workspacePath, "indexes", "latest.meta.json.gz");
|
|
1040
|
+
}
|
|
1041
|
+
function legacyLatestMetaPath(workspacePath) {
|
|
1042
|
+
return path10.join(workspacePath, "indexes", "latest.meta.json");
|
|
1043
|
+
}
|
|
968
1044
|
async function writeIndexArtifacts({
|
|
969
1045
|
workspacePath,
|
|
970
1046
|
indexState,
|
|
971
1047
|
metadata
|
|
972
1048
|
}) {
|
|
973
1049
|
const stamp = metadata.createdAt.replace(/[:.]/g, "-");
|
|
974
|
-
const indexPath =
|
|
975
|
-
const metaPath =
|
|
976
|
-
const
|
|
977
|
-
const
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
await
|
|
981
|
-
await
|
|
982
|
-
await
|
|
983
|
-
await
|
|
984
|
-
|
|
1050
|
+
const indexPath = versionedIndexPath(workspacePath, stamp);
|
|
1051
|
+
const metaPath = versionedMetaPath(workspacePath, stamp);
|
|
1052
|
+
const latestIndexArtifactPath = latestIndexPath(workspacePath);
|
|
1053
|
+
const latestMetadataArtifactPath = latestMetaPath(workspacePath);
|
|
1054
|
+
await mkdir6(path10.join(workspacePath, "indexes"), { recursive: true });
|
|
1055
|
+
await writeGzipJson(indexPath, indexState);
|
|
1056
|
+
await writeGzipJson(metaPath, metadata);
|
|
1057
|
+
await writeGzipJson(latestIndexArtifactPath, indexState);
|
|
1058
|
+
await writeGzipJson(latestMetadataArtifactPath, metadata);
|
|
1059
|
+
await Promise.all([
|
|
1060
|
+
rm2(legacyLatestIndexPath(workspacePath), { force: true }),
|
|
1061
|
+
rm2(legacyLatestMetaPath(workspacePath), { force: true }),
|
|
1062
|
+
rm2(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
|
|
1063
|
+
rm2(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
|
|
1064
|
+
]);
|
|
1065
|
+
return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
|
|
985
1066
|
}
|
|
986
1067
|
async function readLatestIndexState(workspacePath) {
|
|
987
|
-
return
|
|
1068
|
+
return readJsonFromGzipOrFile(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
|
|
988
1069
|
}
|
|
989
1070
|
async function readLatestIndexMetadata(workspacePath) {
|
|
990
|
-
return
|
|
1071
|
+
return readJsonFromGzipOrFile(latestMetaPath(workspacePath), legacyLatestMetaPath(workspacePath));
|
|
1072
|
+
}
|
|
1073
|
+
async function resolveLatestIndexArtifactPath(workspacePath) {
|
|
1074
|
+
return resolveExistingGzipOrFilePath(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
|
|
991
1075
|
}
|
|
992
1076
|
|
|
993
1077
|
// src/index/querylight-indexer.ts
|
|
@@ -1178,7 +1262,7 @@ async function removeSource(workspacePath, sourceId) {
|
|
|
1178
1262
|
}
|
|
1179
1263
|
|
|
1180
1264
|
// src/ingest/document-utils.ts
|
|
1181
|
-
import { mkdir as
|
|
1265
|
+
import { mkdir as mkdir7, rm as rm3, writeFile as writeFile5 } from "fs/promises";
|
|
1182
1266
|
import path14 from "path";
|
|
1183
1267
|
|
|
1184
1268
|
// src/normalize/normalize-markdown.ts
|
|
@@ -1231,7 +1315,7 @@ async function writeNormalizedDocument({
|
|
|
1231
1315
|
normalizedPath,
|
|
1232
1316
|
markdown
|
|
1233
1317
|
}) {
|
|
1234
|
-
await
|
|
1318
|
+
await mkdir7(path14.dirname(normalizedPath), { recursive: true });
|
|
1235
1319
|
await writeFile5(
|
|
1236
1320
|
normalizedPath,
|
|
1237
1321
|
withFrontmatter(
|
|
@@ -1254,8 +1338,8 @@ async function writeNormalizedDocument({
|
|
|
1254
1338
|
}
|
|
1255
1339
|
async function deleteDocumentArtifacts(document) {
|
|
1256
1340
|
await Promise.all([
|
|
1257
|
-
document.rawPath ?
|
|
1258
|
-
|
|
1341
|
+
document.rawPath ? rm3(document.rawPath, { force: true }) : Promise.resolve(),
|
|
1342
|
+
rm3(document.normalizedPath, { force: true })
|
|
1259
1343
|
]);
|
|
1260
1344
|
}
|
|
1261
1345
|
|
|
@@ -1279,7 +1363,7 @@ async function listDirectoryFiles(source) {
|
|
|
1279
1363
|
|
|
1280
1364
|
// src/ingest/adapters/file-adapter.ts
|
|
1281
1365
|
import { basename, extname, resolve } from "path";
|
|
1282
|
-
import { mkdir as
|
|
1366
|
+
import { mkdir as mkdir8, readFile as readFile8, stat as stat3, writeFile as writeFile6 } from "fs/promises";
|
|
1283
1367
|
|
|
1284
1368
|
// src/ingest/extractors/docx-extractor.ts
|
|
1285
1369
|
import mammoth from "mammoth";
|
|
@@ -1453,16 +1537,16 @@ function extractPublicationDateFromHtml(html) {
|
|
|
1453
1537
|
}
|
|
1454
1538
|
|
|
1455
1539
|
// src/ingest/extractors/markdown-extractor.ts
|
|
1456
|
-
import { readFile as
|
|
1540
|
+
import { readFile as readFile5 } from "fs/promises";
|
|
1457
1541
|
async function extractMarkdown(filePath) {
|
|
1458
|
-
return
|
|
1542
|
+
return readFile5(filePath, "utf8");
|
|
1459
1543
|
}
|
|
1460
1544
|
|
|
1461
1545
|
// src/ingest/extractors/pdf-extractor.ts
|
|
1462
|
-
import { readFile as
|
|
1546
|
+
import { readFile as readFile6 } from "fs/promises";
|
|
1463
1547
|
import { PDFParse } from "pdf-parse";
|
|
1464
1548
|
async function extractPdf(filePath) {
|
|
1465
|
-
const buffer = await
|
|
1549
|
+
const buffer = await readFile6(filePath);
|
|
1466
1550
|
const parser = new PDFParse({ data: buffer });
|
|
1467
1551
|
try {
|
|
1468
1552
|
const parsed = await parser.getText();
|
|
@@ -1473,9 +1557,9 @@ async function extractPdf(filePath) {
|
|
|
1473
1557
|
}
|
|
1474
1558
|
|
|
1475
1559
|
// src/ingest/extractors/text-extractor.ts
|
|
1476
|
-
import { readFile as
|
|
1560
|
+
import { readFile as readFile7 } from "fs/promises";
|
|
1477
1561
|
async function extractText(filePath) {
|
|
1478
|
-
return
|
|
1562
|
+
return readFile7(filePath, "utf8");
|
|
1479
1563
|
}
|
|
1480
1564
|
|
|
1481
1565
|
// src/ingest/adapters/file-adapter.ts
|
|
@@ -1510,7 +1594,7 @@ async function extractFileContent(filePath, mimeType) {
|
|
|
1510
1594
|
${text}`, raw: text };
|
|
1511
1595
|
}
|
|
1512
1596
|
if (mimeType === "text/html") {
|
|
1513
|
-
const raw = await
|
|
1597
|
+
const raw = await readFile8(filePath, "utf8");
|
|
1514
1598
|
const extracted = extractHtmlToMarkdown(raw);
|
|
1515
1599
|
return { title: extracted.title, markdown: `# ${extracted.title}
|
|
1516
1600
|
|
|
@@ -1566,8 +1650,8 @@ async function ingestFile({
|
|
|
1566
1650
|
const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
|
|
1567
1651
|
const indexedAt = now;
|
|
1568
1652
|
const crawledAt = now;
|
|
1569
|
-
await
|
|
1570
|
-
await
|
|
1653
|
+
await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
|
|
1654
|
+
await mkdir8(resolve(workspacePath, "raw", source.id), { recursive: true });
|
|
1571
1655
|
if (extracted.raw) {
|
|
1572
1656
|
await writeFile6(rawPath, extracted.raw, "utf8");
|
|
1573
1657
|
}
|
|
@@ -1632,7 +1716,7 @@ ${content}`;
|
|
|
1632
1716
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1633
1717
|
const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
|
|
1634
1718
|
const indexedAt = now;
|
|
1635
|
-
await
|
|
1719
|
+
await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
|
|
1636
1720
|
await writeNormalizedDocument({
|
|
1637
1721
|
documentId,
|
|
1638
1722
|
sourceId: source.id,
|
|
@@ -1676,7 +1760,7 @@ async function reprocessStoredDocument(document, source) {
|
|
|
1676
1760
|
if (!document.rawPath) {
|
|
1677
1761
|
return null;
|
|
1678
1762
|
}
|
|
1679
|
-
const raw = await
|
|
1763
|
+
const raw = await readFile8(document.rawPath, "utf8");
|
|
1680
1764
|
const fallbackTitle = document.title || basename(document.uri);
|
|
1681
1765
|
const extracted = await extractRawContent(raw, document.mimeType, fallbackTitle);
|
|
1682
1766
|
const contentHash = sha256(extracted.markdown);
|
|
@@ -1793,7 +1877,7 @@ async function parseRssFeedDocument(xml, source) {
|
|
|
1793
1877
|
}
|
|
1794
1878
|
|
|
1795
1879
|
// src/ingest/adapters/url-adapter.ts
|
|
1796
|
-
import { mkdir as
|
|
1880
|
+
import { mkdir as mkdir9, readFile as readFile9, writeFile as writeFile7 } from "fs/promises";
|
|
1797
1881
|
import path16 from "path";
|
|
1798
1882
|
|
|
1799
1883
|
// src/core/urls.ts
|
|
@@ -1845,7 +1929,7 @@ ${extracted.markdown}`;
|
|
|
1845
1929
|
const indexedAt = now;
|
|
1846
1930
|
const crawledAt = now;
|
|
1847
1931
|
const resolvedPublicationDate = choosePublicationDate(publicationDate, extractPublicationDateFromHtml(body), previous?.publicationDate);
|
|
1848
|
-
await
|
|
1932
|
+
await mkdir9(path16.resolve(workspacePath, "raw", source.id), { recursive: true });
|
|
1849
1933
|
await writeFile7(rawPath, body, "utf8");
|
|
1850
1934
|
await writeNormalizedDocument({
|
|
1851
1935
|
documentId,
|
|
@@ -1966,7 +2050,7 @@ async function reprocessRemoteDocument(document, source) {
|
|
|
1966
2050
|
if (!document.rawPath || !await fileExists(document.rawPath)) {
|
|
1967
2051
|
return null;
|
|
1968
2052
|
}
|
|
1969
|
-
const raw = await
|
|
2053
|
+
const raw = await readFile9(document.rawPath, "utf8");
|
|
1970
2054
|
const extracted = extractHtmlToMarkdown(raw);
|
|
1971
2055
|
const markdown = `# ${extracted.title}
|
|
1972
2056
|
|
|
@@ -2684,7 +2768,7 @@ async function discoverWebsiteFeed(websiteUrl, userAgent) {
|
|
|
2684
2768
|
}
|
|
2685
2769
|
|
|
2686
2770
|
// src/query/search-service.ts
|
|
2687
|
-
import { readFile as
|
|
2771
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
2688
2772
|
import { BoolQuery, MatchQuery, OP, TermQuery, reciprocalRankFusion } from "@tryformation/querylight-ts";
|
|
2689
2773
|
import path18 from "path";
|
|
2690
2774
|
async function loadHydratedIndex(workspacePath) {
|
|
@@ -2914,7 +2998,7 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
|
|
|
2914
2998
|
if (!await fileExists(document.normalizedPath)) {
|
|
2915
2999
|
return buildSnippet(chunk.text, query);
|
|
2916
3000
|
}
|
|
2917
|
-
const raw = await
|
|
3001
|
+
const raw = await readFile10(document.normalizedPath, "utf8");
|
|
2918
3002
|
orderedChunks = buildChunksForDocument(document, raw, config);
|
|
2919
3003
|
orderedChunkCache.set(document.id, orderedChunks);
|
|
2920
3004
|
}
|
|
@@ -4210,7 +4294,7 @@ Examples:
|
|
|
4210
4294
|
try {
|
|
4211
4295
|
const meta = await readLatestIndexMetadata(workspace);
|
|
4212
4296
|
latestIndex = meta.createdAt;
|
|
4213
|
-
indexSize = (await stat4(
|
|
4297
|
+
indexSize = (await stat4(await resolveLatestIndexArtifactPath(workspace))).size;
|
|
4214
4298
|
} catch {
|
|
4215
4299
|
latestIndex = void 0;
|
|
4216
4300
|
}
|
package/dist/core/constants.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export declare const PACKAGE_NAME = "@tryformation/querylight-cli";
|
|
2
|
-
export declare const PACKAGE_VERSION = "0.2.0";
|
|
2
|
+
export declare const PACKAGE_VERSION = "0.2.1";
|
|
3
3
|
export declare const DEFAULT_WORKSPACE = ".kb";
|
|
4
4
|
export declare const DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
5
5
|
export declare const LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export declare function writeGzipJson(filePath: string, value: unknown): Promise<void>;
|
|
2
|
+
export declare function readJsonFromGzipOrFile<T>(gzipPath: string, legacyPath?: string): Promise<T>;
|
|
3
|
+
export declare function resolveExistingGzipOrFilePath(gzipPath: string, legacyPath?: string): Promise<string>;
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import type { IndexMetadata } from "../types/models.js";
|
|
2
|
+
export declare function latestIndexPath(workspacePath: string): string;
|
|
3
|
+
export declare function latestMetaPath(workspacePath: string): string;
|
|
2
4
|
export declare function writeIndexArtifacts({ workspacePath, indexState, metadata }: {
|
|
3
5
|
workspacePath: string;
|
|
4
6
|
indexState: object;
|
|
@@ -9,3 +11,4 @@ export declare function writeIndexArtifacts({ workspacePath, indexState, metadat
|
|
|
9
11
|
}>;
|
|
10
12
|
export declare function readLatestIndexState(workspacePath: string): Promise<object>;
|
|
11
13
|
export declare function readLatestIndexMetadata(workspacePath: string): Promise<IndexMetadata>;
|
|
14
|
+
export declare function resolveLatestIndexArtifactPath(workspacePath: string): Promise<string>;
|
package/dist/index.js
CHANGED
|
@@ -1885,8 +1885,31 @@ async function getDenseTransformersRuntime(cacheDir) {
|
|
|
1885
1885
|
}
|
|
1886
1886
|
|
|
1887
1887
|
// src/vector/store.ts
|
|
1888
|
-
import { mkdir as mkdir6,
|
|
1888
|
+
import { mkdir as mkdir6, rm as rm2, writeFile as writeFile7 } from "fs/promises";
|
|
1889
1889
|
import path13 from "path";
|
|
1890
|
+
|
|
1891
|
+
// src/core/gzip-json.ts
|
|
1892
|
+
import { readFile as readFile9, writeFile as writeFile6 } from "fs/promises";
|
|
1893
|
+
import { promisify } from "util";
|
|
1894
|
+
import { gunzip, gzip } from "zlib";
|
|
1895
|
+
var gzipAsync = promisify(gzip);
|
|
1896
|
+
var gunzipAsync = promisify(gunzip);
|
|
1897
|
+
async function writeGzipJson(filePath, value) {
|
|
1898
|
+
const payload = JSON.stringify(value, null, 2);
|
|
1899
|
+
await writeFile6(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
|
|
1900
|
+
}
|
|
1901
|
+
async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
|
|
1902
|
+
if (await fileExists(gzipPath)) {
|
|
1903
|
+
const payload = await readFile9(gzipPath);
|
|
1904
|
+
return JSON.parse((await gunzipAsync(payload)).toString("utf8"));
|
|
1905
|
+
}
|
|
1906
|
+
if (legacyPath && await fileExists(legacyPath)) {
|
|
1907
|
+
return JSON.parse(await readFile9(legacyPath, "utf8"));
|
|
1908
|
+
}
|
|
1909
|
+
return JSON.parse(await readFile9(gzipPath, "utf8"));
|
|
1910
|
+
}
|
|
1911
|
+
|
|
1912
|
+
// src/vector/store.ts
|
|
1890
1913
|
function vectorsDir(workspacePath) {
|
|
1891
1914
|
return path13.join(workspacePath, "vectors");
|
|
1892
1915
|
}
|
|
@@ -1894,15 +1917,27 @@ function sharedModelStateDir() {
|
|
|
1894
1917
|
return path13.join(resolveQliHomeDir(), "models", "status");
|
|
1895
1918
|
}
|
|
1896
1919
|
function denseVectorPath(workspacePath) {
|
|
1897
|
-
return path13.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
1920
|
+
return path13.join(vectorsDir(workspacePath), "dense.latest.json.gz");
|
|
1898
1921
|
}
|
|
1899
1922
|
function denseMetaPath(workspacePath) {
|
|
1900
|
-
return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
1923
|
+
return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
|
|
1901
1924
|
}
|
|
1902
1925
|
function sparseVectorPath(workspacePath) {
|
|
1903
|
-
return path13.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
1926
|
+
return path13.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
|
|
1904
1927
|
}
|
|
1905
1928
|
function sparseMetaPath(workspacePath) {
|
|
1929
|
+
return path13.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
|
|
1930
|
+
}
|
|
1931
|
+
function legacyDenseVectorPath(workspacePath) {
|
|
1932
|
+
return path13.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
1933
|
+
}
|
|
1934
|
+
function legacyDenseMetaPath(workspacePath) {
|
|
1935
|
+
return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
1936
|
+
}
|
|
1937
|
+
function legacySparseVectorPath(workspacePath) {
|
|
1938
|
+
return path13.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
1939
|
+
}
|
|
1940
|
+
function legacySparseMetaPath(workspacePath) {
|
|
1906
1941
|
return path13.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
|
|
1907
1942
|
}
|
|
1908
1943
|
function pullMarkerPath(type, workspacePath, modelId, cacheDir) {
|
|
@@ -1918,19 +1953,27 @@ function sparsePullMarker(workspacePath, modelId, cacheDir) {
|
|
|
1918
1953
|
}
|
|
1919
1954
|
async function writeDensePayload(workspacePath, payload) {
|
|
1920
1955
|
await mkdir6(vectorsDir(workspacePath), { recursive: true });
|
|
1921
|
-
await
|
|
1922
|
-
await
|
|
1956
|
+
await writeGzipJson(denseVectorPath(workspacePath), payload);
|
|
1957
|
+
await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
|
|
1958
|
+
await Promise.all([
|
|
1959
|
+
rm2(legacyDenseVectorPath(workspacePath), { force: true }),
|
|
1960
|
+
rm2(legacyDenseMetaPath(workspacePath), { force: true })
|
|
1961
|
+
]);
|
|
1923
1962
|
}
|
|
1924
1963
|
async function readDensePayload(workspacePath) {
|
|
1925
|
-
return
|
|
1964
|
+
return readJsonFromGzipOrFile(denseVectorPath(workspacePath), legacyDenseVectorPath(workspacePath));
|
|
1926
1965
|
}
|
|
1927
1966
|
async function writeSparsePayload(workspacePath, payload) {
|
|
1928
1967
|
await mkdir6(vectorsDir(workspacePath), { recursive: true });
|
|
1929
|
-
await
|
|
1930
|
-
await
|
|
1968
|
+
await writeGzipJson(sparseVectorPath(workspacePath), payload);
|
|
1969
|
+
await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
|
|
1970
|
+
await Promise.all([
|
|
1971
|
+
rm2(legacySparseVectorPath(workspacePath), { force: true }),
|
|
1972
|
+
rm2(legacySparseMetaPath(workspacePath), { force: true })
|
|
1973
|
+
]);
|
|
1931
1974
|
}
|
|
1932
1975
|
async function readSparsePayload(workspacePath) {
|
|
1933
|
-
return
|
|
1976
|
+
return readJsonFromGzipOrFile(sparseVectorPath(workspacePath), legacySparseVectorPath(workspacePath));
|
|
1934
1977
|
}
|
|
1935
1978
|
async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
1936
1979
|
const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
|
|
@@ -1941,7 +1984,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
|
1941
1984
|
modelId: dense.modelId,
|
|
1942
1985
|
cacheDir: denseCacheDir,
|
|
1943
1986
|
available: await fileExists(densePullMarker(workspacePath, dense.modelId, dense.cacheDir)),
|
|
1944
|
-
artifactExists: await fileExists(denseVectorPath(workspacePath))
|
|
1987
|
+
artifactExists: await fileExists(denseVectorPath(workspacePath)) || await fileExists(legacyDenseVectorPath(workspacePath))
|
|
1945
1988
|
},
|
|
1946
1989
|
sparse: {
|
|
1947
1990
|
configured: sparse.enabled,
|
|
@@ -1949,7 +1992,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
|
1949
1992
|
cacheDir: sparseCacheDir,
|
|
1950
1993
|
uvAvailable,
|
|
1951
1994
|
available: await fileExists(sparsePullMarker(workspacePath, sparse.modelId, sparse.cacheDir)),
|
|
1952
|
-
artifactExists: await fileExists(sparseVectorPath(workspacePath))
|
|
1995
|
+
artifactExists: await fileExists(sparseVectorPath(workspacePath)) || await fileExists(legacySparseVectorPath(workspacePath))
|
|
1953
1996
|
}
|
|
1954
1997
|
};
|
|
1955
1998
|
}
|
|
@@ -2266,28 +2309,57 @@ async function buildVectorArtifacts({
|
|
|
2266
2309
|
}
|
|
2267
2310
|
|
|
2268
2311
|
// src/index/index-store.ts
|
|
2269
|
-
import {
|
|
2312
|
+
import { mkdir as mkdir9, rm as rm3 } from "fs/promises";
|
|
2270
2313
|
import path16 from "path";
|
|
2314
|
+
function versionedIndexPath(workspacePath, stamp) {
|
|
2315
|
+
return path16.join(workspacePath, "indexes", `${stamp}.json.gz`);
|
|
2316
|
+
}
|
|
2317
|
+
function versionedLegacyIndexPath(workspacePath, stamp) {
|
|
2318
|
+
return path16.join(workspacePath, "indexes", `${stamp}.json`);
|
|
2319
|
+
}
|
|
2320
|
+
function versionedMetaPath(workspacePath, stamp) {
|
|
2321
|
+
return path16.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
|
|
2322
|
+
}
|
|
2323
|
+
function versionedLegacyMetaPath(workspacePath, stamp) {
|
|
2324
|
+
return path16.join(workspacePath, "indexes", `${stamp}.meta.json`);
|
|
2325
|
+
}
|
|
2326
|
+
function latestIndexPath(workspacePath) {
|
|
2327
|
+
return path16.join(workspacePath, "indexes", "latest.json.gz");
|
|
2328
|
+
}
|
|
2329
|
+
function legacyLatestIndexPath(workspacePath) {
|
|
2330
|
+
return path16.join(workspacePath, "indexes", "latest.json");
|
|
2331
|
+
}
|
|
2332
|
+
function latestMetaPath(workspacePath) {
|
|
2333
|
+
return path16.join(workspacePath, "indexes", "latest.meta.json.gz");
|
|
2334
|
+
}
|
|
2335
|
+
function legacyLatestMetaPath(workspacePath) {
|
|
2336
|
+
return path16.join(workspacePath, "indexes", "latest.meta.json");
|
|
2337
|
+
}
|
|
2271
2338
|
async function writeIndexArtifacts({
|
|
2272
2339
|
workspacePath,
|
|
2273
2340
|
indexState,
|
|
2274
2341
|
metadata
|
|
2275
2342
|
}) {
|
|
2276
2343
|
const stamp = metadata.createdAt.replace(/[:.]/g, "-");
|
|
2277
|
-
const indexPath =
|
|
2278
|
-
const metaPath =
|
|
2279
|
-
const
|
|
2280
|
-
const
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
await
|
|
2284
|
-
await
|
|
2285
|
-
await
|
|
2286
|
-
await
|
|
2287
|
-
|
|
2344
|
+
const indexPath = versionedIndexPath(workspacePath, stamp);
|
|
2345
|
+
const metaPath = versionedMetaPath(workspacePath, stamp);
|
|
2346
|
+
const latestIndexArtifactPath = latestIndexPath(workspacePath);
|
|
2347
|
+
const latestMetadataArtifactPath = latestMetaPath(workspacePath);
|
|
2348
|
+
await mkdir9(path16.join(workspacePath, "indexes"), { recursive: true });
|
|
2349
|
+
await writeGzipJson(indexPath, indexState);
|
|
2350
|
+
await writeGzipJson(metaPath, metadata);
|
|
2351
|
+
await writeGzipJson(latestIndexArtifactPath, indexState);
|
|
2352
|
+
await writeGzipJson(latestMetadataArtifactPath, metadata);
|
|
2353
|
+
await Promise.all([
|
|
2354
|
+
rm3(legacyLatestIndexPath(workspacePath), { force: true }),
|
|
2355
|
+
rm3(legacyLatestMetaPath(workspacePath), { force: true }),
|
|
2356
|
+
rm3(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
|
|
2357
|
+
rm3(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
|
|
2358
|
+
]);
|
|
2359
|
+
return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
|
|
2288
2360
|
}
|
|
2289
2361
|
async function readLatestIndexState(workspacePath) {
|
|
2290
|
-
return
|
|
2362
|
+
return readJsonFromGzipOrFile(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
|
|
2291
2363
|
}
|
|
2292
2364
|
|
|
2293
2365
|
// src/index/querylight-indexer.ts
|
|
@@ -2387,7 +2459,7 @@ async function buildIndex({
|
|
|
2387
2459
|
}
|
|
2388
2460
|
|
|
2389
2461
|
// src/query/search-service.ts
|
|
2390
|
-
import { readFile as
|
|
2462
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
2391
2463
|
import { BoolQuery, MatchQuery, OP, TermQuery, reciprocalRankFusion } from "@tryformation/querylight-ts";
|
|
2392
2464
|
import path18 from "path";
|
|
2393
2465
|
async function loadHydratedIndex(workspacePath) {
|
|
@@ -2617,7 +2689,7 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
|
|
|
2617
2689
|
if (!await fileExists(document.normalizedPath)) {
|
|
2618
2690
|
return buildSnippet(chunk.text, query);
|
|
2619
2691
|
}
|
|
2620
|
-
const raw = await
|
|
2692
|
+
const raw = await readFile10(document.normalizedPath, "utf8");
|
|
2621
2693
|
orderedChunks = buildChunksForDocument(document, raw, config);
|
|
2622
2694
|
orderedChunkCache.set(document.id, orderedChunks);
|
|
2623
2695
|
}
|
package/package.json
CHANGED