@tryformation/querylight-cli 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/cli/main.js +169 -76
- package/dist/core/constants.d.ts +1 -1
- package/dist/core/gzip-json.d.ts +3 -0
- package/dist/index/index-store.d.ts +3 -0
- package/dist/index.js +130 -49
- package/dist/vector/runtime.d.ts +8 -0
- package/package.json +1 -1
- package/scripts/sparse-encode.py +5 -1
package/README.md
CHANGED
package/dist/cli/main.js
CHANGED
|
@@ -16,7 +16,7 @@ import path from "path";
|
|
|
16
16
|
import YAML from "yaml";
|
|
17
17
|
|
|
18
18
|
// src/core/constants.ts
|
|
19
|
-
var PACKAGE_VERSION = "0.2.
|
|
19
|
+
var PACKAGE_VERSION = "0.2.1";
|
|
20
20
|
var DEFAULT_WORKSPACE = ".kb";
|
|
21
21
|
var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
22
22
|
var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
|
@@ -387,6 +387,7 @@ import os from "os";
|
|
|
387
387
|
import path6 from "path";
|
|
388
388
|
import { fileURLToPath } from "url";
|
|
389
389
|
import { execFile, execFileSync } from "child_process";
|
|
390
|
+
import { mkdtemp, rm, writeFile as writeFile3 } from "fs/promises";
|
|
390
391
|
|
|
391
392
|
// src/core/files.ts
|
|
392
393
|
import { stat as stat2 } from "fs/promises";
|
|
@@ -400,6 +401,7 @@ async function fileExists(filePath) {
|
|
|
400
401
|
}
|
|
401
402
|
|
|
402
403
|
// src/vector/runtime.ts
|
|
404
|
+
var sparseExecFileSync = execFileSync;
|
|
403
405
|
function resolveQliHomeDir() {
|
|
404
406
|
return path6.resolve(process.env.QLI_HOME ?? path6.join(os.homedir(), ".qli"));
|
|
405
407
|
}
|
|
@@ -455,29 +457,36 @@ async function runSparsePython({
|
|
|
455
457
|
}) {
|
|
456
458
|
const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
|
|
457
459
|
const scriptPath = await sparseScriptPath(importMetaUrl);
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
"
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
460
|
+
const payloadDir = await mkdtemp(path6.join(os.tmpdir(), "qli-sparse-"));
|
|
461
|
+
const payloadPath = path6.join(payloadDir, "payload.json");
|
|
462
|
+
await writeFile3(payloadPath, JSON.stringify(payload), "utf8");
|
|
463
|
+
try {
|
|
464
|
+
return sparseExecFileSync(
|
|
465
|
+
"uv",
|
|
466
|
+
[
|
|
467
|
+
"run",
|
|
468
|
+
"--with",
|
|
469
|
+
"torch",
|
|
470
|
+
"--with",
|
|
471
|
+
"transformers",
|
|
472
|
+
"--with",
|
|
473
|
+
"huggingface_hub",
|
|
474
|
+
"python",
|
|
475
|
+
scriptPath,
|
|
476
|
+
payloadPath
|
|
477
|
+
],
|
|
478
|
+
{
|
|
479
|
+
encoding: "utf8",
|
|
480
|
+
maxBuffer: 1024 * 1024 * 1024,
|
|
481
|
+
env: {
|
|
482
|
+
...process.env,
|
|
483
|
+
HF_HOME: cacheDir
|
|
484
|
+
}
|
|
478
485
|
}
|
|
479
|
-
|
|
480
|
-
|
|
486
|
+
);
|
|
487
|
+
} finally {
|
|
488
|
+
await rm(payloadDir, { recursive: true, force: true });
|
|
489
|
+
}
|
|
481
490
|
}
|
|
482
491
|
async function getDenseTransformersRuntime(cacheDir) {
|
|
483
492
|
const transformers = await import("@huggingface/transformers");
|
|
@@ -490,8 +499,40 @@ async function getDenseTransformersRuntime(cacheDir) {
|
|
|
490
499
|
}
|
|
491
500
|
|
|
492
501
|
// src/vector/store.ts
|
|
493
|
-
import { mkdir as mkdir3,
|
|
502
|
+
import { mkdir as mkdir3, rm as rm2, writeFile as writeFile5 } from "fs/promises";
|
|
494
503
|
import path7 from "path";
|
|
504
|
+
|
|
505
|
+
// src/core/gzip-json.ts
|
|
506
|
+
import { readFile as readFile4, writeFile as writeFile4 } from "fs/promises";
|
|
507
|
+
import { promisify } from "util";
|
|
508
|
+
import { gunzip, gzip } from "zlib";
|
|
509
|
+
var gzipAsync = promisify(gzip);
|
|
510
|
+
var gunzipAsync = promisify(gunzip);
|
|
511
|
+
async function writeGzipJson(filePath, value) {
|
|
512
|
+
const payload = JSON.stringify(value, null, 2);
|
|
513
|
+
await writeFile4(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
|
|
514
|
+
}
|
|
515
|
+
async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
|
|
516
|
+
if (await fileExists(gzipPath)) {
|
|
517
|
+
const payload = await readFile4(gzipPath);
|
|
518
|
+
return JSON.parse((await gunzipAsync(payload)).toString("utf8"));
|
|
519
|
+
}
|
|
520
|
+
if (legacyPath && await fileExists(legacyPath)) {
|
|
521
|
+
return JSON.parse(await readFile4(legacyPath, "utf8"));
|
|
522
|
+
}
|
|
523
|
+
return JSON.parse(await readFile4(gzipPath, "utf8"));
|
|
524
|
+
}
|
|
525
|
+
async function resolveExistingGzipOrFilePath(gzipPath, legacyPath) {
|
|
526
|
+
if (await fileExists(gzipPath)) {
|
|
527
|
+
return gzipPath;
|
|
528
|
+
}
|
|
529
|
+
if (legacyPath && await fileExists(legacyPath)) {
|
|
530
|
+
return legacyPath;
|
|
531
|
+
}
|
|
532
|
+
return gzipPath;
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
// src/vector/store.ts
|
|
495
536
|
function vectorsDir(workspacePath) {
|
|
496
537
|
return path7.join(workspacePath, "vectors");
|
|
497
538
|
}
|
|
@@ -499,15 +540,27 @@ function sharedModelStateDir() {
|
|
|
499
540
|
return path7.join(resolveQliHomeDir(), "models", "status");
|
|
500
541
|
}
|
|
501
542
|
function denseVectorPath(workspacePath) {
|
|
502
|
-
return path7.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
543
|
+
return path7.join(vectorsDir(workspacePath), "dense.latest.json.gz");
|
|
503
544
|
}
|
|
504
545
|
function denseMetaPath(workspacePath) {
|
|
505
|
-
return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
546
|
+
return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
|
|
506
547
|
}
|
|
507
548
|
function sparseVectorPath(workspacePath) {
|
|
508
|
-
return path7.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
549
|
+
return path7.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
|
|
509
550
|
}
|
|
510
551
|
function sparseMetaPath(workspacePath) {
|
|
552
|
+
return path7.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
|
|
553
|
+
}
|
|
554
|
+
function legacyDenseVectorPath(workspacePath) {
|
|
555
|
+
return path7.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
556
|
+
}
|
|
557
|
+
function legacyDenseMetaPath(workspacePath) {
|
|
558
|
+
return path7.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
559
|
+
}
|
|
560
|
+
function legacySparseVectorPath(workspacePath) {
|
|
561
|
+
return path7.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
562
|
+
}
|
|
563
|
+
function legacySparseMetaPath(workspacePath) {
|
|
511
564
|
return path7.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
|
|
512
565
|
}
|
|
513
566
|
function pullMarkerPath(type, workspacePath, modelId, cacheDir) {
|
|
@@ -523,29 +576,37 @@ function sparsePullMarker(workspacePath, modelId, cacheDir) {
|
|
|
523
576
|
}
|
|
524
577
|
async function writeDensePayload(workspacePath, payload) {
|
|
525
578
|
await mkdir3(vectorsDir(workspacePath), { recursive: true });
|
|
526
|
-
await
|
|
527
|
-
await
|
|
579
|
+
await writeGzipJson(denseVectorPath(workspacePath), payload);
|
|
580
|
+
await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
|
|
581
|
+
await Promise.all([
|
|
582
|
+
rm2(legacyDenseVectorPath(workspacePath), { force: true }),
|
|
583
|
+
rm2(legacyDenseMetaPath(workspacePath), { force: true })
|
|
584
|
+
]);
|
|
528
585
|
}
|
|
529
586
|
async function readDensePayload(workspacePath) {
|
|
530
|
-
return
|
|
587
|
+
return readJsonFromGzipOrFile(denseVectorPath(workspacePath), legacyDenseVectorPath(workspacePath));
|
|
531
588
|
}
|
|
532
589
|
async function writeSparsePayload(workspacePath, payload) {
|
|
533
590
|
await mkdir3(vectorsDir(workspacePath), { recursive: true });
|
|
534
|
-
await
|
|
535
|
-
await
|
|
591
|
+
await writeGzipJson(sparseVectorPath(workspacePath), payload);
|
|
592
|
+
await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
|
|
593
|
+
await Promise.all([
|
|
594
|
+
rm2(legacySparseVectorPath(workspacePath), { force: true }),
|
|
595
|
+
rm2(legacySparseMetaPath(workspacePath), { force: true })
|
|
596
|
+
]);
|
|
536
597
|
}
|
|
537
598
|
async function readSparsePayload(workspacePath) {
|
|
538
|
-
return
|
|
599
|
+
return readJsonFromGzipOrFile(sparseVectorPath(workspacePath), legacySparseVectorPath(workspacePath));
|
|
539
600
|
}
|
|
540
601
|
async function writeDensePullMarker(workspacePath, model, value) {
|
|
541
602
|
const markerPath = densePullMarker(workspacePath, model.modelId, model.cacheDir);
|
|
542
603
|
await mkdir3(path7.dirname(markerPath), { recursive: true });
|
|
543
|
-
await
|
|
604
|
+
await writeFile5(markerPath, JSON.stringify(value, null, 2), "utf8");
|
|
544
605
|
}
|
|
545
606
|
async function writeSparsePullMarker(workspacePath, model, value) {
|
|
546
607
|
const markerPath = sparsePullMarker(workspacePath, model.modelId, model.cacheDir);
|
|
547
608
|
await mkdir3(path7.dirname(markerPath), { recursive: true });
|
|
548
|
-
await
|
|
609
|
+
await writeFile5(markerPath, JSON.stringify(value, null, 2), "utf8");
|
|
549
610
|
}
|
|
550
611
|
async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
551
612
|
const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
|
|
@@ -556,7 +617,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
|
556
617
|
modelId: dense.modelId,
|
|
557
618
|
cacheDir: denseCacheDir,
|
|
558
619
|
available: await fileExists(densePullMarker(workspacePath, dense.modelId, dense.cacheDir)),
|
|
559
|
-
artifactExists: await fileExists(denseVectorPath(workspacePath))
|
|
620
|
+
artifactExists: await fileExists(denseVectorPath(workspacePath)) || await fileExists(legacyDenseVectorPath(workspacePath))
|
|
560
621
|
},
|
|
561
622
|
sparse: {
|
|
562
623
|
configured: sparse.enabled,
|
|
@@ -564,7 +625,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
|
564
625
|
cacheDir: sparseCacheDir,
|
|
565
626
|
uvAvailable,
|
|
566
627
|
available: await fileExists(sparsePullMarker(workspacePath, sparse.modelId, sparse.cacheDir)),
|
|
567
|
-
artifactExists: await fileExists(sparseVectorPath(workspacePath))
|
|
628
|
+
artifactExists: await fileExists(sparseVectorPath(workspacePath)) || await fileExists(legacySparseVectorPath(workspacePath))
|
|
568
629
|
}
|
|
569
630
|
};
|
|
570
631
|
}
|
|
@@ -963,31 +1024,63 @@ async function getModelStatus(workspacePath, config) {
|
|
|
963
1024
|
}
|
|
964
1025
|
|
|
965
1026
|
// src/index/index-store.ts
|
|
966
|
-
import {
|
|
1027
|
+
import { mkdir as mkdir6, rm as rm3 } from "fs/promises";
|
|
967
1028
|
import path10 from "path";
|
|
1029
|
+
function versionedIndexPath(workspacePath, stamp) {
|
|
1030
|
+
return path10.join(workspacePath, "indexes", `${stamp}.json.gz`);
|
|
1031
|
+
}
|
|
1032
|
+
function versionedLegacyIndexPath(workspacePath, stamp) {
|
|
1033
|
+
return path10.join(workspacePath, "indexes", `${stamp}.json`);
|
|
1034
|
+
}
|
|
1035
|
+
function versionedMetaPath(workspacePath, stamp) {
|
|
1036
|
+
return path10.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
|
|
1037
|
+
}
|
|
1038
|
+
function versionedLegacyMetaPath(workspacePath, stamp) {
|
|
1039
|
+
return path10.join(workspacePath, "indexes", `${stamp}.meta.json`);
|
|
1040
|
+
}
|
|
1041
|
+
function latestIndexPath(workspacePath) {
|
|
1042
|
+
return path10.join(workspacePath, "indexes", "latest.json.gz");
|
|
1043
|
+
}
|
|
1044
|
+
function legacyLatestIndexPath(workspacePath) {
|
|
1045
|
+
return path10.join(workspacePath, "indexes", "latest.json");
|
|
1046
|
+
}
|
|
1047
|
+
function latestMetaPath(workspacePath) {
|
|
1048
|
+
return path10.join(workspacePath, "indexes", "latest.meta.json.gz");
|
|
1049
|
+
}
|
|
1050
|
+
function legacyLatestMetaPath(workspacePath) {
|
|
1051
|
+
return path10.join(workspacePath, "indexes", "latest.meta.json");
|
|
1052
|
+
}
|
|
968
1053
|
async function writeIndexArtifacts({
|
|
969
1054
|
workspacePath,
|
|
970
1055
|
indexState,
|
|
971
1056
|
metadata
|
|
972
1057
|
}) {
|
|
973
1058
|
const stamp = metadata.createdAt.replace(/[:.]/g, "-");
|
|
974
|
-
const indexPath =
|
|
975
|
-
const metaPath =
|
|
976
|
-
const
|
|
977
|
-
const
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
await
|
|
981
|
-
await
|
|
982
|
-
await
|
|
983
|
-
await
|
|
984
|
-
|
|
1059
|
+
const indexPath = versionedIndexPath(workspacePath, stamp);
|
|
1060
|
+
const metaPath = versionedMetaPath(workspacePath, stamp);
|
|
1061
|
+
const latestIndexArtifactPath = latestIndexPath(workspacePath);
|
|
1062
|
+
const latestMetadataArtifactPath = latestMetaPath(workspacePath);
|
|
1063
|
+
await mkdir6(path10.join(workspacePath, "indexes"), { recursive: true });
|
|
1064
|
+
await writeGzipJson(indexPath, indexState);
|
|
1065
|
+
await writeGzipJson(metaPath, metadata);
|
|
1066
|
+
await writeGzipJson(latestIndexArtifactPath, indexState);
|
|
1067
|
+
await writeGzipJson(latestMetadataArtifactPath, metadata);
|
|
1068
|
+
await Promise.all([
|
|
1069
|
+
rm3(legacyLatestIndexPath(workspacePath), { force: true }),
|
|
1070
|
+
rm3(legacyLatestMetaPath(workspacePath), { force: true }),
|
|
1071
|
+
rm3(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
|
|
1072
|
+
rm3(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
|
|
1073
|
+
]);
|
|
1074
|
+
return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
|
|
985
1075
|
}
|
|
986
1076
|
async function readLatestIndexState(workspacePath) {
|
|
987
|
-
return
|
|
1077
|
+
return readJsonFromGzipOrFile(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
|
|
988
1078
|
}
|
|
989
1079
|
async function readLatestIndexMetadata(workspacePath) {
|
|
990
|
-
return
|
|
1080
|
+
return readJsonFromGzipOrFile(latestMetaPath(workspacePath), legacyLatestMetaPath(workspacePath));
|
|
1081
|
+
}
|
|
1082
|
+
async function resolveLatestIndexArtifactPath(workspacePath) {
|
|
1083
|
+
return resolveExistingGzipOrFilePath(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
|
|
991
1084
|
}
|
|
992
1085
|
|
|
993
1086
|
// src/index/querylight-indexer.ts
|
|
@@ -1178,7 +1271,7 @@ async function removeSource(workspacePath, sourceId) {
|
|
|
1178
1271
|
}
|
|
1179
1272
|
|
|
1180
1273
|
// src/ingest/document-utils.ts
|
|
1181
|
-
import { mkdir as
|
|
1274
|
+
import { mkdir as mkdir7, rm as rm4, writeFile as writeFile6 } from "fs/promises";
|
|
1182
1275
|
import path14 from "path";
|
|
1183
1276
|
|
|
1184
1277
|
// src/normalize/normalize-markdown.ts
|
|
@@ -1231,8 +1324,8 @@ async function writeNormalizedDocument({
|
|
|
1231
1324
|
normalizedPath,
|
|
1232
1325
|
markdown
|
|
1233
1326
|
}) {
|
|
1234
|
-
await
|
|
1235
|
-
await
|
|
1327
|
+
await mkdir7(path14.dirname(normalizedPath), { recursive: true });
|
|
1328
|
+
await writeFile6(
|
|
1236
1329
|
normalizedPath,
|
|
1237
1330
|
withFrontmatter(
|
|
1238
1331
|
{
|
|
@@ -1254,8 +1347,8 @@ async function writeNormalizedDocument({
|
|
|
1254
1347
|
}
|
|
1255
1348
|
async function deleteDocumentArtifacts(document) {
|
|
1256
1349
|
await Promise.all([
|
|
1257
|
-
document.rawPath ?
|
|
1258
|
-
|
|
1350
|
+
document.rawPath ? rm4(document.rawPath, { force: true }) : Promise.resolve(),
|
|
1351
|
+
rm4(document.normalizedPath, { force: true })
|
|
1259
1352
|
]);
|
|
1260
1353
|
}
|
|
1261
1354
|
|
|
@@ -1279,7 +1372,7 @@ async function listDirectoryFiles(source) {
|
|
|
1279
1372
|
|
|
1280
1373
|
// src/ingest/adapters/file-adapter.ts
|
|
1281
1374
|
import { basename, extname, resolve } from "path";
|
|
1282
|
-
import { mkdir as
|
|
1375
|
+
import { mkdir as mkdir8, readFile as readFile8, stat as stat3, writeFile as writeFile7 } from "fs/promises";
|
|
1283
1376
|
|
|
1284
1377
|
// src/ingest/extractors/docx-extractor.ts
|
|
1285
1378
|
import mammoth from "mammoth";
|
|
@@ -1453,16 +1546,16 @@ function extractPublicationDateFromHtml(html) {
|
|
|
1453
1546
|
}
|
|
1454
1547
|
|
|
1455
1548
|
// src/ingest/extractors/markdown-extractor.ts
|
|
1456
|
-
import { readFile as
|
|
1549
|
+
import { readFile as readFile5 } from "fs/promises";
|
|
1457
1550
|
async function extractMarkdown(filePath) {
|
|
1458
|
-
return
|
|
1551
|
+
return readFile5(filePath, "utf8");
|
|
1459
1552
|
}
|
|
1460
1553
|
|
|
1461
1554
|
// src/ingest/extractors/pdf-extractor.ts
|
|
1462
|
-
import { readFile as
|
|
1555
|
+
import { readFile as readFile6 } from "fs/promises";
|
|
1463
1556
|
import { PDFParse } from "pdf-parse";
|
|
1464
1557
|
async function extractPdf(filePath) {
|
|
1465
|
-
const buffer = await
|
|
1558
|
+
const buffer = await readFile6(filePath);
|
|
1466
1559
|
const parser = new PDFParse({ data: buffer });
|
|
1467
1560
|
try {
|
|
1468
1561
|
const parsed = await parser.getText();
|
|
@@ -1473,9 +1566,9 @@ async function extractPdf(filePath) {
|
|
|
1473
1566
|
}
|
|
1474
1567
|
|
|
1475
1568
|
// src/ingest/extractors/text-extractor.ts
|
|
1476
|
-
import { readFile as
|
|
1569
|
+
import { readFile as readFile7 } from "fs/promises";
|
|
1477
1570
|
async function extractText(filePath) {
|
|
1478
|
-
return
|
|
1571
|
+
return readFile7(filePath, "utf8");
|
|
1479
1572
|
}
|
|
1480
1573
|
|
|
1481
1574
|
// src/ingest/adapters/file-adapter.ts
|
|
@@ -1510,7 +1603,7 @@ async function extractFileContent(filePath, mimeType) {
|
|
|
1510
1603
|
${text}`, raw: text };
|
|
1511
1604
|
}
|
|
1512
1605
|
if (mimeType === "text/html") {
|
|
1513
|
-
const raw = await
|
|
1606
|
+
const raw = await readFile8(filePath, "utf8");
|
|
1514
1607
|
const extracted = extractHtmlToMarkdown(raw);
|
|
1515
1608
|
return { title: extracted.title, markdown: `# ${extracted.title}
|
|
1516
1609
|
|
|
@@ -1566,10 +1659,10 @@ async function ingestFile({
|
|
|
1566
1659
|
const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
|
|
1567
1660
|
const indexedAt = now;
|
|
1568
1661
|
const crawledAt = now;
|
|
1569
|
-
await
|
|
1570
|
-
await
|
|
1662
|
+
await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
|
|
1663
|
+
await mkdir8(resolve(workspacePath, "raw", source.id), { recursive: true });
|
|
1571
1664
|
if (extracted.raw) {
|
|
1572
|
-
await
|
|
1665
|
+
await writeFile7(rawPath, extracted.raw, "utf8");
|
|
1573
1666
|
}
|
|
1574
1667
|
await writeNormalizedDocument({
|
|
1575
1668
|
documentId,
|
|
@@ -1632,7 +1725,7 @@ ${content}`;
|
|
|
1632
1725
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1633
1726
|
const lastChangedAt = previous?.contentHash === contentHash ? previous.lastChangedAt : now;
|
|
1634
1727
|
const indexedAt = now;
|
|
1635
|
-
await
|
|
1728
|
+
await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
|
|
1636
1729
|
await writeNormalizedDocument({
|
|
1637
1730
|
documentId,
|
|
1638
1731
|
sourceId: source.id,
|
|
@@ -1676,7 +1769,7 @@ async function reprocessStoredDocument(document, source) {
|
|
|
1676
1769
|
if (!document.rawPath) {
|
|
1677
1770
|
return null;
|
|
1678
1771
|
}
|
|
1679
|
-
const raw = await
|
|
1772
|
+
const raw = await readFile8(document.rawPath, "utf8");
|
|
1680
1773
|
const fallbackTitle = document.title || basename(document.uri);
|
|
1681
1774
|
const extracted = await extractRawContent(raw, document.mimeType, fallbackTitle);
|
|
1682
1775
|
const contentHash = sha256(extracted.markdown);
|
|
@@ -1793,7 +1886,7 @@ async function parseRssFeedDocument(xml, source) {
|
|
|
1793
1886
|
}
|
|
1794
1887
|
|
|
1795
1888
|
// src/ingest/adapters/url-adapter.ts
|
|
1796
|
-
import { mkdir as
|
|
1889
|
+
import { mkdir as mkdir9, readFile as readFile9, writeFile as writeFile8 } from "fs/promises";
|
|
1797
1890
|
import path16 from "path";
|
|
1798
1891
|
|
|
1799
1892
|
// src/core/urls.ts
|
|
@@ -1845,8 +1938,8 @@ ${extracted.markdown}`;
|
|
|
1845
1938
|
const indexedAt = now;
|
|
1846
1939
|
const crawledAt = now;
|
|
1847
1940
|
const resolvedPublicationDate = choosePublicationDate(publicationDate, extractPublicationDateFromHtml(body), previous?.publicationDate);
|
|
1848
|
-
await
|
|
1849
|
-
await
|
|
1941
|
+
await mkdir9(path16.resolve(workspacePath, "raw", source.id), { recursive: true });
|
|
1942
|
+
await writeFile8(rawPath, body, "utf8");
|
|
1850
1943
|
await writeNormalizedDocument({
|
|
1851
1944
|
documentId,
|
|
1852
1945
|
sourceId: source.id,
|
|
@@ -1966,7 +2059,7 @@ async function reprocessRemoteDocument(document, source) {
|
|
|
1966
2059
|
if (!document.rawPath || !await fileExists(document.rawPath)) {
|
|
1967
2060
|
return null;
|
|
1968
2061
|
}
|
|
1969
|
-
const raw = await
|
|
2062
|
+
const raw = await readFile9(document.rawPath, "utf8");
|
|
1970
2063
|
const extracted = extractHtmlToMarkdown(raw);
|
|
1971
2064
|
const markdown = `# ${extracted.title}
|
|
1972
2065
|
|
|
@@ -2684,7 +2777,7 @@ async function discoverWebsiteFeed(websiteUrl, userAgent) {
|
|
|
2684
2777
|
}
|
|
2685
2778
|
|
|
2686
2779
|
// src/query/search-service.ts
|
|
2687
|
-
import { readFile as
|
|
2780
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
2688
2781
|
import { BoolQuery, MatchQuery, OP, TermQuery, reciprocalRankFusion } from "@tryformation/querylight-ts";
|
|
2689
2782
|
import path18 from "path";
|
|
2690
2783
|
async function loadHydratedIndex(workspacePath) {
|
|
@@ -2914,7 +3007,7 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
|
|
|
2914
3007
|
if (!await fileExists(document.normalizedPath)) {
|
|
2915
3008
|
return buildSnippet(chunk.text, query);
|
|
2916
3009
|
}
|
|
2917
|
-
const raw = await
|
|
3010
|
+
const raw = await readFile10(document.normalizedPath, "utf8");
|
|
2918
3011
|
orderedChunks = buildChunksForDocument(document, raw, config);
|
|
2919
3012
|
orderedChunkCache.set(document.id, orderedChunks);
|
|
2920
3013
|
}
|
|
@@ -4210,7 +4303,7 @@ Examples:
|
|
|
4210
4303
|
try {
|
|
4211
4304
|
const meta = await readLatestIndexMetadata(workspace);
|
|
4212
4305
|
latestIndex = meta.createdAt;
|
|
4213
|
-
indexSize = (await stat4(
|
|
4306
|
+
indexSize = (await stat4(await resolveLatestIndexArtifactPath(workspace))).size;
|
|
4214
4307
|
} catch {
|
|
4215
4308
|
latestIndex = void 0;
|
|
4216
4309
|
}
|
package/dist/core/constants.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export declare const PACKAGE_NAME = "@tryformation/querylight-cli";
|
|
2
|
-
export declare const PACKAGE_VERSION = "0.2.
|
|
2
|
+
export declare const PACKAGE_VERSION = "0.2.1";
|
|
3
3
|
export declare const DEFAULT_WORKSPACE = ".kb";
|
|
4
4
|
export declare const DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
5
5
|
export declare const LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export declare function writeGzipJson(filePath: string, value: unknown): Promise<void>;
|
|
2
|
+
export declare function readJsonFromGzipOrFile<T>(gzipPath: string, legacyPath?: string): Promise<T>;
|
|
3
|
+
export declare function resolveExistingGzipOrFilePath(gzipPath: string, legacyPath?: string): Promise<string>;
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import type { IndexMetadata } from "../types/models.js";
|
|
2
|
+
export declare function latestIndexPath(workspacePath: string): string;
|
|
3
|
+
export declare function latestMetaPath(workspacePath: string): string;
|
|
2
4
|
export declare function writeIndexArtifacts({ workspacePath, indexState, metadata }: {
|
|
3
5
|
workspacePath: string;
|
|
4
6
|
indexState: object;
|
|
@@ -9,3 +11,4 @@ export declare function writeIndexArtifacts({ workspacePath, indexState, metadat
|
|
|
9
11
|
}>;
|
|
10
12
|
export declare function readLatestIndexState(workspacePath: string): Promise<object>;
|
|
11
13
|
export declare function readLatestIndexMetadata(workspacePath: string): Promise<IndexMetadata>;
|
|
14
|
+
export declare function resolveLatestIndexArtifactPath(workspacePath: string): Promise<string>;
|
package/dist/index.js
CHANGED
|
@@ -1795,6 +1795,8 @@ import os from "os";
|
|
|
1795
1795
|
import path12 from "path";
|
|
1796
1796
|
import { fileURLToPath } from "url";
|
|
1797
1797
|
import { execFile, execFileSync } from "child_process";
|
|
1798
|
+
import { mkdtemp, rm as rm2, writeFile as writeFile6 } from "fs/promises";
|
|
1799
|
+
var sparseExecFileSync = execFileSync;
|
|
1798
1800
|
function resolveQliHomeDir() {
|
|
1799
1801
|
return path12.resolve(process.env.QLI_HOME ?? path12.join(os.homedir(), ".qli"));
|
|
1800
1802
|
}
|
|
@@ -1850,29 +1852,36 @@ async function runSparsePython({
|
|
|
1850
1852
|
}) {
|
|
1851
1853
|
const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
|
|
1852
1854
|
const scriptPath = await sparseScriptPath(importMetaUrl);
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
"
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1855
|
+
const payloadDir = await mkdtemp(path12.join(os.tmpdir(), "qli-sparse-"));
|
|
1856
|
+
const payloadPath = path12.join(payloadDir, "payload.json");
|
|
1857
|
+
await writeFile6(payloadPath, JSON.stringify(payload), "utf8");
|
|
1858
|
+
try {
|
|
1859
|
+
return sparseExecFileSync(
|
|
1860
|
+
"uv",
|
|
1861
|
+
[
|
|
1862
|
+
"run",
|
|
1863
|
+
"--with",
|
|
1864
|
+
"torch",
|
|
1865
|
+
"--with",
|
|
1866
|
+
"transformers",
|
|
1867
|
+
"--with",
|
|
1868
|
+
"huggingface_hub",
|
|
1869
|
+
"python",
|
|
1870
|
+
scriptPath,
|
|
1871
|
+
payloadPath
|
|
1872
|
+
],
|
|
1873
|
+
{
|
|
1874
|
+
encoding: "utf8",
|
|
1875
|
+
maxBuffer: 1024 * 1024 * 1024,
|
|
1876
|
+
env: {
|
|
1877
|
+
...process.env,
|
|
1878
|
+
HF_HOME: cacheDir
|
|
1879
|
+
}
|
|
1873
1880
|
}
|
|
1874
|
-
|
|
1875
|
-
|
|
1881
|
+
);
|
|
1882
|
+
} finally {
|
|
1883
|
+
await rm2(payloadDir, { recursive: true, force: true });
|
|
1884
|
+
}
|
|
1876
1885
|
}
|
|
1877
1886
|
async function getDenseTransformersRuntime(cacheDir) {
|
|
1878
1887
|
const transformers = await import("@huggingface/transformers");
|
|
@@ -1885,8 +1894,31 @@ async function getDenseTransformersRuntime(cacheDir) {
|
|
|
1885
1894
|
}
|
|
1886
1895
|
|
|
1887
1896
|
// src/vector/store.ts
|
|
1888
|
-
import { mkdir as mkdir6,
|
|
1897
|
+
import { mkdir as mkdir6, rm as rm3, writeFile as writeFile8 } from "fs/promises";
|
|
1889
1898
|
import path13 from "path";
|
|
1899
|
+
|
|
1900
|
+
// src/core/gzip-json.ts
|
|
1901
|
+
import { readFile as readFile9, writeFile as writeFile7 } from "fs/promises";
|
|
1902
|
+
import { promisify } from "util";
|
|
1903
|
+
import { gunzip, gzip } from "zlib";
|
|
1904
|
+
var gzipAsync = promisify(gzip);
|
|
1905
|
+
var gunzipAsync = promisify(gunzip);
|
|
1906
|
+
async function writeGzipJson(filePath, value) {
|
|
1907
|
+
const payload = JSON.stringify(value, null, 2);
|
|
1908
|
+
await writeFile7(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
|
|
1909
|
+
}
|
|
1910
|
+
async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
|
|
1911
|
+
if (await fileExists(gzipPath)) {
|
|
1912
|
+
const payload = await readFile9(gzipPath);
|
|
1913
|
+
return JSON.parse((await gunzipAsync(payload)).toString("utf8"));
|
|
1914
|
+
}
|
|
1915
|
+
if (legacyPath && await fileExists(legacyPath)) {
|
|
1916
|
+
return JSON.parse(await readFile9(legacyPath, "utf8"));
|
|
1917
|
+
}
|
|
1918
|
+
return JSON.parse(await readFile9(gzipPath, "utf8"));
|
|
1919
|
+
}
|
|
1920
|
+
|
|
1921
|
+
// src/vector/store.ts
|
|
1890
1922
|
function vectorsDir(workspacePath) {
|
|
1891
1923
|
return path13.join(workspacePath, "vectors");
|
|
1892
1924
|
}
|
|
@@ -1894,15 +1926,27 @@ function sharedModelStateDir() {
|
|
|
1894
1926
|
return path13.join(resolveQliHomeDir(), "models", "status");
|
|
1895
1927
|
}
|
|
1896
1928
|
function denseVectorPath(workspacePath) {
|
|
1897
|
-
return path13.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
1929
|
+
return path13.join(vectorsDir(workspacePath), "dense.latest.json.gz");
|
|
1898
1930
|
}
|
|
1899
1931
|
function denseMetaPath(workspacePath) {
|
|
1900
|
-
return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
1932
|
+
return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json.gz");
|
|
1901
1933
|
}
|
|
1902
1934
|
function sparseVectorPath(workspacePath) {
|
|
1903
|
-
return path13.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
1935
|
+
return path13.join(vectorsDir(workspacePath), "sparse.latest.json.gz");
|
|
1904
1936
|
}
|
|
1905
1937
|
function sparseMetaPath(workspacePath) {
|
|
1938
|
+
return path13.join(vectorsDir(workspacePath), "sparse.latest.meta.json.gz");
|
|
1939
|
+
}
|
|
1940
|
+
function legacyDenseVectorPath(workspacePath) {
|
|
1941
|
+
return path13.join(vectorsDir(workspacePath), "dense.latest.json");
|
|
1942
|
+
}
|
|
1943
|
+
function legacyDenseMetaPath(workspacePath) {
|
|
1944
|
+
return path13.join(vectorsDir(workspacePath), "dense.latest.meta.json");
|
|
1945
|
+
}
|
|
1946
|
+
function legacySparseVectorPath(workspacePath) {
|
|
1947
|
+
return path13.join(vectorsDir(workspacePath), "sparse.latest.json");
|
|
1948
|
+
}
|
|
1949
|
+
function legacySparseMetaPath(workspacePath) {
|
|
1906
1950
|
return path13.join(vectorsDir(workspacePath), "sparse.latest.meta.json");
|
|
1907
1951
|
}
|
|
1908
1952
|
function pullMarkerPath(type, workspacePath, modelId, cacheDir) {
|
|
@@ -1918,19 +1962,27 @@ function sparsePullMarker(workspacePath, modelId, cacheDir) {
|
|
|
1918
1962
|
}
|
|
1919
1963
|
async function writeDensePayload(workspacePath, payload) {
|
|
1920
1964
|
await mkdir6(vectorsDir(workspacePath), { recursive: true });
|
|
1921
|
-
await
|
|
1922
|
-
await
|
|
1965
|
+
await writeGzipJson(denseVectorPath(workspacePath), payload);
|
|
1966
|
+
await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
|
|
1967
|
+
await Promise.all([
|
|
1968
|
+
rm3(legacyDenseVectorPath(workspacePath), { force: true }),
|
|
1969
|
+
rm3(legacyDenseMetaPath(workspacePath), { force: true })
|
|
1970
|
+
]);
|
|
1923
1971
|
}
|
|
1924
1972
|
async function readDensePayload(workspacePath) {
|
|
1925
|
-
return
|
|
1973
|
+
return readJsonFromGzipOrFile(denseVectorPath(workspacePath), legacyDenseVectorPath(workspacePath));
|
|
1926
1974
|
}
|
|
1927
1975
|
async function writeSparsePayload(workspacePath, payload) {
|
|
1928
1976
|
await mkdir6(vectorsDir(workspacePath), { recursive: true });
|
|
1929
|
-
await
|
|
1930
|
-
await
|
|
1977
|
+
await writeGzipJson(sparseVectorPath(workspacePath), payload);
|
|
1978
|
+
await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
|
|
1979
|
+
await Promise.all([
|
|
1980
|
+
rm3(legacySparseVectorPath(workspacePath), { force: true }),
|
|
1981
|
+
rm3(legacySparseMetaPath(workspacePath), { force: true })
|
|
1982
|
+
]);
|
|
1931
1983
|
}
|
|
1932
1984
|
async function readSparsePayload(workspacePath) {
|
|
1933
|
-
return
|
|
1985
|
+
return readJsonFromGzipOrFile(sparseVectorPath(workspacePath), legacySparseVectorPath(workspacePath));
|
|
1934
1986
|
}
|
|
1935
1987
|
async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
1936
1988
|
const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
|
|
@@ -1941,7 +1993,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
|
1941
1993
|
modelId: dense.modelId,
|
|
1942
1994
|
cacheDir: denseCacheDir,
|
|
1943
1995
|
available: await fileExists(densePullMarker(workspacePath, dense.modelId, dense.cacheDir)),
|
|
1944
|
-
artifactExists: await fileExists(denseVectorPath(workspacePath))
|
|
1996
|
+
artifactExists: await fileExists(denseVectorPath(workspacePath)) || await fileExists(legacyDenseVectorPath(workspacePath))
|
|
1945
1997
|
},
|
|
1946
1998
|
sparse: {
|
|
1947
1999
|
configured: sparse.enabled,
|
|
@@ -1949,7 +2001,7 @@ async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
|
|
|
1949
2001
|
cacheDir: sparseCacheDir,
|
|
1950
2002
|
uvAvailable,
|
|
1951
2003
|
available: await fileExists(sparsePullMarker(workspacePath, sparse.modelId, sparse.cacheDir)),
|
|
1952
|
-
artifactExists: await fileExists(sparseVectorPath(workspacePath))
|
|
2004
|
+
artifactExists: await fileExists(sparseVectorPath(workspacePath)) || await fileExists(legacySparseVectorPath(workspacePath))
|
|
1953
2005
|
}
|
|
1954
2006
|
};
|
|
1955
2007
|
}
|
|
@@ -2266,28 +2318,57 @@ async function buildVectorArtifacts({
|
|
|
2266
2318
|
}
|
|
2267
2319
|
|
|
2268
2320
|
// src/index/index-store.ts
|
|
2269
|
-
import {
|
|
2321
|
+
import { mkdir as mkdir9, rm as rm4 } from "fs/promises";
|
|
2270
2322
|
import path16 from "path";
|
|
2323
|
+
function versionedIndexPath(workspacePath, stamp) {
|
|
2324
|
+
return path16.join(workspacePath, "indexes", `${stamp}.json.gz`);
|
|
2325
|
+
}
|
|
2326
|
+
function versionedLegacyIndexPath(workspacePath, stamp) {
|
|
2327
|
+
return path16.join(workspacePath, "indexes", `${stamp}.json`);
|
|
2328
|
+
}
|
|
2329
|
+
function versionedMetaPath(workspacePath, stamp) {
|
|
2330
|
+
return path16.join(workspacePath, "indexes", `${stamp}.meta.json.gz`);
|
|
2331
|
+
}
|
|
2332
|
+
function versionedLegacyMetaPath(workspacePath, stamp) {
|
|
2333
|
+
return path16.join(workspacePath, "indexes", `${stamp}.meta.json`);
|
|
2334
|
+
}
|
|
2335
|
+
function latestIndexPath(workspacePath) {
|
|
2336
|
+
return path16.join(workspacePath, "indexes", "latest.json.gz");
|
|
2337
|
+
}
|
|
2338
|
+
function legacyLatestIndexPath(workspacePath) {
|
|
2339
|
+
return path16.join(workspacePath, "indexes", "latest.json");
|
|
2340
|
+
}
|
|
2341
|
+
function latestMetaPath(workspacePath) {
|
|
2342
|
+
return path16.join(workspacePath, "indexes", "latest.meta.json.gz");
|
|
2343
|
+
}
|
|
2344
|
+
function legacyLatestMetaPath(workspacePath) {
|
|
2345
|
+
return path16.join(workspacePath, "indexes", "latest.meta.json");
|
|
2346
|
+
}
|
|
2271
2347
|
async function writeIndexArtifacts({
|
|
2272
2348
|
workspacePath,
|
|
2273
2349
|
indexState,
|
|
2274
2350
|
metadata
|
|
2275
2351
|
}) {
|
|
2276
2352
|
const stamp = metadata.createdAt.replace(/[:.]/g, "-");
|
|
2277
|
-
const indexPath =
|
|
2278
|
-
const metaPath =
|
|
2279
|
-
const
|
|
2280
|
-
const
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
await
|
|
2284
|
-
await
|
|
2285
|
-
await
|
|
2286
|
-
await
|
|
2287
|
-
|
|
2353
|
+
const indexPath = versionedIndexPath(workspacePath, stamp);
|
|
2354
|
+
const metaPath = versionedMetaPath(workspacePath, stamp);
|
|
2355
|
+
const latestIndexArtifactPath = latestIndexPath(workspacePath);
|
|
2356
|
+
const latestMetadataArtifactPath = latestMetaPath(workspacePath);
|
|
2357
|
+
await mkdir9(path16.join(workspacePath, "indexes"), { recursive: true });
|
|
2358
|
+
await writeGzipJson(indexPath, indexState);
|
|
2359
|
+
await writeGzipJson(metaPath, metadata);
|
|
2360
|
+
await writeGzipJson(latestIndexArtifactPath, indexState);
|
|
2361
|
+
await writeGzipJson(latestMetadataArtifactPath, metadata);
|
|
2362
|
+
await Promise.all([
|
|
2363
|
+
rm4(legacyLatestIndexPath(workspacePath), { force: true }),
|
|
2364
|
+
rm4(legacyLatestMetaPath(workspacePath), { force: true }),
|
|
2365
|
+
rm4(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
|
|
2366
|
+
rm4(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
|
|
2367
|
+
]);
|
|
2368
|
+
return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
|
|
2288
2369
|
}
|
|
2289
2370
|
async function readLatestIndexState(workspacePath) {
|
|
2290
|
-
return
|
|
2371
|
+
return readJsonFromGzipOrFile(latestIndexPath(workspacePath), legacyLatestIndexPath(workspacePath));
|
|
2291
2372
|
}
|
|
2292
2373
|
|
|
2293
2374
|
// src/index/querylight-indexer.ts
|
|
@@ -2387,7 +2468,7 @@ async function buildIndex({
|
|
|
2387
2468
|
}
|
|
2388
2469
|
|
|
2389
2470
|
// src/query/search-service.ts
|
|
2390
|
-
import { readFile as
|
|
2471
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
2391
2472
|
import { BoolQuery, MatchQuery, OP, TermQuery, reciprocalRankFusion } from "@tryformation/querylight-ts";
|
|
2392
2473
|
import path18 from "path";
|
|
2393
2474
|
async function loadHydratedIndex(workspacePath) {
|
|
@@ -2617,7 +2698,7 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
|
|
|
2617
2698
|
if (!await fileExists(document.normalizedPath)) {
|
|
2618
2699
|
return buildSnippet(chunk.text, query);
|
|
2619
2700
|
}
|
|
2620
|
-
const raw = await
|
|
2701
|
+
const raw = await readFile10(document.normalizedPath, "utf8");
|
|
2621
2702
|
orderedChunks = buildChunksForDocument(document, raw, config);
|
|
2622
2703
|
orderedChunkCache.set(document.id, orderedChunks);
|
|
2623
2704
|
}
|
package/dist/vector/runtime.d.ts
CHANGED
|
@@ -1,4 +1,11 @@
|
|
|
1
1
|
import type { SparseVectorModelConfig } from "../types/models.js";
|
|
2
|
+
type SparseExecOptions = {
|
|
3
|
+
encoding: BufferEncoding;
|
|
4
|
+
maxBuffer: number;
|
|
5
|
+
env: NodeJS.ProcessEnv;
|
|
6
|
+
};
|
|
7
|
+
type SparseExecFileSync = (file: string, args: string[], options: SparseExecOptions) => string;
|
|
8
|
+
export declare function setSparseExecFileSyncForTests(fn: SparseExecFileSync | null): void;
|
|
2
9
|
export declare function resolveQliHomeDir(): string;
|
|
3
10
|
export declare function resolveCacheDir(workspacePath: string, configuredPath: string): string;
|
|
4
11
|
export declare function packageRootFromImportMeta(importMetaUrl: string): string;
|
|
@@ -18,3 +25,4 @@ export declare function getDenseTransformersRuntime(cacheDir: string): Promise<{
|
|
|
18
25
|
};
|
|
19
26
|
pipeline: typeof import("@huggingface/transformers").pipeline;
|
|
20
27
|
}>;
|
|
28
|
+
export {};
|
package/package.json
CHANGED
package/scripts/sparse-encode.py
CHANGED
|
@@ -88,7 +88,11 @@ def encode_documents(model_id: str, top_tokens: int, documents):
|
|
|
88
88
|
|
|
89
89
|
|
|
90
90
|
def main():
|
|
91
|
-
|
|
91
|
+
if len(sys.argv) > 1:
|
|
92
|
+
with open(sys.argv[1], encoding="utf-8") as handle:
|
|
93
|
+
payload = json.load(handle)
|
|
94
|
+
else:
|
|
95
|
+
payload = json.load(sys.stdin)
|
|
92
96
|
action = payload["action"]
|
|
93
97
|
model_id = payload["model_id"]
|
|
94
98
|
if action == "download_only":
|