@tryformation/querylight-cli 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -387,6 +387,7 @@ import os from "os";
387
387
  import path6 from "path";
388
388
  import { fileURLToPath } from "url";
389
389
  import { execFile, execFileSync } from "child_process";
390
+ import { mkdtemp, rm, writeFile as writeFile3 } from "fs/promises";
390
391
 
391
392
  // src/core/files.ts
392
393
  import { stat as stat2 } from "fs/promises";
@@ -400,6 +401,7 @@ async function fileExists(filePath) {
400
401
  }
401
402
 
402
403
  // src/vector/runtime.ts
404
+ var sparseExecFileSync = execFileSync;
403
405
  function resolveQliHomeDir() {
404
406
  return path6.resolve(process.env.QLI_HOME ?? path6.join(os.homedir(), ".qli"));
405
407
  }
@@ -455,29 +457,36 @@ async function runSparsePython({
455
457
  }) {
456
458
  const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
457
459
  const scriptPath = await sparseScriptPath(importMetaUrl);
458
- return execFileSync(
459
- "uv",
460
- [
461
- "run",
462
- "--with",
463
- "torch",
464
- "--with",
465
- "transformers",
466
- "--with",
467
- "huggingface_hub",
468
- "python",
469
- scriptPath
470
- ],
471
- {
472
- encoding: "utf8",
473
- maxBuffer: 1024 * 1024 * 1024,
474
- input: JSON.stringify(payload),
475
- env: {
476
- ...process.env,
477
- HF_HOME: cacheDir
460
+ const payloadDir = await mkdtemp(path6.join(os.tmpdir(), "qli-sparse-"));
461
+ const payloadPath = path6.join(payloadDir, "payload.json");
462
+ await writeFile3(payloadPath, JSON.stringify(payload), "utf8");
463
+ try {
464
+ return sparseExecFileSync(
465
+ "uv",
466
+ [
467
+ "run",
468
+ "--with",
469
+ "torch",
470
+ "--with",
471
+ "transformers",
472
+ "--with",
473
+ "huggingface_hub",
474
+ "python",
475
+ scriptPath,
476
+ payloadPath
477
+ ],
478
+ {
479
+ encoding: "utf8",
480
+ maxBuffer: 1024 * 1024 * 1024,
481
+ env: {
482
+ ...process.env,
483
+ HF_HOME: cacheDir
484
+ }
478
485
  }
479
- }
480
- );
486
+ );
487
+ } finally {
488
+ await rm(payloadDir, { recursive: true, force: true });
489
+ }
481
490
  }
482
491
  async function getDenseTransformersRuntime(cacheDir) {
483
492
  const transformers = await import("@huggingface/transformers");
@@ -490,18 +499,18 @@ async function getDenseTransformersRuntime(cacheDir) {
490
499
  }
491
500
 
492
501
  // src/vector/store.ts
493
- import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
502
+ import { mkdir as mkdir3, rm as rm2, writeFile as writeFile5 } from "fs/promises";
494
503
  import path7 from "path";
495
504
 
496
505
  // src/core/gzip-json.ts
497
- import { readFile as readFile4, writeFile as writeFile3 } from "fs/promises";
506
+ import { readFile as readFile4, writeFile as writeFile4 } from "fs/promises";
498
507
  import { promisify } from "util";
499
508
  import { gunzip, gzip } from "zlib";
500
509
  var gzipAsync = promisify(gzip);
501
510
  var gunzipAsync = promisify(gunzip);
502
511
  async function writeGzipJson(filePath, value) {
503
512
  const payload = JSON.stringify(value, null, 2);
504
- await writeFile3(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
513
+ await writeFile4(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
505
514
  }
506
515
  async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
507
516
  if (await fileExists(gzipPath)) {
@@ -570,8 +579,8 @@ async function writeDensePayload(workspacePath, payload) {
570
579
  await writeGzipJson(denseVectorPath(workspacePath), payload);
571
580
  await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
572
581
  await Promise.all([
573
- rm(legacyDenseVectorPath(workspacePath), { force: true }),
574
- rm(legacyDenseMetaPath(workspacePath), { force: true })
582
+ rm2(legacyDenseVectorPath(workspacePath), { force: true }),
583
+ rm2(legacyDenseMetaPath(workspacePath), { force: true })
575
584
  ]);
576
585
  }
577
586
  async function readDensePayload(workspacePath) {
@@ -582,8 +591,8 @@ async function writeSparsePayload(workspacePath, payload) {
582
591
  await writeGzipJson(sparseVectorPath(workspacePath), payload);
583
592
  await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
584
593
  await Promise.all([
585
- rm(legacySparseVectorPath(workspacePath), { force: true }),
586
- rm(legacySparseMetaPath(workspacePath), { force: true })
594
+ rm2(legacySparseVectorPath(workspacePath), { force: true }),
595
+ rm2(legacySparseMetaPath(workspacePath), { force: true })
587
596
  ]);
588
597
  }
589
598
  async function readSparsePayload(workspacePath) {
@@ -592,12 +601,12 @@ async function readSparsePayload(workspacePath) {
592
601
  async function writeDensePullMarker(workspacePath, model, value) {
593
602
  const markerPath = densePullMarker(workspacePath, model.modelId, model.cacheDir);
594
603
  await mkdir3(path7.dirname(markerPath), { recursive: true });
595
- await writeFile4(markerPath, JSON.stringify(value, null, 2), "utf8");
604
+ await writeFile5(markerPath, JSON.stringify(value, null, 2), "utf8");
596
605
  }
597
606
  async function writeSparsePullMarker(workspacePath, model, value) {
598
607
  const markerPath = sparsePullMarker(workspacePath, model.modelId, model.cacheDir);
599
608
  await mkdir3(path7.dirname(markerPath), { recursive: true });
600
- await writeFile4(markerPath, JSON.stringify(value, null, 2), "utf8");
609
+ await writeFile5(markerPath, JSON.stringify(value, null, 2), "utf8");
601
610
  }
602
611
  async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
603
612
  const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
@@ -1015,7 +1024,7 @@ async function getModelStatus(workspacePath, config) {
1015
1024
  }
1016
1025
 
1017
1026
  // src/index/index-store.ts
1018
- import { mkdir as mkdir6, rm as rm2 } from "fs/promises";
1027
+ import { mkdir as mkdir6, rm as rm3 } from "fs/promises";
1019
1028
  import path10 from "path";
1020
1029
  function versionedIndexPath(workspacePath, stamp) {
1021
1030
  return path10.join(workspacePath, "indexes", `${stamp}.json.gz`);
@@ -1057,10 +1066,10 @@ async function writeIndexArtifacts({
1057
1066
  await writeGzipJson(latestIndexArtifactPath, indexState);
1058
1067
  await writeGzipJson(latestMetadataArtifactPath, metadata);
1059
1068
  await Promise.all([
1060
- rm2(legacyLatestIndexPath(workspacePath), { force: true }),
1061
- rm2(legacyLatestMetaPath(workspacePath), { force: true }),
1062
- rm2(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
1063
- rm2(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
1069
+ rm3(legacyLatestIndexPath(workspacePath), { force: true }),
1070
+ rm3(legacyLatestMetaPath(workspacePath), { force: true }),
1071
+ rm3(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
1072
+ rm3(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
1064
1073
  ]);
1065
1074
  return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
1066
1075
  }
@@ -1262,7 +1271,7 @@ async function removeSource(workspacePath, sourceId) {
1262
1271
  }
1263
1272
 
1264
1273
  // src/ingest/document-utils.ts
1265
- import { mkdir as mkdir7, rm as rm3, writeFile as writeFile5 } from "fs/promises";
1274
+ import { mkdir as mkdir7, rm as rm4, writeFile as writeFile6 } from "fs/promises";
1266
1275
  import path14 from "path";
1267
1276
 
1268
1277
  // src/normalize/normalize-markdown.ts
@@ -1316,7 +1325,7 @@ async function writeNormalizedDocument({
1316
1325
  markdown
1317
1326
  }) {
1318
1327
  await mkdir7(path14.dirname(normalizedPath), { recursive: true });
1319
- await writeFile5(
1328
+ await writeFile6(
1320
1329
  normalizedPath,
1321
1330
  withFrontmatter(
1322
1331
  {
@@ -1338,8 +1347,8 @@ async function writeNormalizedDocument({
1338
1347
  }
1339
1348
  async function deleteDocumentArtifacts(document) {
1340
1349
  await Promise.all([
1341
- document.rawPath ? rm3(document.rawPath, { force: true }) : Promise.resolve(),
1342
- rm3(document.normalizedPath, { force: true })
1350
+ document.rawPath ? rm4(document.rawPath, { force: true }) : Promise.resolve(),
1351
+ rm4(document.normalizedPath, { force: true })
1343
1352
  ]);
1344
1353
  }
1345
1354
 
@@ -1363,7 +1372,7 @@ async function listDirectoryFiles(source) {
1363
1372
 
1364
1373
  // src/ingest/adapters/file-adapter.ts
1365
1374
  import { basename, extname, resolve } from "path";
1366
- import { mkdir as mkdir8, readFile as readFile8, stat as stat3, writeFile as writeFile6 } from "fs/promises";
1375
+ import { mkdir as mkdir8, readFile as readFile8, stat as stat3, writeFile as writeFile7 } from "fs/promises";
1367
1376
 
1368
1377
  // src/ingest/extractors/docx-extractor.ts
1369
1378
  import mammoth from "mammoth";
@@ -1653,7 +1662,7 @@ async function ingestFile({
1653
1662
  await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
1654
1663
  await mkdir8(resolve(workspacePath, "raw", source.id), { recursive: true });
1655
1664
  if (extracted.raw) {
1656
- await writeFile6(rawPath, extracted.raw, "utf8");
1665
+ await writeFile7(rawPath, extracted.raw, "utf8");
1657
1666
  }
1658
1667
  await writeNormalizedDocument({
1659
1668
  documentId,
@@ -1877,7 +1886,7 @@ async function parseRssFeedDocument(xml, source) {
1877
1886
  }
1878
1887
 
1879
1888
  // src/ingest/adapters/url-adapter.ts
1880
- import { mkdir as mkdir9, readFile as readFile9, writeFile as writeFile7 } from "fs/promises";
1889
+ import { mkdir as mkdir9, readFile as readFile9, writeFile as writeFile8 } from "fs/promises";
1881
1890
  import path16 from "path";
1882
1891
 
1883
1892
  // src/core/urls.ts
@@ -1930,7 +1939,7 @@ ${extracted.markdown}`;
1930
1939
  const crawledAt = now;
1931
1940
  const resolvedPublicationDate = choosePublicationDate(publicationDate, extractPublicationDateFromHtml(body), previous?.publicationDate);
1932
1941
  await mkdir9(path16.resolve(workspacePath, "raw", source.id), { recursive: true });
1933
- await writeFile7(rawPath, body, "utf8");
1942
+ await writeFile8(rawPath, body, "utf8");
1934
1943
  await writeNormalizedDocument({
1935
1944
  documentId,
1936
1945
  sourceId: source.id,
package/dist/index.js CHANGED
@@ -1795,6 +1795,8 @@ import os from "os";
1795
1795
  import path12 from "path";
1796
1796
  import { fileURLToPath } from "url";
1797
1797
  import { execFile, execFileSync } from "child_process";
1798
+ import { mkdtemp, rm as rm2, writeFile as writeFile6 } from "fs/promises";
1799
+ var sparseExecFileSync = execFileSync;
1798
1800
  function resolveQliHomeDir() {
1799
1801
  return path12.resolve(process.env.QLI_HOME ?? path12.join(os.homedir(), ".qli"));
1800
1802
  }
@@ -1850,29 +1852,36 @@ async function runSparsePython({
1850
1852
  }) {
1851
1853
  const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
1852
1854
  const scriptPath = await sparseScriptPath(importMetaUrl);
1853
- return execFileSync(
1854
- "uv",
1855
- [
1856
- "run",
1857
- "--with",
1858
- "torch",
1859
- "--with",
1860
- "transformers",
1861
- "--with",
1862
- "huggingface_hub",
1863
- "python",
1864
- scriptPath
1865
- ],
1866
- {
1867
- encoding: "utf8",
1868
- maxBuffer: 1024 * 1024 * 1024,
1869
- input: JSON.stringify(payload),
1870
- env: {
1871
- ...process.env,
1872
- HF_HOME: cacheDir
1855
+ const payloadDir = await mkdtemp(path12.join(os.tmpdir(), "qli-sparse-"));
1856
+ const payloadPath = path12.join(payloadDir, "payload.json");
1857
+ await writeFile6(payloadPath, JSON.stringify(payload), "utf8");
1858
+ try {
1859
+ return sparseExecFileSync(
1860
+ "uv",
1861
+ [
1862
+ "run",
1863
+ "--with",
1864
+ "torch",
1865
+ "--with",
1866
+ "transformers",
1867
+ "--with",
1868
+ "huggingface_hub",
1869
+ "python",
1870
+ scriptPath,
1871
+ payloadPath
1872
+ ],
1873
+ {
1874
+ encoding: "utf8",
1875
+ maxBuffer: 1024 * 1024 * 1024,
1876
+ env: {
1877
+ ...process.env,
1878
+ HF_HOME: cacheDir
1879
+ }
1873
1880
  }
1874
- }
1875
- );
1881
+ );
1882
+ } finally {
1883
+ await rm2(payloadDir, { recursive: true, force: true });
1884
+ }
1876
1885
  }
1877
1886
  async function getDenseTransformersRuntime(cacheDir) {
1878
1887
  const transformers = await import("@huggingface/transformers");
@@ -1885,18 +1894,18 @@ async function getDenseTransformersRuntime(cacheDir) {
1885
1894
  }
1886
1895
 
1887
1896
  // src/vector/store.ts
1888
- import { mkdir as mkdir6, rm as rm2, writeFile as writeFile7 } from "fs/promises";
1897
+ import { mkdir as mkdir6, rm as rm3, writeFile as writeFile8 } from "fs/promises";
1889
1898
  import path13 from "path";
1890
1899
 
1891
1900
  // src/core/gzip-json.ts
1892
- import { readFile as readFile9, writeFile as writeFile6 } from "fs/promises";
1901
+ import { readFile as readFile9, writeFile as writeFile7 } from "fs/promises";
1893
1902
  import { promisify } from "util";
1894
1903
  import { gunzip, gzip } from "zlib";
1895
1904
  var gzipAsync = promisify(gzip);
1896
1905
  var gunzipAsync = promisify(gunzip);
1897
1906
  async function writeGzipJson(filePath, value) {
1898
1907
  const payload = JSON.stringify(value, null, 2);
1899
- await writeFile6(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
1908
+ await writeFile7(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
1900
1909
  }
1901
1910
  async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
1902
1911
  if (await fileExists(gzipPath)) {
@@ -1956,8 +1965,8 @@ async function writeDensePayload(workspacePath, payload) {
1956
1965
  await writeGzipJson(denseVectorPath(workspacePath), payload);
1957
1966
  await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
1958
1967
  await Promise.all([
1959
- rm2(legacyDenseVectorPath(workspacePath), { force: true }),
1960
- rm2(legacyDenseMetaPath(workspacePath), { force: true })
1968
+ rm3(legacyDenseVectorPath(workspacePath), { force: true }),
1969
+ rm3(legacyDenseMetaPath(workspacePath), { force: true })
1961
1970
  ]);
1962
1971
  }
1963
1972
  async function readDensePayload(workspacePath) {
@@ -1968,8 +1977,8 @@ async function writeSparsePayload(workspacePath, payload) {
1968
1977
  await writeGzipJson(sparseVectorPath(workspacePath), payload);
1969
1978
  await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
1970
1979
  await Promise.all([
1971
- rm2(legacySparseVectorPath(workspacePath), { force: true }),
1972
- rm2(legacySparseMetaPath(workspacePath), { force: true })
1980
+ rm3(legacySparseVectorPath(workspacePath), { force: true }),
1981
+ rm3(legacySparseMetaPath(workspacePath), { force: true })
1973
1982
  ]);
1974
1983
  }
1975
1984
  async function readSparsePayload(workspacePath) {
@@ -2309,7 +2318,7 @@ async function buildVectorArtifacts({
2309
2318
  }
2310
2319
 
2311
2320
  // src/index/index-store.ts
2312
- import { mkdir as mkdir9, rm as rm3 } from "fs/promises";
2321
+ import { mkdir as mkdir9, rm as rm4 } from "fs/promises";
2313
2322
  import path16 from "path";
2314
2323
  function versionedIndexPath(workspacePath, stamp) {
2315
2324
  return path16.join(workspacePath, "indexes", `${stamp}.json.gz`);
@@ -2351,10 +2360,10 @@ async function writeIndexArtifacts({
2351
2360
  await writeGzipJson(latestIndexArtifactPath, indexState);
2352
2361
  await writeGzipJson(latestMetadataArtifactPath, metadata);
2353
2362
  await Promise.all([
2354
- rm3(legacyLatestIndexPath(workspacePath), { force: true }),
2355
- rm3(legacyLatestMetaPath(workspacePath), { force: true }),
2356
- rm3(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
2357
- rm3(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
2363
+ rm4(legacyLatestIndexPath(workspacePath), { force: true }),
2364
+ rm4(legacyLatestMetaPath(workspacePath), { force: true }),
2365
+ rm4(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
2366
+ rm4(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
2358
2367
  ]);
2359
2368
  return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
2360
2369
  }
@@ -1,4 +1,11 @@
1
1
  import type { SparseVectorModelConfig } from "../types/models.js";
2
+ type SparseExecOptions = {
3
+ encoding: BufferEncoding;
4
+ maxBuffer: number;
5
+ env: NodeJS.ProcessEnv;
6
+ };
7
+ type SparseExecFileSync = (file: string, args: string[], options: SparseExecOptions) => string;
8
+ export declare function setSparseExecFileSyncForTests(fn: SparseExecFileSync | null): void;
2
9
  export declare function resolveQliHomeDir(): string;
3
10
  export declare function resolveCacheDir(workspacePath: string, configuredPath: string): string;
4
11
  export declare function packageRootFromImportMeta(importMetaUrl: string): string;
@@ -18,3 +25,4 @@ export declare function getDenseTransformersRuntime(cacheDir: string): Promise<{
18
25
  };
19
26
  pipeline: typeof import("@huggingface/transformers").pipeline;
20
27
  }>;
28
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tryformation/querylight-cli",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "Querylight CLI for building and querying local knowledge bases.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/formation-res/querylight-cli#readme",
@@ -88,7 +88,11 @@ def encode_documents(model_id: str, top_tokens: int, documents):
88
88
 
89
89
 
90
90
  def main():
91
- payload = json.load(sys.stdin)
91
+ if len(sys.argv) > 1:
92
+ with open(sys.argv[1], encoding="utf-8") as handle:
93
+ payload = json.load(handle)
94
+ else:
95
+ payload = json.load(sys.stdin)
92
96
  action = payload["action"]
93
97
  model_id = payload["model_id"]
94
98
  if action == "download_only":