@tryformation/querylight-cli 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -55,6 +55,8 @@ Publish releases from semantic version tags such as `0.1.1`.
55
55
 
56
56
  The GitHub Actions publish workflow publishes `@tryformation/querylight-cli` to the public npm registry.
57
57
 
58
+ The publish workflow builds the package and verifies that the built CLI JSON envelope reports the same version as `package.json` before it publishes.
59
+
58
60
  Configure npm trusted publishing for this repository before the first release. The publish workflow uses GitHub OIDC and does not use an `NPM_TOKEN` secret.
59
61
 
60
62
  ### Local Development with `npm link`
@@ -248,6 +250,15 @@ crawler:
248
250
 
249
251
  Set `crawl.maxConcurrentRequests` on a website or RSS source when one source needs a different limit.
250
252
 
253
+ Control the default number of search results returned when `--top-k` is omitted:
254
+
255
+ ```yaml
256
+ search:
257
+   defaultTopK: 50
258
+ ```
259
+
260
+ For `qli search --source-type rss` with a time-window filter such as `--since`, `--until`, or `--publication-date-from`, `qli` uses `500` results when `--top-k` is omitted.
261
+
251
262
  ## Supported Sources
252
263
 
253
264
  Current source types:
package/dist/cli/main.js CHANGED
@@ -16,7 +16,11 @@ import path from "path";
16
16
  import YAML from "yaml";
17
17
 
18
18
  // src/core/constants.ts
19
- var PACKAGE_VERSION = "0.2.3";
19
+ import { createRequire } from "module";
20
+ var require2 = createRequire(import.meta.url);
21
+ var packageJson = require2("../../package.json");
22
+ var PACKAGE_NAME = packageJson.name;
23
+ var PACKAGE_VERSION = packageJson.version;
20
24
  var DEFAULT_WORKSPACE = ".kb";
21
25
  var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
22
26
  var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
@@ -49,6 +53,9 @@ var defaultConfig = () => ({
49
53
  maxContextChars: 12e3,
50
54
  citationStyle: "markdown"
51
55
  },
56
+ search: {
57
+ defaultTopK: 50
58
+ },
52
59
  retrieval: {
53
60
  defaultMode: "lexical",
54
61
  dense: {
@@ -70,12 +77,12 @@ var defaultConfig = () => ({
70
77
  }
71
78
  },
72
79
  crawler: {
73
- defaultUserAgent: "querylight-cli/0.1",
80
+ defaultUserAgent: "querylight-cli",
74
81
  obeyRobotsTxt: true,
75
82
  rateLimitMs: 1e3,
76
83
  maxConcurrentRequests: 5,
77
84
  renderJs: false,
78
- retentionDays: 365,
85
+ retentionDays: 30,
79
86
  fetchArticles: true
80
87
  },
81
88
  limits: {
@@ -119,6 +126,10 @@ async function loadConfig(workspacePath, configPath) {
119
126
  ...defaults.rag,
120
127
  ...parsed.rag ?? {}
121
128
  },
129
+ search: {
130
+ ...defaults.search,
131
+ ...parsed.search ?? {}
132
+ },
122
133
  retrieval: {
123
134
  ...defaults.retrieval,
124
135
  ...parsed.retrieval ?? {},
@@ -2052,7 +2063,7 @@ async function fetchUrlDocument({
2052
2063
  publicationDate
2053
2064
  }) {
2054
2065
  const headers = {
2055
- "user-agent": source.crawl?.userAgent ?? "querylight-cli/0.1"
2066
+ "user-agent": source.crawl?.userAgent ?? "querylight-cli"
2056
2067
  };
2057
2068
  if (previous?.httpCache?.etag) {
2058
2069
  headers["if-none-match"] = previous.httpCache.etag;
@@ -2351,7 +2362,7 @@ async function purgeDocuments(workspacePath, documentIds, documents) {
2351
2362
  async function fetchFeedText(source) {
2352
2363
  const response2 = await fetch(source.uri, {
2353
2364
  headers: {
2354
- "user-agent": source.crawl?.userAgent ?? "querylight-cli/0.1"
2365
+ "user-agent": source.crawl?.userAgent ?? "querylight-cli"
2355
2366
  }
2356
2367
  });
2357
2368
  if (!response2.ok) {
@@ -4316,6 +4327,17 @@ function searchDateRanges(options) {
4316
4327
  }
4317
4328
  return entries;
4318
4329
  }
4330
/**
 * Resolve how many results a search should return.
 *
 * Precedence:
 *   1. An explicit `--top-k` value, parsed by `parseOptionalPositiveInteger`.
 *   2. `rssTimeWindowTopK` when the selected source types include "rss" and at
 *      least one date range is present.
 *   3. `defaultTopK`.
 *
 * @param {*} optionsTopK - Raw `--top-k` option value; may be undefined when omitted.
 * @param {string[] | undefined} sourceTypes - Source types selected for the search.
 * @param {Array} dateRanges - Date-range filters derived from the search options.
 * @param {number} defaultTopK - Fallback result count.
 * @param {number} [rssTimeWindowTopK=500] - Result count used for RSS searches with a time window.
 * @returns {number} The number of results to request.
 * @throws {Error} When the fallback is reached and `defaultTopK` is not a positive integer.
 */
function resolveSearchTopK(optionsTopK, sourceTypes, dateRanges, defaultTopK, rssTimeWindowTopK = 500) {
  const explicitTopK = parseOptionalPositiveInteger(optionsTopK, "--top-k");
  if (explicitTopK !== void 0) {
    return explicitTopK;
  }
  const includesRss = (sourceTypes ?? []).includes("rss");
  if (includesRss && dateRanges.length > 0) {
    return rssTimeWindowTopK;
  }
  // Fail with a clear message here instead of handing an unusable limit to the
  // search layer when the fallback value is not a usable result count.
  if (!Number.isInteger(defaultTopK) || defaultTopK <= 0) {
    throw new Error(`search.defaultTopK must be a positive integer, got ${JSON.stringify(defaultTopK)}`);
  }
  return defaultTopK;
}
4319
4341
  async function resolveWorkspace(options) {
4320
4342
  return path22.resolve(options.workspace ?? DEFAULT_WORKSPACE);
4321
4343
  }
@@ -4629,7 +4651,7 @@ Examples:
4629
4651
  progress?.("info", "Rebuild complete");
4630
4652
  emit(global.json, capture, response("rebuild", workspace, data), `Processed ${ingest.processedSources} sources, wrote ${chunk.chunksWritten} chunks`);
4631
4653
  });
4632
- program.command("search").description("Search the built index and return ranked matching documents or chunks. Use search-json for raw JSON DSL queries.").argument("[query]", "Text query. Omit it to list the latest matching documents.").option("--top-k <n>", "Maximum number of results to return.", "12").option("--source <sourceIds>", "Restrict results to one or more source ids. Use comma-separated values.").option("--source-name <names>", "Restrict results to one or more source names. Use comma-separated values.").option("--source-type <types>", `Restrict results to one or more source types. Use comma-separated values: ${SOURCE_TYPE_LIST.join(", ")}`).option("--uri-prefix <prefixes>", "Restrict results to one or more URI prefixes. Use comma-separated values.").option("--tag <tags>", "Restrict results to one or more source tags. Use comma-separated values.").option("--metadata <key=value...>", "Restrict results to sources with matching metadata.").option("--since <date>", "Shortcut for --publication-date-from.").option("--until <date>", "Shortcut for --publication-date-to.").option("--changed-since <date>", "Only include documents changed on or after this date.").option("--has-publication-date", "Only include documents with a publication date.").option("--publication-date-from <date>", "Only include documents published on or after this date.").option("--publication-date-to <date>", "Only include documents published on or before this date.").option("--first-seen-at-from <date>", "Only include documents first seen on or after this date.").option("--first-seen-at-to <date>", "Only include documents first seen on or before this date.").option("--last-seen-at-from <date>", "Only include documents last seen on or after this date.").option("--last-seen-at-to <date>", "Only include documents last seen on or before this date.").option("--last-changed-at-from <date>", "Only include documents changed on or after this date.").option("--last-changed-at-to <date>", "Only include 
documents changed on or before this date.").option("--crawled-at-from <date>", "Only include documents crawled on or after this date.").option("--crawled-at-to <date>", "Only include documents crawled on or before this date.").option("--retrieval <mode>", `Retrieval mode: ${RETRIEVAL_MODE_LIST.join(", ")}`).option("--show-chunks", "Return chunk-level matches when available.").addHelpText("after", `
4654
+ program.command("search").description("Search the built index and return ranked matching documents or chunks. Use search-json for raw JSON DSL queries.").argument("[query]", "Text query. Omit it to list the latest matching documents.").option("--top-k <n>", "Maximum number of results to return. Defaults to search.defaultTopK in config.yaml. RSS searches with a time window use 500 when omitted.").option("--source <sourceIds>", "Restrict results to one or more source ids. Use comma-separated values.").option("--source-name <names>", "Restrict results to one or more source names. Use comma-separated values.").option("--source-type <types>", `Restrict results to one or more source types. Use comma-separated values: ${SOURCE_TYPE_LIST.join(", ")}`).option("--uri-prefix <prefixes>", "Restrict results to one or more URI prefixes. Use comma-separated values.").option("--tag <tags>", "Restrict results to one or more source tags. Use comma-separated values.").option("--metadata <key=value...>", "Restrict results to sources with matching metadata.").option("--since <date>", "Shortcut for --publication-date-from.").option("--until <date>", "Shortcut for --publication-date-to.").option("--changed-since <date>", "Only include documents changed on or after this date.").option("--has-publication-date", "Only include documents with a publication date.").option("--publication-date-from <date>", "Only include documents published on or after this date.").option("--publication-date-to <date>", "Only include documents published on or before this date.").option("--first-seen-at-from <date>", "Only include documents first seen on or after this date.").option("--first-seen-at-to <date>", "Only include documents first seen on or before this date.").option("--last-seen-at-from <date>", "Only include documents last seen on or after this date.").option("--last-seen-at-to <date>", "Only include documents last seen on or before this date.").option("--last-changed-at-from <date>", "Only include 
documents changed on or after this date.").option("--last-changed-at-to <date>", "Only include documents changed on or before this date.").option("--crawled-at-from <date>", "Only include documents crawled on or after this date.").option("--crawled-at-to <date>", "Only include documents crawled on or before this date.").option("--retrieval <mode>", `Retrieval mode: ${RETRIEVAL_MODE_LIST.join(", ")}`).option("--show-chunks", "Return chunk-level matches when available.").addHelpText("after", `
4633
4655
  Examples:
4634
4656
  qli search "pricing api limits"
4635
4657
  qli search "authentication" --top-k 20 --tag docs
@@ -4642,22 +4664,27 @@ Examples:
4642
4664
  Notes:
4643
4665
  lexical works without vector models.
4644
4666
  dense, sparse, and hybrid require the relevant index artifacts to exist.
4667
+ When you omit --top-k, qli uses search.defaultTopK from config.yaml. The default workspace value is 50.
4668
+ RSS searches with a time window default to 500 results when you omit --top-k.
4645
4669
  Use search-json when you want the raw Querylight 0.11 JSON DSL and hit format.
4646
4670
  When you omit the query, qli returns the latest matching documents sorted by publication date.`).action(async function command(query, options) {
4647
4671
  const global = this.optsWithGlobals();
4648
4672
  const workspace = await resolveWorkspace({ workspace: global.workspace });
4673
+ const config = await loadConfig(workspace, global.config);
4674
+ const sourceTypes = parseSourceTypes(options.sourceType);
4675
+ const dateRanges = searchDateRanges(options);
4649
4676
  const result = await searchIndex({
4650
4677
  workspacePath: workspace,
4651
4678
  query: query ?? "",
4652
- topK: Number(options.topK),
4679
+ topK: resolveSearchTopK(options.topK, sourceTypes, dateRanges, config.search.defaultTopK),
4653
4680
  sourceIds: parseCommaSeparatedList(options.source),
4654
4681
  sourceNames: parseCommaSeparatedList(options.sourceName),
4655
- sourceTypes: parseSourceTypes(options.sourceType),
4682
+ sourceTypes,
4656
4683
  uriPrefixes: parseCommaSeparatedList(options.uriPrefix),
4657
4684
  hasPublicationDate: Boolean(options.hasPublicationDate),
4658
4685
  tags: parseCommaSeparatedList(options.tag),
4659
4686
  metadata: (options.metadata ?? []).map(parseKeyValue).map(([key, value]) => ({ key, value })),
4660
- dateRanges: searchDateRanges(options),
4687
+ dateRanges,
4661
4688
  retrievalMode: parseRetrievalMode(options.retrieval),
4662
4689
  showChunks: Boolean(options.showChunks)
4663
4690
  });
@@ -1,5 +1,5 @@
1
- export declare const PACKAGE_NAME = "@tryformation/querylight-cli";
2
- export declare const PACKAGE_VERSION = "0.2.3";
1
+ export declare const PACKAGE_NAME: string;
2
+ export declare const PACKAGE_VERSION: string;
3
3
  export declare const DEFAULT_WORKSPACE = ".kb";
4
4
  export declare const DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
5
5
  export declare const LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
package/dist/index.js CHANGED
@@ -22,6 +22,11 @@ import path from "path";
22
22
  import YAML from "yaml";
23
23
 
24
24
  // src/core/constants.ts
25
import { createRequire } from "module";
var require2 = createRequire(import.meta.url);
// The specifier is resolved relative to this bundle file. This bundle is
// emitted as dist/index.js, one directory below the package root, so the
// manifest is "../package.json". ("../../package.json" is only correct for
// dist/cli/main.js; from here it would point outside the package.)
var packageJson = require2("../package.json");
var PACKAGE_NAME = packageJson.name;
var PACKAGE_VERSION = packageJson.version;
25
30
  var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
26
31
  var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
27
32
 
@@ -53,6 +58,9 @@ var defaultConfig = () => ({
53
58
  maxContextChars: 12e3,
54
59
  citationStyle: "markdown"
55
60
  },
61
+ search: {
62
+ defaultTopK: 50
63
+ },
56
64
  retrieval: {
57
65
  defaultMode: "lexical",
58
66
  dense: {
@@ -74,12 +82,12 @@ var defaultConfig = () => ({
74
82
  }
75
83
  },
76
84
  crawler: {
77
- defaultUserAgent: "querylight-cli/0.1",
85
+ defaultUserAgent: "querylight-cli",
78
86
  obeyRobotsTxt: true,
79
87
  rateLimitMs: 1e3,
80
88
  maxConcurrentRequests: 5,
81
89
  renderJs: false,
82
- retentionDays: 365,
90
+ retentionDays: 30,
83
91
  fetchArticles: true
84
92
  },
85
93
  limits: {
@@ -123,6 +131,10 @@ async function loadConfig(workspacePath, configPath) {
123
131
  ...defaults.rag,
124
132
  ...parsed.rag ?? {}
125
133
  },
134
+ search: {
135
+ ...defaults.search,
136
+ ...parsed.search ?? {}
137
+ },
126
138
  retrieval: {
127
139
  ...defaults.retrieval,
128
140
  ...parsed.retrieval ?? {},
@@ -1069,7 +1081,7 @@ async function fetchUrlDocument({
1069
1081
  publicationDate
1070
1082
  }) {
1071
1083
  const headers = {
1072
- "user-agent": source.crawl?.userAgent ?? "querylight-cli/0.1"
1084
+ "user-agent": source.crawl?.userAgent ?? "querylight-cli"
1073
1085
  };
1074
1086
  if (previous?.httpCache?.etag) {
1075
1087
  headers["if-none-match"] = previous.httpCache.etag;
@@ -1368,7 +1380,7 @@ async function purgeDocuments(workspacePath, documentIds, documents) {
1368
1380
  async function fetchFeedText(source) {
1369
1381
  const response = await fetch(source.uri, {
1370
1382
  headers: {
1371
- "user-agent": source.crawl?.userAgent ?? "querylight-cli/0.1"
1383
+ "user-agent": source.crawl?.userAgent ?? "querylight-cli"
1372
1384
  }
1373
1385
  });
1374
1386
  if (!response.ok) {
@@ -173,6 +173,9 @@ export type WorkspaceConfig = {
173
173
  maxContextChars: number;
174
174
  citationStyle: "markdown";
175
175
  };
176
+ search: {
177
+ defaultTopK: number;
178
+ };
176
179
  retrieval: {
177
180
  defaultMode: RetrievalMode;
178
181
  dense: DenseVectorModelConfig;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tryformation/querylight-cli",
3
- "version": "0.2.4",
3
+ "version": "0.2.5",
4
4
  "description": "Querylight CLI for building and querying local knowledge bases.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/formation-res/querylight-cli#readme",
@@ -36,7 +36,8 @@
36
36
  "test:watch": "vitest",
37
37
  "lint": "tsc --noEmit",
38
38
  "check": "npm run lint && npm test",
39
- "prepublishOnly": "npm run check && npm run build"
39
+ "prepublishOnly": "npm run check && npm run build && npm run verify:release-version",
40
+ "verify:release-version": "node scripts/assert-release-version.mjs"
40
41
  },
41
42
  "dependencies": {
42
43
  "@huggingface/transformers": "^3.8.1",
@@ -0,0 +1,48 @@
1
+ import assert from "node:assert/strict";
2
+ import { mkdtemp, rm } from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
5
+ import { spawn } from "node:child_process";
6
+ import packageJson from "../package.json" with { type: "json" };
7
+
8
/**
 * Spawn a child process and collect everything it writes to stdout and stderr.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options] - Extra `spawn` options; they are spread after the
 *   default stdio setup and therefore override it.
 * @returns {Promise<{ stdout: string, stderr: string }>} Resolves when the
 *   process exits with code 0. Rejects when the process cannot be spawned,
 *   exits with a non-zero code, or is terminated by a signal.
 */
function run(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: ["ignore", "pipe", "pipe"],
      ...options
    });
    let stdout = "";
    let stderr = "";

    // Decode at the stream level so a multi-byte UTF-8 character that is split
    // across two chunks is reassembled instead of being decoded per chunk.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve({ stdout, stderr });
        return;
      }
      // `code` is null when the process was ended by a signal.
      const reason = code === null ? `was terminated by signal ${signal}` : `failed with exit code ${code}`;
      // Include stdout as well: a failing command may report its error there.
      const output = [stderr, stdout].filter((text) => text.length > 0).join("\n");
      reject(new Error(`${command} ${args.join(" ")} ${reason}\n${output}`));
    });
  });
}
33
+
34
// Run the built CLI against a throwaway workspace under the OS temp directory;
// the directory is removed in the `finally` block below.
const workspaceRoot = await mkdtemp(path.join(os.tmpdir(), "qli-release-version-"));
const workspacePath = path.join(workspaceRoot, ".kb");

try {
  // process.execPath keeps the check on the same Node binary that runs this
  // script, instead of whatever "node" happens to resolve to on PATH.
  const { stdout } = await run(process.execPath, ["dist/cli/main.js", "init", "--workspace", workspacePath, "--json"], {
    cwd: new URL("..", import.meta.url)
  });

  let parsed;
  try {
    parsed = JSON.parse(stdout);
  } catch (error) {
    // Show what the CLI actually printed rather than a bare SyntaxError.
    throw new Error(`Expected qli init --json to print a JSON envelope, got:\n${stdout}`, { cause: error });
  }

  assert.equal(parsed.ok, true, "Expected qli init --json to succeed");
  assert.equal(parsed.version, packageJson.version, `Built CLI reported version ${parsed.version}, expected ${packageJson.version}`);
  process.stdout.write(`Verified built CLI version ${parsed.version}\n`);
} finally {
  await rm(workspaceRoot, { recursive: true, force: true });
}