@tryformation/querylight-cli 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/cli/main.js +36 -9
- package/dist/core/constants.d.ts +2 -2
- package/dist/index.js +16 -4
- package/dist/types/models.d.ts +3 -0
- package/package.json +3 -2
- package/scripts/assert-release-version.mjs +48 -0
package/README.md
CHANGED
|
@@ -55,6 +55,8 @@ Publish releases from semantic version tags such as `0.1.1`.
|
|
|
55
55
|
|
|
56
56
|
The GitHub Actions publish workflow publishes `@tryformation/querylight-cli` to the public npm registry.
|
|
57
57
|
|
|
58
|
+
The publish workflow builds the package and verifies that the built CLI JSON envelope reports the same version as `package.json` before it publishes.
|
|
59
|
+
|
|
58
60
|
Configure npm trusted publishing for this repository before the first release. The publish workflow uses GitHub OIDC and does not use an `NPM_TOKEN` secret.
|
|
59
61
|
|
|
60
62
|
### Local Development with `npm link`
|
|
@@ -248,6 +250,15 @@ crawler:
|
|
|
248
250
|
|
|
249
251
|
Set `crawl.maxConcurrentRequests` on a website or RSS source when one source needs a different limit.
|
|
250
252
|
|
|
253
|
+
Control the default number of search results returned when `--top-k` is omitted:
|
|
254
|
+
|
|
255
|
+
```yaml
|
|
256
|
+
search:
|
|
257
|
+
defaultTopK: 50
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
For `qli search --source-type rss` with a time-window filter such as `--since`, `--until`, or `--publication-date-from`, `qli` uses `500` results when `--top-k` is omitted.
|
|
261
|
+
|
|
251
262
|
## Supported Sources
|
|
252
263
|
|
|
253
264
|
Current source types:
|
package/dist/cli/main.js
CHANGED
|
@@ -16,7 +16,11 @@ import path from "path";
|
|
|
16
16
|
import YAML from "yaml";
|
|
17
17
|
|
|
18
18
|
// src/core/constants.ts
|
|
19
|
-
|
|
19
|
+
import { createRequire } from "module";
|
|
20
|
+
var require2 = createRequire(import.meta.url);
|
|
21
|
+
var packageJson = require2("../../package.json");
|
|
22
|
+
var PACKAGE_NAME = packageJson.name;
|
|
23
|
+
var PACKAGE_VERSION = packageJson.version;
|
|
20
24
|
var DEFAULT_WORKSPACE = ".kb";
|
|
21
25
|
var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
22
26
|
var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
|
@@ -49,6 +53,9 @@ var defaultConfig = () => ({
|
|
|
49
53
|
maxContextChars: 12e3,
|
|
50
54
|
citationStyle: "markdown"
|
|
51
55
|
},
|
|
56
|
+
search: {
|
|
57
|
+
defaultTopK: 50
|
|
58
|
+
},
|
|
52
59
|
retrieval: {
|
|
53
60
|
defaultMode: "lexical",
|
|
54
61
|
dense: {
|
|
@@ -70,12 +77,12 @@ var defaultConfig = () => ({
|
|
|
70
77
|
}
|
|
71
78
|
},
|
|
72
79
|
crawler: {
|
|
73
|
-
defaultUserAgent: "querylight-cli
|
|
80
|
+
defaultUserAgent: "querylight-cli",
|
|
74
81
|
obeyRobotsTxt: true,
|
|
75
82
|
rateLimitMs: 1e3,
|
|
76
83
|
maxConcurrentRequests: 5,
|
|
77
84
|
renderJs: false,
|
|
78
|
-
retentionDays:
|
|
85
|
+
retentionDays: 30,
|
|
79
86
|
fetchArticles: true
|
|
80
87
|
},
|
|
81
88
|
limits: {
|
|
@@ -119,6 +126,10 @@ async function loadConfig(workspacePath, configPath) {
|
|
|
119
126
|
...defaults.rag,
|
|
120
127
|
...parsed.rag ?? {}
|
|
121
128
|
},
|
|
129
|
+
search: {
|
|
130
|
+
...defaults.search,
|
|
131
|
+
...parsed.search ?? {}
|
|
132
|
+
},
|
|
122
133
|
retrieval: {
|
|
123
134
|
...defaults.retrieval,
|
|
124
135
|
...parsed.retrieval ?? {},
|
|
@@ -2052,7 +2063,7 @@ async function fetchUrlDocument({
|
|
|
2052
2063
|
publicationDate
|
|
2053
2064
|
}) {
|
|
2054
2065
|
const headers = {
|
|
2055
|
-
"user-agent": source.crawl?.userAgent ?? "querylight-cli
|
|
2066
|
+
"user-agent": source.crawl?.userAgent ?? "querylight-cli"
|
|
2056
2067
|
};
|
|
2057
2068
|
if (previous?.httpCache?.etag) {
|
|
2058
2069
|
headers["if-none-match"] = previous.httpCache.etag;
|
|
@@ -2351,7 +2362,7 @@ async function purgeDocuments(workspacePath, documentIds, documents) {
|
|
|
2351
2362
|
async function fetchFeedText(source) {
|
|
2352
2363
|
const response2 = await fetch(source.uri, {
|
|
2353
2364
|
headers: {
|
|
2354
|
-
"user-agent": source.crawl?.userAgent ?? "querylight-cli
|
|
2365
|
+
"user-agent": source.crawl?.userAgent ?? "querylight-cli"
|
|
2355
2366
|
}
|
|
2356
2367
|
});
|
|
2357
2368
|
if (!response2.ok) {
|
|
@@ -4316,6 +4327,17 @@ function searchDateRanges(options) {
|
|
|
4316
4327
|
}
|
|
4317
4328
|
return entries;
|
|
4318
4329
|
}
|
|
4330
|
+
/**
 * Decide how many results `qli search` should request.
 *
 * Precedence:
 *   1. An explicit `--top-k` value (validated by parseOptionalPositiveInteger).
 *   2. 500 when the search is restricted to RSS sources AND at least one
 *      date-range filter is active.
 *   3. The configured default (`search.defaultTopK`).
 *
 * @param {string|number|undefined} optionsTopK Raw `--top-k` option value.
 *   NOTE(review): presumably parseOptionalPositiveInteger returns `undefined`
 *   when the flag is omitted and throws on invalid input — confirm against
 *   its definition.
 * @param {string[]|undefined} sourceTypes Parsed `--source-type` values.
 * @param {Array|undefined} dateRanges Date-range filters built from the CLI options.
 * @param {number} defaultTopK Fallback taken from the loaded configuration.
 * @returns {number} The result limit to pass to the search.
 */
function resolveSearchTopK(optionsTopK, sourceTypes, dateRanges, defaultTopK) {
  // Larger limit applied to time-windowed RSS searches when --top-k is omitted.
  const RSS_TIME_WINDOW_TOP_K = 500;

  const explicitTopK = parseOptionalPositiveInteger(optionsTopK, "--top-k");
  if (explicitTopK !== void 0) {
    return explicitTopK;
  }

  // Both filters are optional; treat a missing list the same as an empty one
  // so an absent dateRanges cannot throw while sourceTypes is tolerated.
  const includesRss = (sourceTypes ?? []).includes("rss");
  const hasTimeWindow = (dateRanges ?? []).length > 0;

  return includesRss && hasTimeWindow ? RSS_TIME_WINDOW_TOP_K : defaultTopK;
}
|
|
4319
4341
|
async function resolveWorkspace(options) {
|
|
4320
4342
|
return path22.resolve(options.workspace ?? DEFAULT_WORKSPACE);
|
|
4321
4343
|
}
|
|
@@ -4629,7 +4651,7 @@ Examples:
|
|
|
4629
4651
|
progress?.("info", "Rebuild complete");
|
|
4630
4652
|
emit(global.json, capture, response("rebuild", workspace, data), `Processed ${ingest.processedSources} sources, wrote ${chunk.chunksWritten} chunks`);
|
|
4631
4653
|
});
|
|
4632
|
-
program.command("search").description("Search the built index and return ranked matching documents or chunks. Use search-json for raw JSON DSL queries.").argument("[query]", "Text query. Omit it to list the latest matching documents.").option("--top-k <n>", "Maximum number of results to return.
|
|
4654
|
+
program.command("search").description("Search the built index and return ranked matching documents or chunks. Use search-json for raw JSON DSL queries.").argument("[query]", "Text query. Omit it to list the latest matching documents.").option("--top-k <n>", "Maximum number of results to return. Defaults to search.defaultTopK in config.yaml. RSS searches with a time window use 500 when omitted.").option("--source <sourceIds>", "Restrict results to one or more source ids. Use comma-separated values.").option("--source-name <names>", "Restrict results to one or more source names. Use comma-separated values.").option("--source-type <types>", `Restrict results to one or more source types. Use comma-separated values: ${SOURCE_TYPE_LIST.join(", ")}`).option("--uri-prefix <prefixes>", "Restrict results to one or more URI prefixes. Use comma-separated values.").option("--tag <tags>", "Restrict results to one or more source tags. Use comma-separated values.").option("--metadata <key=value...>", "Restrict results to sources with matching metadata.").option("--since <date>", "Shortcut for --publication-date-from.").option("--until <date>", "Shortcut for --publication-date-to.").option("--changed-since <date>", "Only include documents changed on or after this date.").option("--has-publication-date", "Only include documents with a publication date.").option("--publication-date-from <date>", "Only include documents published on or after this date.").option("--publication-date-to <date>", "Only include documents published on or before this date.").option("--first-seen-at-from <date>", "Only include documents first seen on or after this date.").option("--first-seen-at-to <date>", "Only include documents first seen on or before this date.").option("--last-seen-at-from <date>", "Only include documents last seen on or after this date.").option("--last-seen-at-to <date>", "Only include documents last seen on or before this date.").option("--last-changed-at-from <date>", "Only include 
documents changed on or after this date.").option("--last-changed-at-to <date>", "Only include documents changed on or before this date.").option("--crawled-at-from <date>", "Only include documents crawled on or after this date.").option("--crawled-at-to <date>", "Only include documents crawled on or before this date.").option("--retrieval <mode>", `Retrieval mode: ${RETRIEVAL_MODE_LIST.join(", ")}`).option("--show-chunks", "Return chunk-level matches when available.").addHelpText("after", `
|
|
4633
4655
|
Examples:
|
|
4634
4656
|
qli search "pricing api limits"
|
|
4635
4657
|
qli search "authentication" --top-k 20 --tag docs
|
|
@@ -4642,22 +4664,27 @@ Examples:
|
|
|
4642
4664
|
Notes:
|
|
4643
4665
|
lexical works without vector models.
|
|
4644
4666
|
dense, sparse, and hybrid require the relevant index artifacts to exist.
|
|
4667
|
+
When you omit --top-k, qli uses search.defaultTopK from config.yaml. The default workspace value is 50.
|
|
4668
|
+
RSS searches with a time window default to 500 results when you omit --top-k.
|
|
4645
4669
|
Use search-json when you want the raw Querylight 0.11 JSON DSL and hit format.
|
|
4646
4670
|
When you omit the query, qli returns the latest matching documents sorted by publication date.`).action(async function command(query, options) {
|
|
4647
4671
|
const global = this.optsWithGlobals();
|
|
4648
4672
|
const workspace = await resolveWorkspace({ workspace: global.workspace });
|
|
4673
|
+
const config = await loadConfig(workspace, global.config);
|
|
4674
|
+
const sourceTypes = parseSourceTypes(options.sourceType);
|
|
4675
|
+
const dateRanges = searchDateRanges(options);
|
|
4649
4676
|
const result = await searchIndex({
|
|
4650
4677
|
workspacePath: workspace,
|
|
4651
4678
|
query: query ?? "",
|
|
4652
|
-
topK:
|
|
4679
|
+
topK: resolveSearchTopK(options.topK, sourceTypes, dateRanges, config.search.defaultTopK),
|
|
4653
4680
|
sourceIds: parseCommaSeparatedList(options.source),
|
|
4654
4681
|
sourceNames: parseCommaSeparatedList(options.sourceName),
|
|
4655
|
-
sourceTypes
|
|
4682
|
+
sourceTypes,
|
|
4656
4683
|
uriPrefixes: parseCommaSeparatedList(options.uriPrefix),
|
|
4657
4684
|
hasPublicationDate: Boolean(options.hasPublicationDate),
|
|
4658
4685
|
tags: parseCommaSeparatedList(options.tag),
|
|
4659
4686
|
metadata: (options.metadata ?? []).map(parseKeyValue).map(([key, value]) => ({ key, value })),
|
|
4660
|
-
dateRanges
|
|
4687
|
+
dateRanges,
|
|
4661
4688
|
retrievalMode: parseRetrievalMode(options.retrieval),
|
|
4662
4689
|
showChunks: Boolean(options.showChunks)
|
|
4663
4690
|
});
|
package/dist/core/constants.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
export declare const PACKAGE_NAME
|
|
2
|
-
export declare const PACKAGE_VERSION
|
|
1
|
+
export declare const PACKAGE_NAME: string;
|
|
2
|
+
export declare const PACKAGE_VERSION: string;
|
|
3
3
|
export declare const DEFAULT_WORKSPACE = ".kb";
|
|
4
4
|
export declare const DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
5
5
|
export declare const LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
package/dist/index.js
CHANGED
|
@@ -22,6 +22,11 @@ import path from "path";
|
|
|
22
22
|
import YAML from "yaml";
|
|
23
23
|
|
|
24
24
|
// src/core/constants.ts
|
|
25
|
+
import { createRequire } from "module";
// Read name/version from the shipped package.json at load time so the
// reported version always matches the published manifest.
var require2 = createRequire(import.meta.url);
// The specifier is resolved relative to THIS bundle (dist/index.js), so the
// package root is one level up. "../../package.json" is only correct for
// dist/cli/main.js; from here it would point outside the package and throw
// MODULE_NOT_FOUND when the library entry point is imported.
var packageJson = require2("../package.json");
var PACKAGE_NAME = packageJson.name;
var PACKAGE_VERSION = packageJson.version;
|
|
25
30
|
var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
|
|
26
31
|
var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
|
|
27
32
|
|
|
@@ -53,6 +58,9 @@ var defaultConfig = () => ({
|
|
|
53
58
|
maxContextChars: 12e3,
|
|
54
59
|
citationStyle: "markdown"
|
|
55
60
|
},
|
|
61
|
+
search: {
|
|
62
|
+
defaultTopK: 50
|
|
63
|
+
},
|
|
56
64
|
retrieval: {
|
|
57
65
|
defaultMode: "lexical",
|
|
58
66
|
dense: {
|
|
@@ -74,12 +82,12 @@ var defaultConfig = () => ({
|
|
|
74
82
|
}
|
|
75
83
|
},
|
|
76
84
|
crawler: {
|
|
77
|
-
defaultUserAgent: "querylight-cli
|
|
85
|
+
defaultUserAgent: "querylight-cli",
|
|
78
86
|
obeyRobotsTxt: true,
|
|
79
87
|
rateLimitMs: 1e3,
|
|
80
88
|
maxConcurrentRequests: 5,
|
|
81
89
|
renderJs: false,
|
|
82
|
-
retentionDays:
|
|
90
|
+
retentionDays: 30,
|
|
83
91
|
fetchArticles: true
|
|
84
92
|
},
|
|
85
93
|
limits: {
|
|
@@ -123,6 +131,10 @@ async function loadConfig(workspacePath, configPath) {
|
|
|
123
131
|
...defaults.rag,
|
|
124
132
|
...parsed.rag ?? {}
|
|
125
133
|
},
|
|
134
|
+
search: {
|
|
135
|
+
...defaults.search,
|
|
136
|
+
...parsed.search ?? {}
|
|
137
|
+
},
|
|
126
138
|
retrieval: {
|
|
127
139
|
...defaults.retrieval,
|
|
128
140
|
...parsed.retrieval ?? {},
|
|
@@ -1069,7 +1081,7 @@ async function fetchUrlDocument({
|
|
|
1069
1081
|
publicationDate
|
|
1070
1082
|
}) {
|
|
1071
1083
|
const headers = {
|
|
1072
|
-
"user-agent": source.crawl?.userAgent ?? "querylight-cli
|
|
1084
|
+
"user-agent": source.crawl?.userAgent ?? "querylight-cli"
|
|
1073
1085
|
};
|
|
1074
1086
|
if (previous?.httpCache?.etag) {
|
|
1075
1087
|
headers["if-none-match"] = previous.httpCache.etag;
|
|
@@ -1368,7 +1380,7 @@ async function purgeDocuments(workspacePath, documentIds, documents) {
|
|
|
1368
1380
|
async function fetchFeedText(source) {
|
|
1369
1381
|
const response = await fetch(source.uri, {
|
|
1370
1382
|
headers: {
|
|
1371
|
-
"user-agent": source.crawl?.userAgent ?? "querylight-cli
|
|
1383
|
+
"user-agent": source.crawl?.userAgent ?? "querylight-cli"
|
|
1372
1384
|
}
|
|
1373
1385
|
});
|
|
1374
1386
|
if (!response.ok) {
|
package/dist/types/models.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tryformation/querylight-cli",
|
|
3
|
-
"version": "0.2.4",
|
|
3
|
+
"version": "0.2.5",
|
|
4
4
|
"description": "Querylight CLI for building and querying local knowledge bases.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/formation-res/querylight-cli#readme",
|
|
@@ -36,7 +36,8 @@
|
|
|
36
36
|
"test:watch": "vitest",
|
|
37
37
|
"lint": "tsc --noEmit",
|
|
38
38
|
"check": "npm run lint && npm test",
|
|
39
|
-
"prepublishOnly": "npm run check && npm run build"
|
|
39
|
+
"prepublishOnly": "npm run check && npm run build && npm run verify:release-version",
|
|
40
|
+
"verify:release-version": "node scripts/assert-release-version.mjs"
|
|
40
41
|
},
|
|
41
42
|
"dependencies": {
|
|
42
43
|
"@huggingface/transformers": "^3.8.1",
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import { spawn } from "node:child_process";
|
|
6
|
+
import packageJson from "../package.json" with { type: "json" };
|
|
7
|
+
|
|
8
|
+
/**
 * Spawn a command, wait for it to finish, and capture its output.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments passed to the executable.
 * @param {object} [options] Extra `spawn` options; they are spread after the
 *   default `stdio` and therefore override it.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves with the
 *   captured output when the process exits with code 0. Rejects with an Error
 *   when the process cannot be spawned, exits non-zero, or is terminated by a
 *   signal; the message includes the captured stderr.
 */
function run(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: ["ignore", "pipe", "pipe"],
      ...options
    });
    let stdout = "";
    let stderr = "";

    const capture = (stream, append) => {
      // The stream is null when the caller overrides stdio (e.g. "inherit").
      if (!stream) {
        return;
      }
      // Decode on the stream so a multi-byte UTF-8 character split across
      // two chunks is reassembled instead of becoming replacement characters.
      stream.setEncoding("utf8");
      stream.on("data", append);
    };

    capture(child.stdout, (chunk) => {
      stdout += chunk;
    });
    capture(child.stderr, (chunk) => {
      stderr += chunk;
    });

    child.on("error", reject);
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve({ stdout, stderr });
        return;
      }
      // `code` is null when the child was terminated by a signal.
      const outcome = code === null ? `signal ${signal}` : `exit code ${code}`;
      reject(new Error(`${command} ${args.join(" ")} failed with ${outcome}\n${stderr}`));
    });
  });
}
|
|
33
|
+
|
|
34
|
+
// Release guard: run the built CLI once in a throwaway workspace and check
// that the version in its JSON envelope matches package.json.
const workspaceRoot = await mkdtemp(path.join(os.tmpdir(), "qli-release-version-"));
const workspacePath = path.join(workspaceRoot, ".kb");

try {
  // Use the interpreter running this script instead of whatever "node"
  // resolves to on PATH, so the check exercises the same Node.js runtime.
  const { stdout } = await run(process.execPath, ["dist/cli/main.js", "init", "--workspace", workspacePath, "--json"], {
    // Run from the package root (parent of scripts/) so the relative
    // dist/cli/main.js path resolves.
    cwd: new URL("..", import.meta.url)
  });

  let parsed;
  try {
    parsed = JSON.parse(stdout);
  } catch (error) {
    // Surface what the CLI actually printed instead of a bare SyntaxError.
    throw new Error(`qli init --json did not print valid JSON:\n${stdout}`, { cause: error });
  }

  assert.equal(parsed.ok, true, "Expected qli init --json to succeed");
  assert.equal(parsed.version, packageJson.version, `Built CLI reported version ${parsed.version}, expected ${packageJson.version}`);
  process.stdout.write(`Verified built CLI version ${parsed.version}\n`);
} finally {
  // Always remove the temporary workspace, even when an assertion fails.
  await rm(workspaceRoot, { recursive: true, force: true });
}
|