@tryformation/querylight-cli 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +7 -0
- package/LICENSE +21 -0
- package/README.md +391 -0
- package/dist/chunk/chunk-store.d.ts +4 -0
- package/dist/chunk/chunker.d.ts +9 -0
- package/dist/cli/format.d.ts +4 -0
- package/dist/cli/main.d.ts +2 -0
- package/dist/cli/main.js +3523 -0
- package/dist/cli/run-cli.d.ts +5 -0
- package/dist/core/config.d.ts +4 -0
- package/dist/core/constants.d.ts +3 -0
- package/dist/core/errors.d.ts +17 -0
- package/dist/core/files.d.ts +1 -0
- package/dist/core/hashing.d.ts +1 -0
- package/dist/core/ids.d.ts +1 -0
- package/dist/core/jsonl.d.ts +2 -0
- package/dist/core/runs.d.ts +3 -0
- package/dist/core/workspace.d.ts +7 -0
- package/dist/index/index-store.d.ts +11 -0
- package/dist/index/querylight-indexer.d.ts +14 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +2794 -0
- package/dist/ingest/adapters/crawl4ai-adapter.d.ts +1 -0
- package/dist/ingest/adapters/directory-adapter.d.ts +2 -0
- package/dist/ingest/adapters/file-adapter.d.ts +16 -0
- package/dist/ingest/adapters/rss-adapter.d.ts +7 -0
- package/dist/ingest/adapters/url-adapter.d.ts +11 -0
- package/dist/ingest/adapters/website-adapter.d.ts +2 -0
- package/dist/ingest/document-utils.d.ts +24 -0
- package/dist/ingest/extractors/docx-extractor.d.ts +1 -0
- package/dist/ingest/extractors/html-extractor.d.ts +5 -0
- package/dist/ingest/extractors/markdown-extractor.d.ts +1 -0
- package/dist/ingest/extractors/pdf-extractor.d.ts +1 -0
- package/dist/ingest/extractors/text-extractor.d.ts +1 -0
- package/dist/ingest/ingest-service.d.ts +23 -0
- package/dist/normalize/boilerplate.d.ts +1 -0
- package/dist/normalize/normalize-markdown.d.ts +2 -0
- package/dist/query/context-builder.d.ts +8 -0
- package/dist/query/related-service.d.ts +6 -0
- package/dist/query/search-service.d.ts +31 -0
- package/dist/report/diff-service.d.ts +23 -0
- package/dist/sources/source-model.d.ts +1 -0
- package/dist/sources/source-store.d.ts +7 -0
- package/dist/types/models.d.ts +309 -0
- package/dist/vector/dense.d.ts +13 -0
- package/dist/vector/runtime.d.ts +18 -0
- package/dist/vector/service.d.ts +26 -0
- package/dist/vector/sparse.d.ts +19 -0
- package/dist/vector/store.d.ts +20 -0
- package/dist/vector/text.d.ts +3 -0
- package/package.json +66 -0
- package/scripts/sparse-encode.py +104 -0
package/Dockerfile
ADDED
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026-present FORMATION GmbH
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
# Querylight CLI
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/formation-res/querylight-cli/actions/workflows/ci.yml)
|
|
4
|
+
[npm package](https://www.npmjs.com/package/@tryformation/querylight-cli)
|
|
5
|
+
|
|
6
|
+
`Querylight CLI` is a TypeScript command-line application for building and querying local knowledge bases with Querylight TS.
|
|
7
|
+
|
|
8
|
+
- Package: `@tryformation/querylight-cli`
|
|
9
|
+
- Binary: `qli`
|
|
10
|
+
- Runtime: Node.js 22+
|
|
11
|
+
|
|
12
|
+
It is designed for local, inspectable workflows:
|
|
13
|
+
|
|
14
|
+
- ingest files, directories, URLs, websites, and RSS feeds
|
|
15
|
+
- normalize content into Markdown-like text
|
|
16
|
+
- chunk documents for retrieval
|
|
17
|
+
- build a portable local Querylight index
|
|
18
|
+
- search and generate retrieval context for external agents and tools
|
|
19
|
+
- inspect workspace state, diffs, and change reports
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
Run without installing globally:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
bunx @tryformation/querylight-cli init
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Install as a dependency:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
npm install @tryformation/querylight-cli
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Then run:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
npx qli --help
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Release
|
|
42
|
+
|
|
43
|
+
Publish releases from semantic version tags such as `0.1.1`.
|
|
44
|
+
|
|
45
|
+
The GitHub Actions publish workflow publishes `@tryformation/querylight-cli` to the public npm registry.
|
|
46
|
+
|
|
47
|
+
Configure npm trusted publishing for this repository before the first release. The publish workflow uses GitHub OIDC and does not use an `NPM_TOKEN` secret.
|
|
48
|
+
|
|
49
|
+
### Local Development with `npm link`
|
|
50
|
+
|
|
51
|
+
If you are working from a local checkout of this repository and want a real `qli` command available in any directory:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
cd /path/to/querylight-cli
|
|
55
|
+
npm install
|
|
56
|
+
npm run build
|
|
57
|
+
npm link
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
After that, you can use `qli` anywhere on your machine:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
cd /some/project
|
|
64
|
+
qli --help
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
To remove the linked command later:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
npm unlink -g @tryformation/querylight-cli
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Quick Start
|
|
74
|
+
|
|
75
|
+
Initialize a workspace:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
qli init
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Add a local docs directory:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
qli source add directory ./docs --name "Local Docs" --tag docs
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Build the knowledge base:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
qli rebuild
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Search it:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
qli search "API authentication"
|
|
97
|
+
qli search --source-type rss --since 2026-05-01 --has-publication-date
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Find related documents for an existing one:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
qli related <document-id-or-uri>
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Generate retrieval context:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
qli context "How do I authenticate API requests?" --top-k 8
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Example: Index `querylight.tryformation.com`
|
|
113
|
+
|
|
114
|
+
This example uses a local linked build of `qli` to create a test knowledge base for the Querylight documentation website.
|
|
115
|
+
|
|
116
|
+
1. Link the local CLI:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
cd /path/to/querylight-cli
|
|
120
|
+
npm install
|
|
121
|
+
npm run build
|
|
122
|
+
npm link
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
2. Create a fresh test workspace:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
mkdir -p ~/querylight-ts-search
|
|
129
|
+
cd ~/querylight-ts-search
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
3. Initialize the knowledge base:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
qli init
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
4. Add the Querylight website as a source:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
qli source add website https://querylight.tryformation.com \
|
|
142
|
+
--name "Querylight TS Docs" \
|
|
143
|
+
--max-depth 2 \
|
|
144
|
+
--max-pages 50 \
|
|
145
|
+
--include /docs/ \
|
|
146
|
+
--tag docs
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
5. Build the local index:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
qli rebuild
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
6. Inspect and query the result:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
qli status
|
|
159
|
+
qli source list
|
|
160
|
+
qli search "BM25 ranking"
|
|
161
|
+
qli context "How does Querylight TS handle BM25 ranking?" --top-k 8
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
If you want the workspace somewhere else, use:
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
qli --workspace /custom/path/.kb <command>
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Workspace
|
|
171
|
+
|
|
172
|
+
The default workspace is `.kb/`.
|
|
173
|
+
|
|
174
|
+
```text
|
|
175
|
+
.kb/
|
|
176
|
+
config.yaml
|
|
177
|
+
sources/
|
|
178
|
+
sources.jsonl
|
|
179
|
+
documents/
|
|
180
|
+
documents.jsonl
|
|
181
|
+
chunks/
|
|
182
|
+
chunks.jsonl
|
|
183
|
+
raw/
|
|
184
|
+
normalized/
|
|
185
|
+
indexes/
|
|
186
|
+
latest.json
|
|
187
|
+
latest.meta.json
|
|
188
|
+
runs/
|
|
189
|
+
logs/
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Use a custom workspace with:
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
qli --workspace ./my-kb <command>
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Supported Sources
|
|
199
|
+
|
|
200
|
+
Current source types:
|
|
201
|
+
|
|
202
|
+
- `file`
|
|
203
|
+
- `directory`
|
|
204
|
+
- `url`
|
|
205
|
+
- `website`
|
|
206
|
+
- `rss`
|
|
207
|
+
- `markdown`
|
|
208
|
+
- `text`
|
|
209
|
+
|
|
210
|
+
Current local file ingestion support:
|
|
211
|
+
|
|
212
|
+
- `.md`
|
|
213
|
+
- `.txt`
|
|
214
|
+
- `.html`
|
|
215
|
+
- `.htm`
|
|
216
|
+
- `.pdf`
|
|
217
|
+
- `.docx`
|
|
218
|
+
|
|
219
|
+
## Commands
|
|
220
|
+
|
|
221
|
+
All commands support:
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
--workspace <path>
|
|
225
|
+
--config <path>
|
|
226
|
+
--json
|
|
227
|
+
--verbose
|
|
228
|
+
--quiet
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Initialize
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
qli init
|
|
235
|
+
qli init --workspace ./kb
|
|
236
|
+
qli init --force
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
### Manage Sources
|
|
240
|
+
|
|
241
|
+
Add sources:
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
qli source add file ./docs/guide.md --name "Guide"
|
|
245
|
+
qli source add directory ./docs --name "Docs" --tag docs
|
|
246
|
+
qli source add url https://example.com/docs/auth --name "Auth Page"
|
|
247
|
+
qli source add website https://example.com --name "Example Site" --max-depth 2 --max-pages 50
|
|
248
|
+
qli source add rss https://example.com/feed.xml --name "Release Feed"
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
List and manage them:
|
|
252
|
+
|
|
253
|
+
```bash
|
|
254
|
+
qli source list
|
|
255
|
+
qli source config <source-id> --retention-days 30
|
|
256
|
+
qli source config <source-id> --name "Docs Feed" --tag rss docs
|
|
257
|
+
qli source disable <source-id>
|
|
258
|
+
qli source enable <source-id>
|
|
259
|
+
qli source remove <source-id>
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Find Related Documents
|
|
263
|
+
|
|
264
|
+
Build dense vectors first:
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
qli models pull --dense
|
|
268
|
+
qli rebuild
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
Or pull every model that is available on the current machine:
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
qli models pull
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
Then ask for documents related to an existing document id or URI:
|
|
278
|
+
|
|
279
|
+
```bash
|
|
280
|
+
qli related <document-id>
|
|
281
|
+
qli related https://example.com/docs/auth
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
### Ingest, Chunk, Index
|
|
285
|
+
|
|
286
|
+
```bash
|
|
287
|
+
qli ingest
|
|
288
|
+
qli chunk
|
|
289
|
+
qli index build
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Run the full pipeline:
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
qli rebuild
|
|
296
|
+
qli rebuild --source <source-id>
|
|
297
|
+
qli rebuild --changed-only
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
### Search and Retrieval
|
|
301
|
+
|
|
302
|
+
Search:
|
|
303
|
+
|
|
304
|
+
```bash
|
|
305
|
+
qli search "pricing API limits"
|
|
306
|
+
qli search "refund policy" --tag support --top-k 20
|
|
307
|
+
qli search --source-type rss,url --since 2026-05-01 --has-publication-date --top-k 25
|
|
308
|
+
qli search --source-name "Release Feed,Company Blog" --uri-prefix https://example.com/news,https://example.com/blog
|
|
309
|
+
qli search --source-type rss,url --top-k 25 --json
|
|
310
|
+
qli search "authentication" --json
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
Build retrieval context:
|
|
314
|
+
|
|
315
|
+
```bash
|
|
316
|
+
qli context "How do I configure the API?"
|
|
317
|
+
qli context "What changed in pricing?" --top-k 12 --max-chars 12000
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
### Change Inspection
|
|
321
|
+
|
|
322
|
+
```bash
|
|
323
|
+
qli diff
|
|
324
|
+
qli diff --source <source-id>
|
|
325
|
+
qli diff --document <document-id>
|
|
326
|
+
qli diff --since 2026-05-01
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
```bash
|
|
330
|
+
qli report changes --since 2026-05-01
|
|
331
|
+
qli report changes --source <source-id>
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
### Workspace Inspection
|
|
335
|
+
|
|
336
|
+
```bash
|
|
337
|
+
qli status
|
|
338
|
+
qli doctor
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
## JSON Output
|
|
342
|
+
|
|
343
|
+
Agent-facing and automation-friendly commands support `--json`.
|
|
344
|
+
|
|
345
|
+
The output envelope is:
|
|
346
|
+
|
|
347
|
+
```json
|
|
348
|
+
{
|
|
349
|
+
"ok": true,
|
|
350
|
+
"command": "search",
|
|
351
|
+
"workspace": "/absolute/path/.kb",
|
|
352
|
+
"version": "0.1.0",
|
|
353
|
+
"data": {}
|
|
354
|
+
}
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
## Docker
|
|
358
|
+
|
|
359
|
+
Build the image:
|
|
360
|
+
|
|
361
|
+
```bash
|
|
362
|
+
docker build -t querylight-cli .
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
Run commands against a mounted workspace:
|
|
366
|
+
|
|
367
|
+
```bash
|
|
368
|
+
docker run --rm -v "$PWD:/data" querylight-cli init --workspace /data/.kb
|
|
369
|
+
docker run --rm -v "$PWD:/data" querylight-cli rebuild --workspace /data/.kb
|
|
370
|
+
docker run --rm -v "$PWD:/data" querylight-cli search --workspace /data/.kb "authentication"
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
## Development
|
|
374
|
+
|
|
375
|
+
Install dependencies:
|
|
376
|
+
|
|
377
|
+
```bash
|
|
378
|
+
npm install
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
Run checks:
|
|
382
|
+
|
|
383
|
+
```bash
|
|
384
|
+
npm run check
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
Build:
|
|
388
|
+
|
|
389
|
+
```bash
|
|
390
|
+
npm run build
|
|
391
|
+
```
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ChunkRecord } from "../types/models.js";
|
|
2
|
+
export declare function chunksFile(workspacePath: string): string;
|
|
3
|
+
export declare function loadChunks(workspacePath: string): Promise<ChunkRecord[]>;
|
|
4
|
+
export declare function saveChunks(workspacePath: string, chunks: ChunkRecord[]): Promise<void>;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ChunkRecord, DocumentRecord, WorkspaceConfig } from "../types/models.js";
|
|
2
|
+
export declare function buildChunksForDocument(document: DocumentRecord, markdown: string, config: WorkspaceConfig, prior?: Map<string, ChunkRecord>, seenAt?: string): ChunkRecord[];
|
|
3
|
+
export declare function chunkDocuments({ workspacePath, sourceId, documentId }: {
|
|
4
|
+
workspacePath: string;
|
|
5
|
+
sourceId?: string;
|
|
6
|
+
documentId?: string;
|
|
7
|
+
}): Promise<{
|
|
8
|
+
chunksWritten: number;
|
|
9
|
+
}>;
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { RelatedDocumentResult, SearchResult, Source } from "../types/models.js";
|
|
2
|
+
export declare function formatSourcesTable(sources: Source[]): string;
|
|
3
|
+
export declare function formatSearchResults(results: SearchResult[]): string;
|
|
4
|
+
export declare function formatRelatedDocuments(results: RelatedDocumentResult[]): string;
|