@tryformation/querylight-cli 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/Dockerfile +7 -0
  2. package/LICENSE +21 -0
  3. package/README.md +391 -0
  4. package/dist/chunk/chunk-store.d.ts +4 -0
  5. package/dist/chunk/chunker.d.ts +9 -0
  6. package/dist/cli/format.d.ts +4 -0
  7. package/dist/cli/main.d.ts +2 -0
  8. package/dist/cli/main.js +3523 -0
  9. package/dist/cli/run-cli.d.ts +5 -0
  10. package/dist/core/config.d.ts +4 -0
  11. package/dist/core/constants.d.ts +3 -0
  12. package/dist/core/errors.d.ts +17 -0
  13. package/dist/core/files.d.ts +1 -0
  14. package/dist/core/hashing.d.ts +1 -0
  15. package/dist/core/ids.d.ts +1 -0
  16. package/dist/core/jsonl.d.ts +2 -0
  17. package/dist/core/runs.d.ts +3 -0
  18. package/dist/core/workspace.d.ts +7 -0
  19. package/dist/index/index-store.d.ts +11 -0
  20. package/dist/index/querylight-indexer.d.ts +14 -0
  21. package/dist/index.d.ts +11 -0
  22. package/dist/index.js +2794 -0
  23. package/dist/ingest/adapters/crawl4ai-adapter.d.ts +1 -0
  24. package/dist/ingest/adapters/directory-adapter.d.ts +2 -0
  25. package/dist/ingest/adapters/file-adapter.d.ts +16 -0
  26. package/dist/ingest/adapters/rss-adapter.d.ts +7 -0
  27. package/dist/ingest/adapters/url-adapter.d.ts +11 -0
  28. package/dist/ingest/adapters/website-adapter.d.ts +2 -0
  29. package/dist/ingest/document-utils.d.ts +24 -0
  30. package/dist/ingest/extractors/docx-extractor.d.ts +1 -0
  31. package/dist/ingest/extractors/html-extractor.d.ts +5 -0
  32. package/dist/ingest/extractors/markdown-extractor.d.ts +1 -0
  33. package/dist/ingest/extractors/pdf-extractor.d.ts +1 -0
  34. package/dist/ingest/extractors/text-extractor.d.ts +1 -0
  35. package/dist/ingest/ingest-service.d.ts +23 -0
  36. package/dist/normalize/boilerplate.d.ts +1 -0
  37. package/dist/normalize/normalize-markdown.d.ts +2 -0
  38. package/dist/query/context-builder.d.ts +8 -0
  39. package/dist/query/related-service.d.ts +6 -0
  40. package/dist/query/search-service.d.ts +31 -0
  41. package/dist/report/diff-service.d.ts +23 -0
  42. package/dist/sources/source-model.d.ts +1 -0
  43. package/dist/sources/source-store.d.ts +7 -0
  44. package/dist/types/models.d.ts +309 -0
  45. package/dist/vector/dense.d.ts +13 -0
  46. package/dist/vector/runtime.d.ts +18 -0
  47. package/dist/vector/service.d.ts +26 -0
  48. package/dist/vector/sparse.d.ts +19 -0
  49. package/dist/vector/store.d.ts +20 -0
  50. package/dist/vector/text.d.ts +3 -0
  51. package/package.json +66 -0
  52. package/scripts/sparse-encode.py +104 -0
package/Dockerfile ADDED
@@ -0,0 +1,7 @@
1
+ FROM node:22-slim
2
+ WORKDIR /app
3
+ COPY package.json package-lock.json ./
4
+ RUN npm ci
5
+ COPY . .
6
+ RUN npm run build
7
+ ENTRYPOINT ["node", "/app/dist/cli/main.js"]
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026-present FORMATION GmbH
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,391 @@
1
+ # Querylight CLI
2
+
3
+ [![CI](https://github.com/formation-res/querylight-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/formation-res/querylight-cli/actions/workflows/ci.yml)
4
+ [![npm](https://img.shields.io/npm/v/%40tryformation%2Fquerylight-cli?label=npm)](https://www.npmjs.com/package/@tryformation/querylight-cli)
5
+
6
+ `Querylight CLI` is a TypeScript command line application for building and querying local knowledge bases with Querylight TS.
7
+
8
+ - Package: `@tryformation/querylight-cli`
9
+ - Binary: `qli`
10
+ - Runtime: Node.js 22+
11
+
12
+ It is designed for local, inspectable workflows:
13
+
14
+ - ingest files, directories, URLs, and websites
15
+ - normalize content into Markdown-like text
16
+ - chunk documents for retrieval
17
+ - build a portable local Querylight index
18
+ - search and generate retrieval context for external agents and tools
19
+ - inspect workspace state, diffs, and change reports
20
+
21
+ ## Install
22
+
23
+ Run without installing globally (this example uses Bun's `bunx` runner):
24
+
25
+ ```bash
26
+ bunx @tryformation/querylight-cli init
27
+ ```
28
+
29
+ Install as a dependency:
30
+
31
+ ```bash
32
+ npm install @tryformation/querylight-cli
33
+ ```
34
+
35
+ Then run:
36
+
37
+ ```bash
38
+ npx qli --help
39
+ ```
40
+
41
+ ## Release
42
+
43
+ Publish releases from semantic version tags such as `0.1.1`.
44
+
45
+ The GitHub Actions publish workflow publishes `@tryformation/querylight-cli` to the public npm registry.
46
+
47
+ Configure npm trusted publishing for this repository before the first release. The publish workflow uses GitHub OIDC and does not use an `NPM_TOKEN` secret.
48
+
49
+ ### Local Development with `npm link`
50
+
51
+ If you are working from a local checkout of this repository and want a real `qli` command available in any directory:
52
+
53
+ ```bash
54
+ cd /path/to/querylight-cli
55
+ npm install
56
+ npm run build
57
+ npm link
58
+ ```
59
+
60
+ After that, you can use `qli` anywhere on your machine:
61
+
62
+ ```bash
63
+ cd /some/project
64
+ qli --help
65
+ ```
66
+
67
+ To remove the linked command later:
68
+
69
+ ```bash
70
+ npm unlink -g @tryformation/querylight-cli
71
+ ```
72
+
73
+ ## Quick Start
74
+
75
+ Initialize a workspace:
76
+
77
+ ```bash
78
+ qli init
79
+ ```
80
+
81
+ Add a local docs directory:
82
+
83
+ ```bash
84
+ qli source add directory ./docs --name "Local Docs" --tag docs
85
+ ```
86
+
87
+ Build the knowledge base:
88
+
89
+ ```bash
90
+ qli rebuild
91
+ ```
92
+
93
+ Search it:
94
+
95
+ ```bash
96
+ qli search "API authentication"
97
+ qli search --source-type rss --since 2026-05-01 --has-publication-date
98
+ ```
99
+
100
+ Find related documents for an existing one:
101
+
102
+ ```bash
103
+ qli related <document-id-or-uri>
104
+ ```
105
+
106
+ Generate retrieval context:
107
+
108
+ ```bash
109
+ qli context "How do I authenticate API requests?" --top-k 8
110
+ ```
111
+
112
+ ## Example: Index `querylight.tryformation.com`
113
+
114
+ This example uses a local linked build of `qli` to create a test knowledge base for the Querylight documentation website.
115
+
116
+ 1. Link the local CLI:
117
+
118
+ ```bash
119
+ cd /path/to/querylight-cli
120
+ npm install
121
+ npm run build
122
+ npm link
123
+ ```
124
+
125
+ 2. Create a fresh test workspace:
126
+
127
+ ```bash
128
+ mkdir -p ~/querylight-ts-search
129
+ cd ~/querylight-ts-search
130
+ ```
131
+
132
+ 3. Initialize the knowledge base:
133
+
134
+ ```bash
135
+ qli init
136
+ ```
137
+
138
+ 4. Add the Querylight website as a source:
139
+
140
+ ```bash
141
+ qli source add website https://querylight.tryformation.com \
142
+ --name "Querylight TS Docs" \
143
+ --max-depth 2 \
144
+ --max-pages 50 \
145
+ --include /docs/ \
146
+ --tag docs
147
+ ```
148
+
149
+ 5. Build the local index:
150
+
151
+ ```bash
152
+ qli rebuild
153
+ ```
154
+
155
+ 6. Inspect and query the result:
156
+
157
+ ```bash
158
+ qli status
159
+ qli source list
160
+ qli search "BM25 ranking"
161
+ qli context "How does Querylight TS handle BM25 ranking?" --top-k 8
162
+ ```
163
+
164
+ If you want the workspace somewhere else, use:
165
+
166
+ ```bash
167
+ qli --workspace /custom/path/.kb <command>
168
+ ```
169
+
170
+ ## Workspace
171
+
172
+ The default workspace is `.kb/`.
173
+
174
+ ```text
175
+ .kb/
176
+ config.yaml
177
+ sources/
178
+ sources.jsonl
179
+ documents/
180
+ documents.jsonl
181
+ chunks/
182
+ chunks.jsonl
183
+ raw/
184
+ normalized/
185
+ indexes/
186
+ latest.json
187
+ latest.meta.json
188
+ runs/
189
+ logs/
190
+ ```
191
+
192
+ Use a custom workspace with:
193
+
194
+ ```bash
195
+ qli --workspace ./my-kb <command>
196
+ ```
197
+
198
+ ## Supported Sources
199
+
200
+ Current source types:
201
+
202
+ - `file`
203
+ - `directory`
204
+ - `url`
205
+ - `website`
206
+ - `rss`
207
+ - `markdown`
208
+ - `text`
209
+
210
+ Current local file ingestion support:
211
+
212
+ - `.md`
213
+ - `.txt`
214
+ - `.html`
215
+ - `.htm`
216
+ - `.pdf`
217
+ - `.docx`
218
+
219
+ ## Commands
220
+
221
+ All commands support:
222
+
223
+ ```bash
224
+ --workspace <path>
225
+ --config <path>
226
+ --json
227
+ --verbose
228
+ --quiet
229
+ ```
230
+
231
+ ### Initialize
232
+
233
+ ```bash
234
+ qli init
235
+ qli init --workspace ./kb
236
+ qli init --force
237
+ ```
238
+
239
+ ### Manage Sources
240
+
241
+ Add sources:
242
+
243
+ ```bash
244
+ qli source add file ./docs/guide.md --name "Guide"
245
+ qli source add directory ./docs --name "Docs" --tag docs
246
+ qli source add url https://example.com/docs/auth --name "Auth Page"
247
+ qli source add website https://example.com --name "Example Site" --max-depth 2 --max-pages 50
248
+ qli source add rss https://example.com/feed.xml --name "Release Feed"
249
+ ```
250
+
251
+ List and manage them:
252
+
253
+ ```bash
254
+ qli source list
255
+ qli source config <source-id> --retention-days 30
256
+ qli source config <source-id> --name "Docs Feed" --tag rss docs
257
+ qli source disable <source-id>
258
+ qli source enable <source-id>
259
+ qli source remove <source-id>
260
+ ```
261
+
262
+ ### Find Related Documents
263
+
264
+ Build dense vectors first:
265
+
266
+ ```bash
267
+ qli models pull --dense
268
+ qli rebuild
269
+ ```
270
+
271
+ Or pull every model that is available on the current machine:
272
+
273
+ ```bash
274
+ qli models pull
275
+ ```
276
+
277
+ Then ask for documents related to an existing document id or URI:
278
+
279
+ ```bash
280
+ qli related <document-id>
281
+ qli related https://example.com/docs/auth
282
+ ```
283
+
284
+ ### Ingest, Chunk, Index
285
+
286
+ ```bash
287
+ qli ingest
288
+ qli chunk
289
+ qli index build
290
+ ```
291
+
292
+ Run the full pipeline:
293
+
294
+ ```bash
295
+ qli rebuild
296
+ qli rebuild --source <source-id>
297
+ qli rebuild --changed-only
298
+ ```
299
+
300
+ ### Search and Retrieval
301
+
302
+ Search:
303
+
304
+ ```bash
305
+ qli search "pricing API limits"
306
+ qli search "refund policy" --tag support --top-k 20
307
+ qli search --source-type rss,url --since 2026-05-01 --has-publication-date --top-k 25
308
+ qli search --source-name "Release Feed,Company Blog" --uri-prefix https://example.com/news,https://example.com/blog
309
+ qli search --source-type rss,url --top-k 25 --json
310
+ qli search "authentication" --json
311
+ ```
312
+
313
+ Build retrieval context:
314
+
315
+ ```bash
316
+ qli context "How do I configure the API?"
317
+ qli context "What changed in pricing?" --top-k 12 --max-chars 12000
318
+ ```
319
+
320
+ ### Change Inspection
321
+
322
+ ```bash
323
+ qli diff
324
+ qli diff --source <source-id>
325
+ qli diff --document <document-id>
326
+ qli diff --since 2026-05-01
327
+ ```
328
+
329
+ ```bash
330
+ qli report changes --since 2026-05-01
331
+ qli report changes --source <source-id>
332
+ ```
333
+
334
+ ### Workspace Inspection
335
+
336
+ ```bash
337
+ qli status
338
+ qli doctor
339
+ ```
340
+
341
+ ## JSON Output
342
+
343
+ Agent-facing and automation-friendly commands support `--json`.
344
+
345
+ The output envelope is:
346
+
347
+ ```json
348
+ {
349
+ "ok": true,
350
+ "command": "search",
351
+ "workspace": "/absolute/path/.kb",
352
+ "version": "0.1.0",
353
+ "data": {}
354
+ }
355
+ ```
356
+
357
+ ## Docker
358
+
359
+ Build the image:
360
+
361
+ ```bash
362
+ docker build -t querylight-cli .
363
+ ```
364
+
365
+ Run commands against a mounted workspace:
366
+
367
+ ```bash
368
+ docker run --rm -v "$PWD:/data" querylight-cli init --workspace /data/.kb
369
+ docker run --rm -v "$PWD:/data" querylight-cli rebuild --workspace /data/.kb
370
+ docker run --rm -v "$PWD:/data" querylight-cli search --workspace /data/.kb "authentication"
371
+ ```
372
+
373
+ ## Development
374
+
375
+ Install dependencies:
376
+
377
+ ```bash
378
+ npm install
379
+ ```
380
+
381
+ Run checks:
382
+
383
+ ```bash
384
+ npm run check
385
+ ```
386
+
387
+ Build:
388
+
389
+ ```bash
390
+ npm run build
391
+ ```
package/dist/chunk/chunk-store.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ import type { ChunkRecord } from "../types/models.js";
2
+ export declare function chunksFile(workspacePath: string): string;
3
+ export declare function loadChunks(workspacePath: string): Promise<ChunkRecord[]>;
4
+ export declare function saveChunks(workspacePath: string, chunks: ChunkRecord[]): Promise<void>;
package/dist/chunk/chunker.d.ts ADDED
@@ -0,0 +1,9 @@
1
+ import type { ChunkRecord, DocumentRecord, WorkspaceConfig } from "../types/models.js";
2
+ export declare function buildChunksForDocument(document: DocumentRecord, markdown: string, config: WorkspaceConfig, prior?: Map<string, ChunkRecord>, seenAt?: string): ChunkRecord[];
3
+ export declare function chunkDocuments({ workspacePath, sourceId, documentId }: {
4
+ workspacePath: string;
5
+ sourceId?: string;
6
+ documentId?: string;
7
+ }): Promise<{
8
+ chunksWritten: number;
9
+ }>;
package/dist/cli/format.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ import type { RelatedDocumentResult, SearchResult, Source } from "../types/models.js";
2
+ export declare function formatSourcesTable(sources: Source[]): string;
3
+ export declare function formatSearchResults(results: SearchResult[]): string;
4
+ export declare function formatRelatedDocuments(results: RelatedDocumentResult[]): string;
package/dist/cli/main.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};