4lt7ab-grimoire-cli 0.0.2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,3 +13,4 @@ dist/
13
13
  *.db
14
14
  .grimoire/
15
15
  .local/
16
+ CLAUDE.local.md
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: 4lt7ab-grimoire-cli
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Summary: CLI for managing a grimoire datastore
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: 4lt7ab-grimoire
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "4lt7ab-grimoire-cli"
3
- version = "0.0.2"
3
+ version = "0.0.4"
4
4
  description = "CLI for managing a grimoire datastore"
5
5
  requires-python = ">=3.12"
6
6
  dependencies = ["4lt7ab-grimoire", "typer>=0.12"]
@@ -0,0 +1,435 @@
1
+ import json
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+ from typing import Annotated, NoReturn
5
+
6
+ import typer
7
+ from grimoire import Entry, Grimoire, GrimoireError, GrimoireNotFound
8
+
9
# Fields every ingested record must carry.
REQUIRED_FIELDS = {"kind", "content"}
# Full set of fields a record may carry: the required ones plus the optional
# extras. Anything outside this set is rejected during ingest validation.
RECOGNIZED_FIELDS = REQUIRED_FIELDS | {"payload", "threshold", "keywords"}

# Each batch is one atomic transaction; on failure, only the in-flight batch
# rolls back. Smaller = better recovery granularity, slightly more overhead.
# 200 captures ~95% of fastembed's batching speedup vs single calls.
INGEST_BATCH_SIZE = 200
# Emit a progress line on stderr each time this many records have been written.
PROGRESS_EVERY = 1000

# Embedding model used by `init` when neither --model nor an existing file
# dictates one.
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"

# Layout of a mount directory: <mount>/grimoire.db and <mount>/models/.
DB_FILENAME = "grimoire.db"
MODELS_DIRNAME = "models"
19
+
20
# Shared option annotations. Every command takes --mount; the two search
# commands and `list` share --kind and the --created-after/--created-before
# window; the two search commands also share --k. Declaring each option once
# keeps flag names and help text identical wherever it is used.

# --mount, falling back to the GRIMOIRE_MOUNT environment variable.
Mount = Annotated[
    Path,
    typer.Option(help="Path to the grimoire mount directory.", envvar="GRIMOIRE_MOUNT"),
]

# --kind: optional filter on the entry kind.
Kind = Annotated[
    str | None, typer.Option(help="Restrict results to entries of this kind.")
]

# --k: result count for the search commands (each command supplies its default).
K = Annotated[int, typer.Option(help="Number of results to return.")]

# --created-after / --created-before: raw strings here; the commands convert
# them to datetimes themselves.
CreatedAfter = Annotated[
    str | None,
    typer.Option(
        "--created-after",
        help="ISO 8601 lower bound on entry creation time (inclusive).",
    ),
]
CreatedBefore = Annotated[
    str | None,
    typer.Option(
        "--created-before",
        help="ISO 8601 upper bound on entry creation time (exclusive).",
    ),
]
49
+
50
+
51
# Root Typer application; the command functions in this module register
# themselves on it through the @app.command() decorators.
app = typer.Typer(
    name="grimoire",
    # Running `grimoire` with no arguments shows the help screen.
    no_args_is_help=True,
    # Turn off Typer's pretty exception rendering.
    pretty_exceptions_enable=False,
    # Shown at the bottom of the top-level --help output.
    epilog=(
        "Environment variables:\n\n"
        " GRIMOIRE_MOUNT Default mount directory. Overridden by --mount."
    ),
)
60
+
61
+
62
@app.callback()
def _callback() -> None:
    """Manage a grimoire datastore — a single-file SQLite + sqlite-vec semantic store.

    Every command operates over a mount directory that holds the SQLite file
    (<mount>/grimoire.db) and the embedder model cache (<mount>/models/).
    Specify it with --mount <dir> or set the GRIMOIRE_MOUNT environment
    variable once for the shell.

    Read commands (vector-search, keyword-search, list, get, info) print one
    JSON object per line — pipe to `jq` for filtering.

    Run `grimoire init` for one-time setup, then `grimoire <command> --help`
    for the flags and arguments of any subcommand.
    """
    # Intentionally empty: this callback exists only so the docstring above is
    # used as the top-level `grimoire --help` text. The command names listed
    # there must match the names registered with @app.command().
77
+
78
+
79
@app.command()
def init(
    mount: Mount,
    model: Annotated[
        str | None,
        typer.Option(
            help=(
                "fastembed model name. Used only when creating a new grimoire; "
                "passing this against an existing grimoire whose locked model "
                "differs is an error."
            ),
        ),
    ] = None,
) -> None:
    """Create or verify a grimoire and warm its embedder. One-time setup."""
    db = mount / DB_FILENAME
    cache_folder = mount / MODELS_DIRNAME
    # Make sure both the mount directory and its model cache exist.
    for directory in (mount, cache_folder):
        directory.mkdir(parents=True, exist_ok=True)

    # `peek` yields None when there is no grimoire at `db` yet.
    stats = Grimoire.peek(db)
    model_conflicts = (
        stats is not None and model is not None and model != stats.model
    )
    if model_conflicts:
        _fail(
            f"file is locked to model {stats.model!r}; "
            "drop --model or use a different --mount path"
        )

    # An existing file dictates the model; otherwise use --model or the default.
    if stats:
        model_name = stats.model
    else:
        model_name = model or DEFAULT_MODEL

    # Imported lazily so an ImportError raised here becomes a normal CLI error
    # instead of a traceback.
    try:
        from grimoire.embedders import FastembedEmbedder

        embedder = FastembedEmbedder(model_name, cache_folder=cache_folder)
    except ImportError as exc:
        _fail(str(exc))

    try:
        handle = Grimoire.init(db, embedder=embedder)
        handle.close()
    except GrimoireError as exc:
        _fail(str(exc))

    # Finish by printing the same JSON summary that `grimoire info` prints.
    _emit_info(db)
121
+
122
+
123
@app.command()
def ingest(
    file: Annotated[
        Path,
        typer.Argument(
            help="Path to a JSONL file. One JSON object per line.",
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
        ),
    ],
    mount: Mount,
) -> None:
    """Bulk-ingest records into a grimoire."""
    # The whole file is parsed and validated up front, so a malformed line
    # aborts the run before anything is written.
    records = _load_records(file)
    if not records:
        typer.echo(f"No records to ingest from {file}")
        return

    committed = 0
    last_milestone = 0
    with _open_grimoire(mount) as g:
        for chunk_start in range(0, len(records), INGEST_BATCH_SIZE):
            chunk = records[chunk_start : chunk_start + INGEST_BATCH_SIZE]
            try:
                g.add_many(chunk)
            except GrimoireError as exc:
                # Batches are committed independently, so earlier batches are
                # already stored. Report where the run stopped so the user
                # knows what is in the file instead of getting a traceback.
                # Positions count parsed records, not lines of the input file.
                _fail(
                    f"ingest failed on records {chunk_start + 1}-"
                    f"{chunk_start + len(chunk)} of {len(records)} "
                    f"({committed} ingested before the failure): {exc}"
                )
            committed += len(chunk)
            # Progress goes to stderr, once per PROGRESS_EVERY records crossed;
            # the final batch is skipped because the summary line follows.
            milestone = committed // PROGRESS_EVERY
            if milestone > last_milestone and committed < len(records):
                typer.echo(f" ingested {committed}...", err=True)
                last_milestone = milestone

    typer.echo(f"Ingested {len(records)} records into {mount / DB_FILENAME}")
156
+
157
+
158
@app.command(name="vector-search")
def vector_search(
    query: Annotated[str, typer.Argument(help="Query text to embed and search for.")],
    mount: Mount,
    kind: Kind = None,
    k: K = 10,
    dynamic_threshold: Annotated[
        bool,
        typer.Option(
            "--dynamic-threshold",
            help="Filter results by each entry's stored similarity threshold.",
        ),
    ] = False,
    created_after: CreatedAfter = None,
    created_before: CreatedBefore = None,
) -> None:
    """Run a vector (semantic) search against a grimoire."""
    # Both date bounds are parsed before the grimoire is opened, so a malformed
    # value is reported without constructing the embedder.
    lower_bound = _parse_iso("--created-after", created_after)
    upper_bound = _parse_iso("--created-before", created_before)
    search_options = {
        "kind": kind,
        "k": k,
        "dynamic_threshold": dynamic_threshold,
        "created_after": lower_bound,
        "created_before": upper_bound,
    }
    with _open_grimoire(mount) as store:
        # One JSON line per hit, in the order the library returns them.
        for hit in store.vector_search(query, **search_options):
            _print_entry(hit)
187
+
188
+
189
@app.command(name="keyword-search")
def keyword_search(
    query: Annotated[
        str,
        typer.Argument(
            help=(
                "FTS5 query string. Plain words match tokens; supports phrases, "
                "prefix matches, and boolean operators."
            ),
        ),
    ],
    mount: Mount,
    kind: Kind = None,
    k: K = 10,
    created_after: CreatedAfter = None,
    created_before: CreatedBefore = None,
) -> None:
    """Run a keyword (FTS5) search against a grimoire."""
    # Both date bounds are parsed before the grimoire is opened, so a malformed
    # value is reported without constructing the embedder.
    lower_bound = _parse_iso("--created-after", created_after)
    upper_bound = _parse_iso("--created-before", created_before)
    search_options = {
        "kind": kind,
        "k": k,
        "created_after": lower_bound,
        "created_before": upper_bound,
    }
    with _open_grimoire(mount) as store:
        # One JSON line per hit, in the order the library returns them.
        for hit in store.keyword_search(query, **search_options):
            _print_entry(hit)
218
+
219
+
220
@app.command()
def add(
    content: Annotated[str, typer.Argument(help="Content text for the new entry.")],
    mount: Mount,
    kind: Annotated[str, typer.Option(help="Kind label for the entry.")] = "note",
    payload: Annotated[
        str | None,
        typer.Option(help="Optional JSON object to attach as the entry payload."),
    ] = None,
    threshold: Annotated[
        float | None,
        typer.Option(help="Optional per-entry similarity threshold."),
    ] = None,
    keyword: Annotated[
        list[str] | None,
        typer.Option(
            "--keyword",
            help=(
                "Add an explicit search keyword to boost recall in keyword-search. "
                "Repeatable: --keyword foo --keyword bar."
            ),
        ),
    ] = None,
) -> None:
    """Add a single record to a grimoire."""
    # --payload arrives as raw text: it must parse as JSON and be an object.
    payload_obj: dict | None
    if payload is None:
        payload_obj = None
    else:
        try:
            payload_obj = json.loads(payload)
        except json.JSONDecodeError as exc:
            _fail(f"--payload is not valid JSON: {exc.msg}")
        if not isinstance(payload_obj, dict):
            _fail("--payload must be a JSON object")

    # An omitted or empty --keyword list is passed to the library as None.
    keywords = keyword if keyword else None

    with _open_grimoire(mount) as store:
        created = store.add(
            kind=kind,
            content=content,
            payload=payload_obj,
            threshold=threshold,
            keywords=keywords,
        )
        # Print the stored entry as one JSON line.
        _print_entry(created)
263
+
264
+
265
@app.command()
def info(mount: Mount) -> None:
    """Show metadata and counts for a grimoire file."""
    # The database file sits at a fixed name inside the mount directory.
    db_path = mount / DB_FILENAME
    _emit_info(db_path)
269
+
270
+
271
@app.command(name="list")
def list_entries(
    mount: Mount,
    kind: Kind = None,
    limit: Annotated[
        int, typer.Option(help="Maximum number of entries to return.")
    ] = 100,
    after_id: Annotated[
        str | None, typer.Option(help="Cursor: return entries with id > this value.")
    ] = None,
    created_after: CreatedAfter = None,
    created_before: CreatedBefore = None,
) -> None:
    """Paginate entries in chronological order (by id)."""
    # Both date bounds are parsed before the grimoire is opened, so a malformed
    # value is reported without constructing the embedder.
    lower_bound = _parse_iso("--created-after", created_after)
    upper_bound = _parse_iso("--created-before", created_before)
    page_options = {
        "kind": kind,
        "limit": limit,
        "after_id": after_id,
        "created_after": lower_bound,
        "created_before": upper_bound,
    }
    with _open_grimoire(mount) as store:
        # One JSON line per entry, in the order the library returns them.
        for item in store.list(**page_options):
            _print_entry(item)
296
+
297
+
298
@app.command()
def get(
    entry_id: Annotated[str, typer.Argument(help="Entry id (ULID).")],
    mount: Mount,
) -> None:
    """Fetch a single entry by id."""
    with _open_grimoire(mount) as store:
        found = store.get(entry_id)
        if found is not None:
            _print_entry(found)
            return
        # A missing id is an error exit, not empty output.
        _fail(f"No entry with id {entry_id!r}")
309
+
310
+
311
@app.command()
def delete(
    entry_id: Annotated[str, typer.Argument(help="Entry id (ULID).")],
    mount: Mount,
) -> None:
    """Delete an entry by id."""
    with _open_grimoire(mount) as store:
        # A falsy result from the library is treated as "nothing to delete".
        removed = store.delete(entry_id)
        if removed:
            typer.echo(f"Deleted {entry_id}")
        else:
            _fail(f"No entry with id {entry_id!r}")
321
+
322
+
323
def _open_grimoire(mount: Path) -> Grimoire:
    """Open the grimoire under `mount`, auto-detecting the model from the file.

    The embedding model name is read from the existing file with
    `Grimoire.peek`, so callers never pass a model themselves.

    Any failure (no grimoire at the path, the embedder import failing, or a
    `GrimoireError` while opening) is reported through `_fail`, which prints
    the message and exits. A missing grimoire, including `GrimoireNotFound`,
    gets a friendly "run 'grimoire init' first" message.
    """
    db = mount / DB_FILENAME

    # Check for the grimoire BEFORE touching the filesystem, so a mistyped
    # --mount only produces an error and does not leave an empty
    # <mount>/models directory tree behind. `info` already peeks at paths that
    # may not exist, so peeking without creating directories is assumed safe.
    stats = Grimoire.peek(db)
    if stats is None:
        _fail(f"no grimoire at {db}; run 'grimoire init' first")

    cache_folder = mount / MODELS_DIRNAME
    cache_folder.mkdir(parents=True, exist_ok=True)

    # Imported lazily so an ImportError raised here becomes a normal CLI error
    # instead of a traceback.
    try:
        from grimoire.embedders import FastembedEmbedder

        embedder = FastembedEmbedder(stats.model, cache_folder=cache_folder)
    except ImportError as exc:
        _fail(str(exc))

    try:
        return Grimoire.open(db, embedder=embedder)
    except GrimoireNotFound:
        # The file can disappear between the peek above and this open.
        _fail(f"no grimoire at {db}; run 'grimoire init' first")
    except GrimoireError as exc:
        _fail(str(exc))
346
+
347
+
348
def _emit_info(db: Path) -> None:
    """Print a one-line JSON summary of the grimoire stored at `db`.

    The summary holds the file path followed by the model, dimension,
    schema_version, entry_count and kinds values reported by `Grimoire.peek`.
    Exits through `_fail` when `peek` finds no grimoire at `db`.
    """
    stats = Grimoire.peek(db)
    if stats is None:
        _fail(f"No grimoire at {db}")

    summary: dict[str, object] = {"path": str(db)}
    # Copy the reported attributes across in a fixed order so the JSON keys
    # always appear in the same sequence.
    for field in ("model", "dimension", "schema_version", "entry_count", "kinds"):
        summary[field] = getattr(stats, field)
    typer.echo(json.dumps(summary))
364
+
365
+
366
def _load_records(path: Path) -> list[dict]:
    """Read and validate every record of the JSONL file at `path`.

    Blank lines are skipped. Each remaining line must parse as JSON and pass
    `_validate_record`; the first offending line aborts the run through
    `_fail` with a `path:line` prefix, so nothing is returned for a file that
    is only partly valid.

    Returns the parsed records in file order.
    """
    records: list[dict] = []
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise make line 1 fail as "invalid JSON".
    with path.open(encoding="utf-8-sig") as f:
        try:
            for line_no, raw in enumerate(f, 1):
                line = raw.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError as exc:
                    _fail(f"{path}:{line_no}: invalid JSON: {exc.msg}")
                _validate_record(record, path, line_no)
                records.append(record)
        except UnicodeDecodeError as exc:
            # Decoding happens while iterating the file, so a badly encoded
            # file surfaces here; report it as a CLI error, not a traceback.
            _fail(f"{path}: file is not valid UTF-8: {exc.reason}")
    return records
380
+
381
+
382
def _validate_record(record: object, path: Path, line_no: int) -> None:
    """Check one parsed JSONL record, exiting through `_fail` on the first problem.

    A valid record is a JSON object that has every field in REQUIRED_FIELDS,
    has no field outside RECOGNIZED_FIELDS, uses strings for `kind` and
    `content`, and, when `payload` is present and not null, uses a JSON object
    for it (the same rule the `add` command applies to --payload).

    `path` and `line_no` are only used to prefix error messages.
    """
    where = f"{path}:{line_no}"
    if not isinstance(record, dict):
        _fail(f"{where}: record must be a JSON object")

    missing = REQUIRED_FIELDS - record.keys()
    if missing:
        _fail(f"{where}: missing required fields: {sorted(missing)}")

    unknown = record.keys() - RECOGNIZED_FIELDS
    if unknown:
        _fail(
            f"{where}: unknown fields {sorted(unknown)}. "
            f"Put extra metadata in `payload`."
        )

    # Catch wrongly typed values here, where the line number is still known,
    # rather than letting them fail later in the middle of a batch write.
    for field in ("kind", "content"):
        if not isinstance(record[field], str):
            _fail(f"{where}: `{field}` must be a string")

    payload = record.get("payload")
    if payload is not None and not isinstance(payload, dict):
        _fail(f"{where}: `payload` must be a JSON object")
394
+
395
+
396
def _print_entry(entry: Entry) -> None:
    """Print `entry` as a single JSON line.

    `id`, `kind` and `content` are always included. `keywords`, `payload`,
    `threshold`, `distance` and `rank` are included only when they are not
    None, in that order.
    """
    record: dict[str, object] = {
        "id": entry.id,
        "kind": entry.kind,
        "content": entry.content,
    }
    optional_fields = ("keywords", "payload", "threshold", "distance", "rank")
    for name in optional_fields:
        value = getattr(entry, name)
        # Unset optional fields are omitted from the output entirely.
        if value is not None:
            record[name] = value
    typer.echo(json.dumps(record))
413
+
414
+
415
+ def _parse_iso(flag: str, value: str | None) -> datetime | None:
416
+ if value is None:
417
+ return None
418
+ try:
419
+ return datetime.fromisoformat(value)
420
+ except ValueError:
421
+ _fail(f"{flag} must be ISO 8601 (e.g. 2026-05-04 or 2026-05-04T10:00:00)")
422
+
423
+
424
def _fail(message: str) -> NoReturn:
    """Write `Error: <message>` to stderr, then raise `typer.Exit` with code 1.

    Never returns, so callers can use it as the last statement of an error
    branch.
    """
    formatted = f"Error: {message}"
    typer.echo(formatted, err=True)
    raise typer.Exit(code=1)
427
+
428
+
429
def main() -> None:
    """Console-script entrypoint: run the `grimoire` Typer application."""
    app()


# Also run the CLI when this module is executed directly as a script.
if __name__ == "__main__":
    main()