4lt7ab-grimoire-cli 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {4lt7ab_grimoire_cli-0.0.2 → 4lt7ab_grimoire_cli-0.0.4}/.gitignore +1 -0
- {4lt7ab_grimoire_cli-0.0.2 → 4lt7ab_grimoire_cli-0.0.4}/PKG-INFO +1 -1
- {4lt7ab_grimoire_cli-0.0.2 → 4lt7ab_grimoire_cli-0.0.4}/pyproject.toml +1 -1
- 4lt7ab_grimoire_cli-0.0.4/src/grimoire_cli/main.py +435 -0
- 4lt7ab_grimoire_cli-0.0.4/tests/test_smoke.py +667 -0
- 4lt7ab_grimoire_cli-0.0.2/src/grimoire_cli/main.py +0 -372
- 4lt7ab_grimoire_cli-0.0.2/tests/test_smoke.py +0 -376
- {4lt7ab_grimoire_cli-0.0.2 → 4lt7ab_grimoire_cli-0.0.4}/src/grimoire_cli/__init__.py +0 -0
- {4lt7ab_grimoire_cli-0.0.2 → 4lt7ab_grimoire_cli-0.0.4}/src/grimoire_cli/errors.py +0 -0
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated, NoReturn
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
from grimoire import Entry, Grimoire, GrimoireError, GrimoireNotFound
|
|
8
|
+
|
|
9
|
+
# Keys an ingest record is allowed to carry; _validate_record rejects any other key.
RECOGNIZED_FIELDS = {"kind", "content", "payload", "threshold", "keywords"}
# Keys every ingest record must carry; _validate_record rejects records missing one.
REQUIRED_FIELDS = {"kind", "content"}
# Each batch is one atomic transaction; on failure, only the in-flight batch
# rolls back. Smaller = better recovery granularity, slightly more overhead.
# 200 captures ~95% of fastembed's batching speedup vs single calls.
INGEST_BATCH_SIZE = 200
# `ingest` writes a progress line to stderr each time the running total
# crosses another multiple of this many records.
PROGRESS_EVERY = 1000
# Model `init` falls back to when --model is omitted and no grimoire exists yet.
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
# Names of the database file and the embedder cache directory inside the mount.
DB_FILENAME = "grimoire.db"
MODELS_DIRNAME = "models"
|
|
19
|
+
|
|
20
|
+
# Shared parameter types. Every command takes --mount, and the search and list
# commands reuse the same filter options, so each option is declared exactly
# once here and its help text stays identical wherever it appears.
_MOUNT_OPTION = typer.Option(
    help="Path to the grimoire mount directory.",
    envvar="GRIMOIRE_MOUNT",
)
_KIND_OPTION = typer.Option(help="Restrict results to entries of this kind.")
_K_OPTION = typer.Option(help="Number of results to return.")
_CREATED_AFTER_OPTION = typer.Option(
    "--created-after",
    help="ISO 8601 lower bound on entry creation time (inclusive).",
)
_CREATED_BEFORE_OPTION = typer.Option(
    "--created-before",
    help="ISO 8601 upper bound on entry creation time (exclusive).",
)

Mount = Annotated[Path, _MOUNT_OPTION]
Kind = Annotated[str | None, _KIND_OPTION]
K = Annotated[int, _K_OPTION]
CreatedAfter = Annotated[str | None, _CREATED_AFTER_OPTION]
CreatedBefore = Annotated[str | None, _CREATED_BEFORE_OPTION]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Text appended to the bottom of the top-level --help output.
_EPILOG = (
    "Environment variables:\n\n"
    " GRIMOIRE_MOUNT Default mount directory. Overridden by --mount."
)

# The Typer application every command below registers on. Rich-formatted
# tracebacks are switched off via pretty_exceptions_enable.
app = typer.Typer(
    name="grimoire",
    epilog=_EPILOG,
    no_args_is_help=True,
    pretty_exceptions_enable=False,
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@app.callback()
def _callback() -> None:
    """Manage a grimoire datastore — a single-file SQLite + sqlite-vec semantic store.

    Every command operates over a mount directory that holds the SQLite file
    (<mount>/grimoire.db) and the embedder model cache (<mount>/models/).
    Specify it with --mount <dir> or set the GRIMOIRE_MOUNT environment
    variable once for the shell.

    Read commands (vector-search, keyword-search, list, get, info) print one
    JSON object per line — pipe to `jq` for filtering.

    Run `grimoire init` for one-time setup, then `grimoire <command> --help`
    for the flags and arguments of any subcommand.
    """
    # Intentionally empty: this callback exists only so the docstring above is
    # attached to the application as its top-level help text. The command list
    # in that text must name the commands as registered on `app`
    # (`vector-search` / `keyword-search`; there is no plain `search`).
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.command()
def init(
    mount: Mount,
    model: Annotated[
        str | None,
        typer.Option(
            help=(
                "fastembed model name. Used only when creating a new grimoire; "
                "passing this against an existing grimoire whose locked model "
                "differs is an error."
            ),
        ),
    ] = None,
) -> None:
    """Create or verify a grimoire and warm its embedder. One-time setup."""
    db_path = mount / DB_FILENAME
    models_dir = mount / MODELS_DIRNAME
    # Make sure both the mount and its model cache exist before touching them.
    for directory in (mount, models_dir):
        directory.mkdir(parents=True, exist_ok=True)

    existing = Grimoire.peek(db_path)
    # An explicit --model may not contradict the model an existing file uses.
    if existing is not None and model is not None and model != existing.model:
        _fail(
            f"file is locked to model {existing.model!r}; "
            "drop --model or use a different --mount path"
        )

    # Precedence: the existing file's model, then --model, then the default.
    if existing:
        chosen_model = existing.model
    elif model:
        chosen_model = model
    else:
        chosen_model = DEFAULT_MODEL

    try:
        # Imported here so a failing import is reported through _fail instead
        # of breaking module import.
        from grimoire.embedders import FastembedEmbedder

        embedder = FastembedEmbedder(chosen_model, cache_folder=models_dir)
    except ImportError as exc:
        _fail(str(exc))

    try:
        handle = Grimoire.init(db_path, embedder=embedder)
        handle.close()
    except GrimoireError as exc:
        _fail(str(exc))

    _emit_info(db_path)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@app.command()
def ingest(
    file: Annotated[
        Path,
        typer.Argument(
            help="Path to a JSONL file. One JSON object per line.",
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
        ),
    ],
    mount: Mount,
) -> None:
    """Bulk-ingest records into a grimoire."""
    # The whole file is parsed and validated up front, so a malformed line
    # aborts the command before anything is written.
    records = _load_records(file)
    if not records:
        typer.echo(f"No records to ingest from {file}")
        return

    total = 0
    last_milestone = 0
    with _open_grimoire(mount) as g:
        for chunk_start in range(0, len(records), INGEST_BATCH_SIZE):
            chunk = records[chunk_start : chunk_start + INGEST_BATCH_SIZE]
            try:
                g.add_many(chunk)
            except GrimoireError as exc:
                # Report library failures the same way init/_open_grimoire do
                # instead of a raw traceback. Per the note on
                # INGEST_BATCH_SIZE only the in-flight batch rolls back, so
                # tell the user how many records are already stored.
                _fail(
                    f"ingest failed after {total} of {len(records)} records "
                    f"were committed: {exc}"
                )
            total += len(chunk)
            # Progress goes to stderr, once per PROGRESS_EVERY boundary
            # crossed, and is skipped for the final batch because the summary
            # line below covers it.
            milestone = total // PROGRESS_EVERY
            if milestone > last_milestone and total < len(records):
                typer.echo(f" ingested {total}...", err=True)
                last_milestone = milestone

    typer.echo(f"Ingested {len(records)} records into {mount / DB_FILENAME}")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@app.command(name="vector-search")
def vector_search(
    query: Annotated[str, typer.Argument(help="Query text to embed and search for.")],
    mount: Mount,
    kind: Kind = None,
    k: K = 10,
    dynamic_threshold: Annotated[
        bool,
        typer.Option(
            "--dynamic-threshold",
            help="Filter results by each entry's stored similarity threshold.",
        ),
    ] = False,
    created_after: CreatedAfter = None,
    created_before: CreatedBefore = None,
) -> None:
    """Run a vector (semantic) search against a grimoire."""
    # Validate both time bounds before opening the store.
    lower_bound = _parse_iso("--created-after", created_after)
    upper_bound = _parse_iso("--created-before", created_before)
    with _open_grimoire(mount) as store:
        hits = store.vector_search(
            query,
            kind=kind,
            k=k,
            dynamic_threshold=dynamic_threshold,
            created_after=lower_bound,
            created_before=upper_bound,
        )
        # One JSON line per hit, printed while the store is still open.
        for hit in hits:
            _print_entry(hit)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@app.command(name="keyword-search")
def keyword_search(
    query: Annotated[
        str,
        typer.Argument(
            help=(
                "FTS5 query string. Plain words match tokens; supports phrases, "
                "prefix matches, and boolean operators."
            ),
        ),
    ],
    mount: Mount,
    kind: Kind = None,
    k: K = 10,
    created_after: CreatedAfter = None,
    created_before: CreatedBefore = None,
) -> None:
    """Run a keyword (FTS5) search against a grimoire."""
    # Validate both time bounds before opening the store.
    lower_bound = _parse_iso("--created-after", created_after)
    upper_bound = _parse_iso("--created-before", created_before)
    with _open_grimoire(mount) as store:
        hits = store.keyword_search(
            query,
            kind=kind,
            k=k,
            created_after=lower_bound,
            created_before=upper_bound,
        )
        # One JSON line per hit, printed while the store is still open.
        for hit in hits:
            _print_entry(hit)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@app.command()
def add(
    content: Annotated[str, typer.Argument(help="Content text for the new entry.")],
    mount: Mount,
    kind: Annotated[str, typer.Option(help="Kind label for the entry.")] = "note",
    payload: Annotated[
        str | None,
        typer.Option(help="Optional JSON object to attach as the entry payload."),
    ] = None,
    threshold: Annotated[
        float | None,
        typer.Option(help="Optional per-entry similarity threshold."),
    ] = None,
    keyword: Annotated[
        list[str] | None,
        typer.Option(
            "--keyword",
            help=(
                "Add an explicit search keyword to boost recall in keyword-search. "
                "Repeatable: --keyword foo --keyword bar."
            ),
        ),
    ] = None,
) -> None:
    """Add a single record to a grimoire."""
    # --payload arrives as raw text: it must decode as JSON and be an object.
    payload_obj: dict | None = None
    if payload is not None:
        try:
            decoded = json.loads(payload)
        except json.JSONDecodeError as exc:
            _fail(f"--payload is not valid JSON: {exc.msg}")
        if not isinstance(decoded, dict):
            _fail("--payload must be a JSON object")
        payload_obj = decoded

    # An empty or absent --keyword list is passed on as None.
    keywords = keyword or None
    with _open_grimoire(mount) as store:
        created = store.add(
            kind=kind,
            content=content,
            payload=payload_obj,
            threshold=threshold,
            keywords=keywords,
        )
        _print_entry(created)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@app.command()
def info(mount: Mount) -> None:
    """Show metadata and counts for a grimoire file."""
    # Only the database path is needed; _emit_info does the lookup and printing.
    db_path = mount / DB_FILENAME
    _emit_info(db_path)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
@app.command(name="list")
def list_entries(
    mount: Mount,
    kind: Kind = None,
    limit: Annotated[
        int, typer.Option(help="Maximum number of entries to return.")
    ] = 100,
    after_id: Annotated[
        str | None, typer.Option(help="Cursor: return entries with id > this value.")
    ] = None,
    created_after: CreatedAfter = None,
    created_before: CreatedBefore = None,
) -> None:
    """Paginate entries in chronological order (by id)."""
    # Validate both time bounds before opening the store.
    lower_bound = _parse_iso("--created-after", created_after)
    upper_bound = _parse_iso("--created-before", created_before)
    with _open_grimoire(mount) as store:
        page = store.list(
            kind=kind,
            limit=limit,
            after_id=after_id,
            created_after=lower_bound,
            created_before=upper_bound,
        )
        # One JSON line per entry, printed while the store is still open.
        for item in page:
            _print_entry(item)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
@app.command()
def get(
    entry_id: Annotated[str, typer.Argument(help="Entry id (ULID).")],
    mount: Mount,
) -> None:
    """Fetch a single entry by id."""
    with _open_grimoire(mount) as store:
        found = store.get(entry_id)
        # A missing id is reported as an error, not as empty output.
        if found is None:
            _fail(f"No entry with id {entry_id!r}")
        _print_entry(found)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
@app.command()
def delete(
    entry_id: Annotated[str, typer.Argument(help="Entry id (ULID).")],
    mount: Mount,
) -> None:
    """Delete an entry by id."""
    with _open_grimoire(mount) as store:
        removed = store.delete(entry_id)
        # A falsy result from the store is reported as "no such entry".
        if not removed:
            _fail(f"No entry with id {entry_id!r}")
        typer.echo(f"Deleted {entry_id}")
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _open_grimoire(mount: Path) -> Grimoire:
    """Return an open grimoire for the database stored under `mount`.

    The embedder is built from the model name that `Grimoire.peek` reports for
    the existing file, so callers never pass a model. A missing grimoire, a
    failed embedder import, and any `GrimoireError` raised while opening are
    all reported through `_fail`, which exits the process.
    """
    db_path = mount / DB_FILENAME
    models_dir = mount / MODELS_DIRNAME
    missing_message = f"no grimoire at {db_path}; run 'grimoire init' first"

    models_dir.mkdir(parents=True, exist_ok=True)
    header = Grimoire.peek(db_path)
    if header is None:
        _fail(missing_message)

    try:
        # Imported here so a failing import is reported through _fail instead
        # of breaking module import.
        from grimoire.embedders import FastembedEmbedder

        embedder = FastembedEmbedder(header.model, cache_folder=models_dir)
    except ImportError as exc:
        _fail(str(exc))

    try:
        opened = Grimoire.open(db_path, embedder=embedder)
    except GrimoireNotFound:
        # Handled before the general GrimoireError case so it gets the
        # "run init first" hint rather than the library's own message.
        _fail(missing_message)
    except GrimoireError as exc:
        _fail(str(exc))
    return opened
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _emit_info(db: Path) -> None:
    """Print one JSON object describing the grimoire stored at `db`.

    The object carries the path plus the model, dimension, schema version,
    entry count and kinds reported by `Grimoire.peek`. Exits through `_fail`
    when `peek` finds no grimoire.
    """
    stats = Grimoire.peek(db)
    if stats is None:
        _fail(f"No grimoire at {db}")

    summary = {
        "path": str(db),
        "model": stats.model,
        "dimension": stats.dimension,
        "schema_version": stats.schema_version,
        "entry_count": stats.entry_count,
        "kinds": stats.kinds,
    }
    typer.echo(json.dumps(summary))
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _load_records(path: Path) -> list[dict]:
    """Parse and validate every record in the JSONL file at `path`.

    Blank lines are skipped. Each remaining line must decode as JSON and pass
    `_validate_record`; the first line that does not aborts the command
    through `_fail`, with the message prefixed by `<path>:<line number>`.

    Returns the decoded records in file order.
    """
    records: list[dict] = []
    # "utf-8-sig" reads plain UTF-8 unchanged and drops a leading byte-order
    # mark when there is one. With plain "utf-8" the BOM stays glued to the
    # first line and json.loads rejects it, so files saved by BOM-writing
    # editors failed on line 1.
    with path.open(encoding="utf-8-sig") as f:
        for line_no, raw in enumerate(f, 1):
            line = raw.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as exc:
                _fail(f"{path}:{line_no}: invalid JSON: {exc.msg}")
            _validate_record(record, path, line_no)
            records.append(record)
    return records
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _validate_record(record: object, path: Path, line_no: int) -> None:
    """Check the shape of one decoded ingest record, exiting via `_fail` if bad.

    A record must be a JSON object, must contain every key in
    REQUIRED_FIELDS, and may contain only keys in RECOGNIZED_FIELDS. Only key
    names are checked here, not the values. `path` and `line_no` are used
    solely to prefix the error message.
    """
    if not isinstance(record, dict):
        _fail(f"{path}:{line_no}: record must be a JSON object")

    present = record.keys()

    absent = REQUIRED_FIELDS - present
    if absent:
        _fail(f"{path}:{line_no}: missing required fields: {sorted(absent)}")

    extras = present - RECOGNIZED_FIELDS
    if extras:
        _fail(
            f"{path}:{line_no}: unknown fields {sorted(extras)}. "
            "Put extra metadata in `payload`."
        )
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def _print_entry(entry: Entry) -> None:
    """Print `entry` as a single line of JSON.

    `id`, `kind` and `content` are always written. `keywords`, `payload`,
    `threshold`, `distance` and `rank` follow in that order, each only when
    its value is not None.
    """
    record: dict[str, object] = {
        "id": entry.id,
        "kind": entry.kind,
        "content": entry.content,
    }
    # The tuple order fixes the key order of the emitted JSON.
    for name in ("keywords", "payload", "threshold", "distance", "rank"):
        value = getattr(entry, name)
        if value is not None:
            record[name] = value
    typer.echo(json.dumps(record))
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _parse_iso(flag: str, value: str | None) -> datetime | None:
|
|
416
|
+
if value is None:
|
|
417
|
+
return None
|
|
418
|
+
try:
|
|
419
|
+
return datetime.fromisoformat(value)
|
|
420
|
+
except ValueError:
|
|
421
|
+
_fail(f"{flag} must be ISO 8601 (e.g. 2026-05-04 or 2026-05-04T10:00:00)")
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def _fail(message: str) -> NoReturn:
    """Write `Error: <message>` to stderr and end the command with exit code 1.

    Never returns: it always raises `typer.Exit`.
    """
    text = f"Error: {message}"
    typer.echo(text, err=True)
    raise typer.Exit(code=1)
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def main() -> None:
    """Run the Typer application; entry point for the `grimoire` command."""
    app()


# Running this file directly behaves the same as calling main().
if __name__ == "__main__":
    main()
|