4lt7ab-grimoire-cli 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: 4lt7ab-grimoire-cli
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: CLI for managing a grimoire datastore
|
|
5
|
+
Requires-Python: >=3.14
|
|
6
|
+
Requires-Dist: 4lt7ab-grimoire
|
|
7
|
+
Requires-Dist: typer>=0.12
|
|
8
|
+
Provides-Extra: fastembed
|
|
9
|
+
Requires-Dist: 4lt7ab-grimoire[fastembed]; extra == 'fastembed'
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
grimoire_cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
grimoire_cli/errors.py,sha256=mHzDEP8EYnbkVghYxAbJC1O0o1MAKoEMRJIYwQngVsI,85
|
|
3
|
+
grimoire_cli/main.py,sha256=JYRgoT3oLmGZzYls7x0I51DleEDpXZg5pdagle0upYs,10706
|
|
4
|
+
4lt7ab_grimoire_cli-0.0.1.dist-info/METADATA,sha256=GqHshUcDhWOibSmgAHK6eV1aFxP9yqVs51dFDKa_76I,282
|
|
5
|
+
4lt7ab_grimoire_cli-0.0.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
6
|
+
4lt7ab_grimoire_cli-0.0.1.dist-info/entry_points.txt,sha256=-zQ91T-ryoFt0OXJFHDWa5L2u0XPBugoEQry-ifOPIA,52
|
|
7
|
+
4lt7ab_grimoire_cli-0.0.1.dist-info/RECORD,,
|
grimoire_cli/__init__.py
ADDED
|
File without changes
|
grimoire_cli/errors.py
ADDED
grimoire_cli/main.py
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Annotated, NoReturn
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from grimoire import Entry, Grimoire, GrimoireError
|
|
7
|
+
|
|
8
|
+
# Keys a JSONL record is allowed to carry; _validate_record rejects any others.
RECOGNIZED_FIELDS = {"kind", "content", "payload", "threshold"}
# Keys every JSONL record must carry.
REQUIRED_FIELDS = {"kind", "content"}
# `ingest` writes a progress line to stderr each time this many records are added.
PROGRESS_EVERY = 1000
# Embedding model used when neither the database file nor --model names one.
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"

# Root Typer application: prints help when invoked with no arguments and
# leaves tracebacks unformatted (pretty exceptions are switched off).
app = typer.Typer(
    name="grimoire", no_args_is_help=True, pretty_exceptions_enable=False
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@app.callback()
def _callback() -> None:
    """Manage a grimoire datastore."""
    # No-op: the callback takes no options. Its docstring is what Typer
    # shows as the application-level help text, so it is kept verbatim.
    return None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@app.command()
def ingest(
    file: Annotated[
        Path,
        typer.Argument(
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
            help="Path to a JSONL file. One JSON object per line.",
        ),
    ],
    db: Annotated[
        Path,
        typer.Option(
            envvar="GRIMOIRE_DB",
            help="Path to the grimoire SQLite file.",
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            envvar="GRIMOIRE_CACHE",
            help="Directory for the embedder's model cache.",
        ),
    ],
    model: Annotated[
        str,
        typer.Option(
            help="fastembed model name (only used when creating a new file).",
        ),
    ] = DEFAULT_MODEL,
) -> None:
    """Bulk-ingest records into a grimoire."""
    # The whole file is parsed and validated before the store is opened, so
    # a malformed line aborts the command before anything is added.
    records = _load_records(file)
    if len(records) == 0:
        typer.echo(f"No records to ingest from {file}")
        return

    with _open_grimoire(db, cache_folder, model_override=model) as store:
        for count, item in enumerate(records, start=1):
            store.add(
                kind=item["kind"],
                content=item["content"],
                # Optional fields: absent keys are passed through as None.
                payload=item.get("payload"),
                threshold=item.get("threshold"),
            )
            # Periodic progress goes to stderr so stdout stays clean.
            if count % PROGRESS_EVERY == 0:
                typer.echo(f" ingested {count}...", err=True)

    typer.echo(f"Ingested {len(records)} records into {db}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.command()
def search(
    query: Annotated[str, typer.Argument(help="Query text to embed and search for.")],
    db: Annotated[
        Path,
        typer.Option(
            exists=True,
            envvar="GRIMOIRE_DB",
            help="Path to the grimoire SQLite file.",
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            envvar="GRIMOIRE_CACHE",
            help="Directory for the embedder's model cache.",
        ),
    ],
    kind: Annotated[
        str | None,
        typer.Option(help="Restrict results to entries of this kind."),
    ] = None,
    k: Annotated[int, typer.Option(help="Number of results to return.")] = 10,
    dynamic_threshold: Annotated[
        bool,
        typer.Option(
            "--dynamic-threshold",
            help="Filter results by each entry's stored similarity threshold.",
        ),
    ] = False,
) -> None:
    """Run a semantic search against a grimoire."""
    # No model override here: the embedding model comes from the existing
    # file (falling back to DEFAULT_MODEL inside _open_grimoire).
    with _open_grimoire(db, cache_folder) as store:
        hits = store.search(
            query, kind=kind, k=k, dynamic_threshold=dynamic_threshold
        )
        # One JSON object per result line on stdout.
        for hit in hits:
            _print_entry(hit)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@app.command()
def add(
    content: Annotated[str, typer.Argument(help="Content text for the new entry.")],
    db: Annotated[
        Path,
        typer.Option(
            envvar="GRIMOIRE_DB",
            help="Path to the grimoire SQLite file.",
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            envvar="GRIMOIRE_CACHE",
            help="Directory for the embedder's model cache.",
        ),
    ],
    kind: Annotated[str, typer.Option(help="Kind label for the entry.")] = "note",
    payload: Annotated[
        str | None,
        typer.Option(help="Optional JSON object to attach as the entry payload."),
    ] = None,
    threshold: Annotated[
        float | None,
        typer.Option(help="Optional per-entry similarity threshold."),
    ] = None,
    model: Annotated[
        str,
        typer.Option(help="fastembed model name (only used when creating a new file)."),
    ] = DEFAULT_MODEL,
) -> None:
    """Add a single record to a grimoire."""
    # --payload arrives as raw text; it must decode to a JSON object (dict)
    # before it is handed to the store.
    payload_obj: dict | None = None
    if payload is not None:
        try:
            decoded = json.loads(payload)
        except json.JSONDecodeError as exc:
            _fail(f"--payload is not valid JSON: {exc.msg}")
        if not isinstance(decoded, dict):
            _fail("--payload must be a JSON object")
        payload_obj = decoded

    with _open_grimoire(db, cache_folder, model_override=model) as store:
        created = store.add(
            kind=kind,
            content=content,
            payload=payload_obj,
            threshold=threshold,
        )
        # Echo the stored entry back as a single JSON line.
        _print_entry(created)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@app.command()
def info(
    db: Annotated[
        Path,
        typer.Option(
            envvar="GRIMOIRE_DB",
            help="Path to the grimoire SQLite file.",
        ),
    ],
) -> None:
    """Show metadata and counts for a grimoire file."""
    # Grimoire.peek is used directly (no embedder is constructed); a None
    # result is reported as "no grimoire at this path".
    stats = Grimoire.peek(db)
    if stats is None:
        _fail(f"No grimoire at {db}")

    summary = {
        "path": str(db),
        "model": stats.model,
        "dimension": stats.dimension,
        "schema_version": stats.schema_version,
        "entry_count": stats.entry_count,
        "kinds": stats.kinds,
    }
    typer.echo(json.dumps(summary))
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@app.command(name="list")
def list_entries(
    db: Annotated[
        Path,
        typer.Option(
            exists=True,
            envvar="GRIMOIRE_DB",
            help="Path to the grimoire SQLite file.",
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            envvar="GRIMOIRE_CACHE",
            help="Directory for the embedder's model cache.",
        ),
    ],
    kind: Annotated[
        str | None,
        typer.Option(help="Restrict to entries of this kind."),
    ] = None,
    limit: Annotated[
        int,
        typer.Option(help="Maximum number of entries to return."),
    ] = 100,
    after_id: Annotated[
        str | None,
        typer.Option(help="Cursor: return entries with id > this value."),
    ] = None,
) -> None:
    """Paginate entries in chronological order (by id)."""
    # The function is named list_entries to avoid shadowing the builtin;
    # the command itself is exposed as `list`.
    with _open_grimoire(db, cache_folder) as store:
        page = store.list(kind=kind, limit=limit, after_id=after_id)
        for item in page:
            _print_entry(item)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@app.command()
def get(
    entry_id: Annotated[str, typer.Argument(help="Entry id (ULID).")],
    db: Annotated[
        Path,
        typer.Option(
            exists=True,
            envvar="GRIMOIRE_DB",
            help="Path to the grimoire SQLite file.",
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            envvar="GRIMOIRE_CACHE",
            help="Directory for the embedder's model cache.",
        ),
    ],
) -> None:
    """Fetch a single entry by id."""
    with _open_grimoire(db, cache_folder) as store:
        found = store.get(entry_id)
        # A missing id is reported on stderr with exit status 1.
        if found is None:
            _fail(f"No entry with id {entry_id!r}")
        _print_entry(found)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@app.command()
def delete(
    entry_id: Annotated[str, typer.Argument(help="Entry id (ULID).")],
    db: Annotated[
        Path,
        typer.Option(
            exists=True,
            envvar="GRIMOIRE_DB",
            help="Path to the grimoire SQLite file.",
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            envvar="GRIMOIRE_CACHE",
            help="Directory for the embedder's model cache.",
        ),
    ],
) -> None:
    """Delete an entry by id."""
    with _open_grimoire(db, cache_folder) as store:
        removed = store.delete(entry_id)
        # A falsy result from delete() is treated as "no such entry".
        if not removed:
            _fail(f"No entry with id {entry_id!r}")
        typer.echo(f"Deleted {entry_id}")
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _open_grimoire(
    db: Path, cache_folder: Path, *, model_override: str | None = None
) -> Grimoire:
    """Open the grimoire at `db` with a fastembed embedder.

    The embedding model name is chosen in this order:
      1. the model reported by `Grimoire.peek(db)`, when peek returns stats;
      2. `model_override`, when given (the commands pass their --model value);
      3. `DEFAULT_MODEL`.

    The parent directory of `db` and `cache_folder` are created if missing.
    An ImportError while loading the embedder, or a GrimoireError while
    opening the file, is reported through `_fail` (message on stderr,
    exit status 1) instead of propagating.
    """
    db.parent.mkdir(parents=True, exist_ok=True)
    cache_folder.mkdir(parents=True, exist_ok=True)

    stats = Grimoire.peek(db)
    if stats:
        model_name = stats.model
    else:
        model_name = model_override or DEFAULT_MODEL

    try:
        # Imported lazily so commands that never open a store do not need
        # the embedder module to be importable.
        from grimoire.embedders import FastembedEmbedder

        embedder = FastembedEmbedder(model_name, cache_folder=cache_folder)
    except ImportError as exc:
        _fail(str(exc))

    try:
        return Grimoire.open(db, embedder=embedder)
    except GrimoireError as exc:
        _fail(str(exc))
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _load_records(path: Path) -> list[dict]:
    """Read and validate every record of a JSONL file.

    The file is decoded as UTF-8 regardless of the platform locale. Lines
    that are empty after stripping whitespace are skipped; every other line
    must parse as JSON and pass `_validate_record`.

    Args:
        path: Location of the JSONL file.

    Returns:
        The parsed records, in file order.

    Any failure (undecodable bytes, invalid JSON, an invalid record) is
    reported through `_fail`, which prints to stderr and exits with
    status 1.
    """
    records: list[dict] = []
    try:
        # Explicit encoding: the default for open() follows the locale,
        # which mis-decodes UTF-8 input on non-UTF-8 platforms.
        with path.open(encoding="utf-8") as f:
            for line_no, raw in enumerate(f, 1):
                line = raw.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError as exc:
                    _fail(f"{path}:{line_no}: invalid JSON: {exc.msg}")
                _validate_record(record, path, line_no)
                records.append(record)
    except UnicodeDecodeError as exc:
        # Decoding happens while iterating lines, so the handler wraps the
        # whole read loop rather than just the open() call.
        _fail(f"{path}: file is not valid UTF-8: {exc.reason}")
    return records
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _validate_record(record: object, path: Path, line_no: int) -> None:
    """Check the shape of one parsed JSONL record.

    A record must be a JSON object containing every key in REQUIRED_FIELDS
    and no key outside RECOGNIZED_FIELDS. Value types are not inspected.
    The first violation found is reported through `_fail`, prefixed with
    `path:line_no`.
    """
    if not isinstance(record, dict):
        _fail(f"{path}:{line_no}: record must be a JSON object")

    present = record.keys()

    absent = REQUIRED_FIELDS - present
    if absent:
        _fail(f"{path}:{line_no}: missing required fields: {sorted(absent)}")

    extra = present - RECOGNIZED_FIELDS
    if extra:
        _fail(
            f"{path}:{line_no}: unknown fields {sorted(extra)}. "
            f"Put extra metadata in `payload`."
        )
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _print_entry(entry: Entry) -> None:
    """Write one entry to stdout as a single-line JSON object.

    `id`, `kind` and `content` are always emitted. `payload`, `threshold`
    and `distance` are emitted only when they are not None.
    """
    record: dict[str, object] = {
        "id": entry.id,
        "kind": entry.kind,
        "content": entry.content,
    }

    payload = entry.payload
    if payload is not None:
        # The payload is decoded as JSON so it nests as structured data;
        # text that is not valid JSON is emitted unchanged.
        # NOTE(review): assumes entry.payload is JSON text — confirm
        # against the grimoire library.
        try:
            record["payload"] = json.loads(payload)
        except json.JSONDecodeError:
            record["payload"] = payload

    for optional in ("threshold", "distance"):
        value = getattr(entry, optional)
        if value is not None:
            record[optional] = value

    typer.echo(json.dumps(record))
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _fail(message: str) -> NoReturn:
    """Print `Error: <message>` to stderr and exit with status 1.

    Never returns: it always raises `typer.Exit`.
    """
    text = f"Error: {message}"
    typer.echo(text, err=True)
    raise typer.Exit(1)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def main() -> None:
    """Run the Typer app; this is the `grimoire` console-script entry point."""
    app()
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
# Run the CLI when this file is executed as a script.
if __name__ == "__main__":
    main()
|