4lt7ab-grimoire-cli 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- 4lt7ab_grimoire_cli-0.0.1/.gitignore +15 -0
- 4lt7ab_grimoire_cli-0.0.1/PKG-INFO +9 -0
- 4lt7ab_grimoire_cli-0.0.1/pyproject.toml +22 -0
- 4lt7ab_grimoire_cli-0.0.1/src/grimoire_cli/__init__.py +0 -0
- 4lt7ab_grimoire_cli-0.0.1/src/grimoire_cli/errors.py +2 -0
- 4lt7ab_grimoire_cli-0.0.1/src/grimoire_cli/main.py +372 -0
- 4lt7ab_grimoire_cli-0.0.1/tests/test_smoke.py +376 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: 4lt7ab-grimoire-cli
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: CLI for managing a grimoire datastore
|
|
5
|
+
Requires-Python: >=3.14
|
|
6
|
+
Requires-Dist: 4lt7ab-grimoire
|
|
7
|
+
Requires-Dist: typer>=0.12
|
|
8
|
+
Provides-Extra: fastembed
|
|
9
|
+
Requires-Dist: 4lt7ab-grimoire[fastembed]; extra == 'fastembed'
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "4lt7ab-grimoire-cli"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "CLI for managing a grimoire datastore"
|
|
5
|
+
requires-python = ">=3.14"
|
|
6
|
+
dependencies = ["4lt7ab-grimoire", "typer>=0.12"]
|
|
7
|
+
|
|
8
|
+
[project.optional-dependencies]
|
|
9
|
+
fastembed = ["4lt7ab-grimoire[fastembed]"]
|
|
10
|
+
|
|
11
|
+
[project.scripts]
|
|
12
|
+
grimoire = "grimoire_cli.main:main"
|
|
13
|
+
|
|
14
|
+
[tool.uv.sources]
|
|
15
|
+
4lt7ab-grimoire = { workspace = true }
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["hatchling"]
|
|
19
|
+
build-backend = "hatchling.build"
|
|
20
|
+
|
|
21
|
+
[tool.hatch.build.targets.wheel]
|
|
22
|
+
packages = ["src/grimoire_cli"]
|
|
File without changes
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Annotated, NoReturn
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from grimoire import Entry, Grimoire, GrimoireError
|
|
7
|
+
|
|
8
|
+
# Keys that every ingested JSONL record must carry.
REQUIRED_FIELDS = {"content", "kind"}
# Every key a record may carry; `_validate_record` rejects anything else.
RECOGNIZED_FIELDS = REQUIRED_FIELDS | {"threshold", "payload"}
# `ingest` writes a progress line to stderr each time this many records are added.
PROGRESS_EVERY = 1_000
# Model name used when the file stores none and no override is supplied.
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
|
|
12
|
+
|
|
13
|
+
# Root Typer application; each subcommand below registers itself on it.
app = typer.Typer(
    pretty_exceptions_enable=False,  # leave tracebacks as plain Python ones
    no_args_is_help=True,  # a bare `grimoire` prints the help screen
    name="grimoire",
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# No-op callback registered on the app. Typer shows a callback's docstring as
# the application help text, so the wording below is user-facing.
@app.callback()
def _callback() -> None:
    """Manage a grimoire datastore."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@app.command()
def ingest(
    file: Annotated[
        Path,
        typer.Argument(
            help="Path to a JSONL file. One JSON object per line.",
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
        ),
    ],
    db: Annotated[
        Path,
        typer.Option(
            help="Path to the grimoire SQLite file.",
            envvar="GRIMOIRE_DB",
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            help="Directory for the embedder's model cache.",
            envvar="GRIMOIRE_CACHE",
        ),
    ],
    model: Annotated[
        str,
        typer.Option(
            help="fastembed model name (only used when creating a new file)."
        ),
    ] = DEFAULT_MODEL,
) -> None:
    """Bulk-ingest records into a grimoire."""
    # NOTE: the docstring above is shown as the command's --help text.
    # The whole file is parsed and validated first, so a bad line aborts the
    # command before the datastore is opened.
    pending = _load_records(file)
    total = len(pending)
    if total == 0:
        typer.echo(f"No records to ingest from {file}")
        return

    with _open_grimoire(db, cache_folder, model_override=model) as store:
        for position, item in enumerate(pending, start=1):
            store.add(
                kind=item["kind"],
                content=item["content"],
                payload=item.get("payload"),
                threshold=item.get("threshold"),
            )
            # Progress goes to stderr so stdout stays clean for the summary.
            reached_checkpoint = position % PROGRESS_EVERY == 0
            if reached_checkpoint:
                typer.echo(f" ingested {position}...", err=True)

    typer.echo(f"Ingested {total} records into {db}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.command()
def search(
    query: Annotated[str, typer.Argument(help="Query text to embed and search for.")],
    db: Annotated[
        Path,
        typer.Option(
            help="Path to the grimoire SQLite file.",
            envvar="GRIMOIRE_DB",
            exists=True,
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            help="Directory for the embedder's model cache.",
            envvar="GRIMOIRE_CACHE",
        ),
    ],
    kind: Annotated[
        str | None,
        typer.Option(help="Restrict results to entries of this kind."),
    ] = None,
    k: Annotated[int, typer.Option(help="Number of results to return.")] = 10,
    dynamic_threshold: Annotated[
        bool,
        typer.Option(
            "--dynamic-threshold",
            help="Filter results by each entry's stored similarity threshold.",
        ),
    ] = False,
) -> None:
    """Run a semantic search against a grimoire."""
    # NOTE: the docstring above is shown as the command's --help text.
    with _open_grimoire(db, cache_folder) as store:
        hits = store.search(
            query,
            kind=kind,
            k=k,
            dynamic_threshold=dynamic_threshold,
        )
        # One JSON line per hit, in the order the library returns them.
        for hit in hits:
            _print_entry(hit)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@app.command()
def add(
    content: Annotated[str, typer.Argument(help="Content text for the new entry.")],
    db: Annotated[
        Path,
        typer.Option(
            help="Path to the grimoire SQLite file.",
            envvar="GRIMOIRE_DB",
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            help="Directory for the embedder's model cache.",
            envvar="GRIMOIRE_CACHE",
        ),
    ],
    kind: Annotated[str, typer.Option(help="Kind label for the entry.")] = "note",
    payload: Annotated[
        str | None,
        typer.Option(help="Optional JSON object to attach as the entry payload."),
    ] = None,
    threshold: Annotated[
        float | None,
        typer.Option(help="Optional per-entry similarity threshold."),
    ] = None,
    model: Annotated[
        str,
        typer.Option(help="fastembed model name (only used when creating a new file)."),
    ] = DEFAULT_MODEL,
) -> None:
    """Add a single record to a grimoire."""
    # NOTE: the docstring above is shown as the command's --help text.
    # Decode --payload before opening the datastore so bad input fails early.
    payload_obj: dict | None = None
    if payload is not None:
        try:
            decoded = json.loads(payload)
        except json.JSONDecodeError as exc:
            _fail(f"--payload is not valid JSON: {exc.msg}")
        # Only JSON objects are accepted; scalars and arrays are rejected.
        if not isinstance(decoded, dict):
            _fail("--payload must be a JSON object")
        payload_obj = decoded

    with _open_grimoire(db, cache_folder, model_override=model) as store:
        created = store.add(
            kind=kind,
            content=content,
            payload=payload_obj,
            threshold=threshold,
        )
        # Echo the stored entry as a JSON line.
        _print_entry(created)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@app.command()
def info(
    db: Annotated[
        Path,
        typer.Option(
            help="Path to the grimoire SQLite file.",
            envvar="GRIMOIRE_DB",
        ),
    ],
) -> None:
    """Show metadata and counts for a grimoire file."""
    # NOTE: the docstring above is shown as the command's --help text.
    # `peek` is called without constructing an embedder.
    snapshot = Grimoire.peek(db)
    if snapshot is None:
        _fail(f"No grimoire at {db}")

    # Key order is preserved in the emitted JSON object.
    summary = {
        "path": str(db),
        "model": snapshot.model,
        "dimension": snapshot.dimension,
        "schema_version": snapshot.schema_version,
        "entry_count": snapshot.entry_count,
        "kinds": snapshot.kinds,
    }
    typer.echo(json.dumps(summary))
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@app.command(name="list")
def list_entries(
    db: Annotated[
        Path,
        typer.Option(
            help="Path to the grimoire SQLite file.",
            envvar="GRIMOIRE_DB",
            exists=True,
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            help="Directory for the embedder's model cache.",
            envvar="GRIMOIRE_CACHE",
        ),
    ],
    kind: Annotated[
        str | None,
        typer.Option(help="Restrict to entries of this kind."),
    ] = None,
    limit: Annotated[
        int,
        typer.Option(help="Maximum number of entries to return."),
    ] = 100,
    after_id: Annotated[
        str | None,
        typer.Option(help="Cursor: return entries with id > this value."),
    ] = None,
) -> None:
    """Paginate entries in chronological order (by id)."""
    # NOTE: the docstring above is shown as the command's --help text.
    # The function is named `list_entries` to avoid shadowing the builtin;
    # the CLI command itself is still called `list`.
    with _open_grimoire(db, cache_folder) as store:
        page = store.list(kind=kind, limit=limit, after_id=after_id)
        for row in page:
            _print_entry(row)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@app.command()
def get(
    entry_id: Annotated[str, typer.Argument(help="Entry id (ULID).")],
    db: Annotated[
        Path,
        typer.Option(
            help="Path to the grimoire SQLite file.",
            envvar="GRIMOIRE_DB",
            exists=True,
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            help="Directory for the embedder's model cache.",
            envvar="GRIMOIRE_CACHE",
        ),
    ],
) -> None:
    """Fetch a single entry by id."""
    # NOTE: the docstring above is shown as the command's --help text.
    with _open_grimoire(db, cache_folder) as store:
        found = store.get(entry_id)
        # A `None` lookup result is reported as an error with exit code 1.
        if found is None:
            _fail(f"No entry with id {entry_id!r}")
        _print_entry(found)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@app.command()
def delete(
    entry_id: Annotated[str, typer.Argument(help="Entry id (ULID).")],
    db: Annotated[
        Path,
        typer.Option(
            help="Path to the grimoire SQLite file.",
            envvar="GRIMOIRE_DB",
            exists=True,
        ),
    ],
    cache_folder: Annotated[
        Path,
        typer.Option(
            "--cache-folder",
            help="Directory for the embedder's model cache.",
            envvar="GRIMOIRE_CACHE",
        ),
    ],
) -> None:
    """Delete an entry by id."""
    # NOTE: the docstring above is shown as the command's --help text.
    with _open_grimoire(db, cache_folder) as store:
        removed = store.delete(entry_id)
        # A falsy result from `delete` is reported as "no such entry".
        if not removed:
            _fail(f"No entry with id {entry_id!r}")
        typer.echo(f"Deleted {entry_id}")
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _open_grimoire(
    db: Path, cache_folder: Path, *, model_override: str | None = None
) -> Grimoire:
    """Open the grimoire at `db` with a fastembed embedder.

    The embedding model is picked in this order:
      1. the model recorded in the file, when `Grimoire.peek` returns stats;
      2. `model_override` (normally the CLI --model value);
      3. `DEFAULT_MODEL`.

    The parent directory of `db` and `cache_folder` are created if missing.
    An `ImportError` while building the embedder, or a `GrimoireError` while
    opening the file, is reported through `_fail` (exit code 1).
    """
    for directory in (db.parent, cache_folder):
        directory.mkdir(parents=True, exist_ok=True)

    existing = Grimoire.peek(db)
    if existing:
        # An existing file dictates its own model; the override is ignored.
        chosen_model = existing.model
    else:
        chosen_model = model_override or DEFAULT_MODEL

    try:
        # Imported here so a missing embedder dependency becomes a CLI error
        # rather than an import-time crash of the whole module.
        from grimoire.embedders import FastembedEmbedder

        embedder = FastembedEmbedder(chosen_model, cache_folder=cache_folder)
    except ImportError as exc:
        _fail(str(exc))

    try:
        return Grimoire.open(db, embedder=embedder)
    except GrimoireError as exc:
        _fail(str(exc))
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _load_records(path: Path) -> list[dict]:
    """Parse and validate every record of a JSONL file.

    The file is always decoded as UTF-8, independent of the platform locale,
    and a leading byte-order mark is tolerated. Whitespace-only lines are
    skipped. Each remaining line must be valid JSON and pass
    `_validate_record`.

    Args:
        path: JSONL file to read.

    Returns:
        The parsed records in file order; empty if the file has no records.

    Exits (via `_fail`, code 1) on the first line that is not valid JSON, on
    the first record that fails validation, or if the file is not valid UTF-8.
    """
    records: list[dict] = []
    try:
        # "utf-8-sig" is plain UTF-8 that also strips a leading BOM, which
        # `json.loads` would otherwise reject on the first line.
        with path.open(encoding="utf-8-sig") as f:
            for line_no, raw in enumerate(f, 1):
                line = raw.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError as exc:
                    _fail(f"{path}:{line_no}: invalid JSON: {exc.msg}")
                _validate_record(record, path, line_no)
                records.append(record)
    except UnicodeDecodeError as exc:
        # Report undecodable bytes as a CLI error instead of a traceback.
        _fail(f"{path}: file is not valid UTF-8: {exc.reason}")
    return records
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _validate_record(record: object, path: Path, line_no: int) -> None:
    """Check one parsed JSONL record, exiting via `_fail` on the first problem.

    A record must be a JSON object that contains every key in
    `REQUIRED_FIELDS` and no key outside `RECOGNIZED_FIELDS`. To match what
    the `add` command accepts, `payload` (when present and not null) must be
    a JSON object and `threshold` (when present and not null) must be a
    number.

    Args:
        record: Value produced by `json.loads` for one line.
        path: Source file, used only in error messages.
        line_no: 1-based line number, used only in error messages.
    """
    if not isinstance(record, dict):
        _fail(f"{path}:{line_no}: record must be a JSON object")
    missing = REQUIRED_FIELDS - record.keys()
    if missing:
        _fail(f"{path}:{line_no}: missing required fields: {sorted(missing)}")
    unknown = record.keys() - RECOGNIZED_FIELDS
    if unknown:
        _fail(
            f"{path}:{line_no}: unknown fields {sorted(unknown)}. "
            f"Put extra metadata in `payload`."
        )
    payload = record.get("payload")
    if payload is not None and not isinstance(payload, dict):
        _fail(f"{path}:{line_no}: `payload` must be a JSON object")
    threshold = record.get("threshold")
    # bool is a subclass of int, so JSON true/false is excluded explicitly.
    if threshold is not None and (
        isinstance(threshold, bool) or not isinstance(threshold, (int, float))
    ):
        _fail(f"{path}:{line_no}: `threshold` must be a number")
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _print_entry(entry: Entry) -> None:
    """Write `entry` to stdout as a single JSON line.

    `id`, `kind` and `content` are always emitted. `payload`, `threshold`
    and `distance` are emitted, in that order, only when they are not None.
    A payload is decoded from JSON when possible and otherwise emitted as-is.
    """
    line: dict[str, object] = {
        "id": entry.id,
        "kind": entry.kind,
        "content": entry.content,
    }

    raw_payload = entry.payload
    if raw_payload is not None:
        try:
            line["payload"] = json.loads(raw_payload)
        except json.JSONDecodeError:
            # Not JSON text: fall back to the stored value unchanged.
            line["payload"] = raw_payload

    for optional in ("threshold", "distance"):
        value = getattr(entry, optional)
        if value is not None:
            line[optional] = value

    typer.echo(json.dumps(line))
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _fail(message: str) -> NoReturn:
    """Print `message` to stderr with an "Error: " prefix and exit with code 1.

    Never returns: it always raises `typer.Exit`.
    """
    text = f"Error: {message}"
    typer.echo(text, err=True)
    raise typer.Exit(code=1)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def main() -> None:
    """Run the Typer application.

    This is the target of the `grimoire` console script declared in the
    package metadata.
    """
    app()
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
# Allow running the module directly, in addition to the console script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from grimoire_cli.main import app
|
|
5
|
+
from typer.testing import CliRunner
|
|
6
|
+
|
|
7
|
+
# Shared in-process CLI runner used by every test in this module.
runner = CliRunner()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture(scope="session")
def _grimoire_cache_dir(tmp_path_factory):
    """Create one cache directory per test session.

    Sharing it lets the embedder model be downloaded once for all tests.
    """
    shared_cache = tmp_path_factory.mktemp("grimoire-cache")
    return shared_cache
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture(autouse=True)
def _set_grimoire_cache(monkeypatch, _grimoire_cache_dir):
    """Point GRIMOIRE_CACHE at the session-shared cache for every test."""
    cache_path = str(_grimoire_cache_dir)
    monkeypatch.setenv("GRIMOIRE_CACHE", cache_path)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------- help / no-args ----------
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_help_lists_all_commands():
    """Top-level --help exits 0 and mentions every subcommand."""
    outcome = runner.invoke(app, ["--help"])
    assert outcome.exit_code == 0
    expected = ("ingest", "search", "list", "get", "delete", "add", "info")
    absent = [name for name in expected if name not in outcome.output]
    assert not absent
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_no_args_shows_help():
    """Invoking the app with no arguments still prints the command list."""
    # Click convention: missing subcommand exits 2, but help is still shown,
    # so only the output is checked here.
    outcome = runner.invoke(app, [])
    assert "ingest" in outcome.output
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------- ingest ----------
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_ingest_help_describes_options():
    """`ingest --help` exits 0 and documents --db and --model."""
    outcome = runner.invoke(app, ["ingest", "--help"])
    assert outcome.exit_code == 0
    help_text = outcome.output
    assert "--db" in help_text
    assert "--model" in help_text
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_ingest_missing_file_fails(tmp_path):
    """`ingest` exits non-zero when the input file does not exist."""
    store_path = tmp_path / "store.db"
    absent_input = tmp_path / "nope.jsonl"
    argv = ["ingest", str(absent_input), "--db", str(store_path)]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code != 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_ingest_rejects_invalid_json(tmp_path):
    """A line that is not JSON makes `ingest` exit 1 with an "invalid JSON" error."""
    store_path = tmp_path / "store.db"
    source = tmp_path / "bad.jsonl"
    source.write_text("not valid json\n")
    argv = ["ingest", str(source), "--db", str(store_path)]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 1
    assert "invalid JSON" in outcome.output
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_ingest_rejects_missing_required_field(tmp_path):
    """A record without `kind` makes `ingest` exit 1 and name the missing field."""
    store_path = tmp_path / "store.db"
    source = tmp_path / "missing.jsonl"
    incomplete = {"content": "no kind"}
    source.write_text(json.dumps(incomplete) + "\n")
    argv = ["ingest", str(source), "--db", str(store_path)]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 1
    assert "missing required fields" in outcome.output
    assert "'kind'" in outcome.output
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_ingest_rejects_unknown_field(tmp_path):
    """A record with an unrecognized key makes `ingest` exit 1."""
    store_path = tmp_path / "store.db"
    source = tmp_path / "extra.jsonl"
    overfull = {"kind": "note", "content": "x", "extra": "stuff"}
    source.write_text(json.dumps(overfull) + "\n")
    argv = ["ingest", str(source), "--db", str(store_path)]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 1
    assert "unknown fields" in outcome.output
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_ingest_empty_file_succeeds(tmp_path):
    """An empty input file is not an error: exit 0 with a "No records" notice."""
    store_path = tmp_path / "store.db"
    source = tmp_path / "empty.jsonl"
    source.write_text("")
    argv = ["ingest", str(source), "--db", str(store_path)]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0
    assert "No records" in outcome.output
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ---------- read-side commands: missing-file rejection (no ST needed) ----------
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@pytest.mark.parametrize(
    "cmd_args",
    [
        ["search", "query"],
        ["list"],
        ["get", "01HXXXXXXXXXXXXXXXXXXXXXXX"],
        ["delete", "01HXXXXXXXXXXXXXXXXXXXXXXX"],
        ["info"],
    ],
)
def test_command_rejects_missing_db(tmp_path, cmd_args):
    """Each read-side command exits non-zero when --db names a missing file."""
    absent_db = tmp_path / "nope.db"
    argv = list(cmd_args) + ["--db", str(absent_db)]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code != 0
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@pytest.mark.parametrize("cmd", ["search", "list", "get", "delete", "add", "info"])
def test_command_help_describes_db_option(cmd):
    """Each command's --help documents --db and its GRIMOIRE_DB envvar."""
    outcome = runner.invoke(app, [cmd, "--help"])
    assert outcome.exit_code == 0
    help_text = outcome.output
    assert "--db" in help_text
    assert "GRIMOIRE_DB" in help_text
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@pytest.mark.parametrize("cmd", ["search", "list", "get", "delete", "add", "ingest"])
def test_command_help_describes_cache_folder(cmd):
    """Each embedder-backed command's --help documents --cache-folder and GRIMOIRE_CACHE."""
    outcome = runner.invoke(app, [cmd, "--help"])
    assert outcome.exit_code == 0
    help_text = outcome.output
    assert "--cache-folder" in help_text
    assert "GRIMOIRE_CACHE" in help_text
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@pytest.mark.parametrize(
    "cmd_args",
    [
        ["info"],
        ["list"],
        ["search", "query"],
        ["get", "01HXXXXXXXXXXXXXXXXXXXXXXX"],
        ["delete", "01HXXXXXXXXXXXXXXXXXXXXXXX"],
    ],
)
def test_command_requires_db_when_envvar_unset(monkeypatch, cmd_args):
    """Without --db or GRIMOIRE_DB, each command fails and names the envvar."""
    monkeypatch.delenv("GRIMOIRE_DB", raising=False)
    outcome = runner.invoke(app, cmd_args)
    assert outcome.exit_code != 0
    assert "GRIMOIRE_DB" in outcome.output
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def test_command_requires_cache_folder_when_envvar_unset(monkeypatch, tmp_path):
    """Without --cache-folder or GRIMOIRE_CACHE, `add` fails and names the envvar."""
    # Undo the autouse cache fixture for this test only.
    monkeypatch.delenv("GRIMOIRE_CACHE", raising=False)
    store_path = tmp_path / "store.db"
    monkeypatch.setenv("GRIMOIRE_DB", str(store_path))
    outcome = runner.invoke(app, ["add", "hello"])
    assert outcome.exit_code != 0
    assert "GRIMOIRE_CACHE" in outcome.output
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_envvar_supplies_db_path(monkeypatch, tmp_path):
    """GRIMOIRE_DB is honoured: `info` reaches its own missing-grimoire error."""
    absent_db = tmp_path / "missing.db"
    monkeypatch.setenv("GRIMOIRE_DB", str(absent_db))
    outcome = runner.invoke(app, ["info"])
    assert outcome.exit_code == 1
    assert "No grimoire at" in outcome.output
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# ---------- end-to-end (gated on fastembed) ----------
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@pytest.fixture
def populated_db(tmp_path):
    """Return the path of a fresh grimoire holding two `note` entries.

    Skips the requesting test when fastembed is not installed.
    """
    pytest.importorskip("fastembed")

    store_path = tmp_path / "store.db"
    source = tmp_path / "records.jsonl"
    seed_rows = [
        {"kind": "note", "content": "the moon is full"},
        {"kind": "note", "content": "dragons fly at midnight"},
    ]
    source.write_text("".join(json.dumps(row) + "\n" for row in seed_rows))
    outcome = runner.invoke(app, ["ingest", str(source), "--db", str(store_path)])
    assert outcome.exit_code == 0
    return store_path
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def test_list_outputs_jsonl(populated_db):
    """`list` prints one JSON object per line, covering both seeded entries."""
    outcome = runner.invoke(app, ["list", "--db", str(populated_db)])
    assert outcome.exit_code == 0
    rows = [text for text in outcome.output.splitlines() if text.strip()]
    assert len(rows) == 2
    contents = {json.loads(text)["content"] for text in rows}
    assert contents == {
        "the moon is full",
        "dragons fly at midnight",
    }
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def test_search_returns_relevant_entry_first(populated_db):
    """An exact-text query ranks its own entry first and reports a distance."""
    argv = ["search", "the moon is full", "--db", str(populated_db), "--k", "2"]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0
    rows = [text for text in outcome.output.splitlines() if text.strip()]
    hits = [json.loads(text) for text in rows]
    best = hits[0]
    assert best["content"] == "the moon is full"
    assert "distance" in best
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def test_get_fetches_by_id(populated_db):
    """`get` returns the entry whose id was obtained from `list`."""
    db_arg = str(populated_db)
    listing = runner.invoke(app, ["list", "--db", db_arg, "--limit", "1"])
    known = json.loads(listing.output.strip())

    fetched = runner.invoke(app, ["get", known["id"], "--db", db_arg])
    assert fetched.exit_code == 0
    assert json.loads(fetched.output.strip())["id"] == known["id"]
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def test_get_missing_id_fails(populated_db):
    """`get` with an id that is not stored exits 1 with a "No entry" error."""
    argv = ["get", "01HXXXXXXXXXXXXXXXXXXXXXXX", "--db", str(populated_db)]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 1
    assert "No entry" in outcome.output
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def test_delete_removes_entry(populated_db):
    """Deleting a listed entry reports success and removes exactly that entry.

    Every CLI call's exit code is checked, and the surviving entry count is
    pinned, so the final assertion cannot pass vacuously on an empty or
    failed listing.
    """
    list_result = runner.invoke(
        app, ["list", "--db", str(populated_db), "--limit", "1"]
    )
    assert list_result.exit_code == 0
    first = json.loads(list_result.output.strip())

    del_result = runner.invoke(app, ["delete", first["id"], "--db", str(populated_db)])
    assert del_result.exit_code == 0
    assert f"Deleted {first['id']}" in del_result.output

    after = runner.invoke(app, ["list", "--db", str(populated_db)])
    assert after.exit_code == 0
    remaining = [json.loads(line) for line in after.output.splitlines() if line.strip()]
    # The populated_db fixture ingests two records, so one must survive.
    assert len(remaining) == 1
    assert all(r["id"] != first["id"] for r in remaining)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def test_delete_missing_id_fails(populated_db):
    """`delete` with an id that is not stored exits 1 with a "No entry" error."""
    argv = ["delete", "01HXXXXXXXXXXXXXXXXXXXXXXX", "--db", str(populated_db)]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 1
    assert "No entry" in outcome.output
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def test_list_filters_by_kind(populated_db):
    """`list --kind` returns only entries of the requested kind."""
    pytest.importorskip("fastembed")

    # Add a record of a different kind.
    data = populated_db.parent / "second.jsonl"
    data.write_text(json.dumps({"kind": "spell", "content": "lumos"}) + "\n")
    ingest_result = runner.invoke(app, ["ingest", str(data), "--db", str(populated_db)])
    # Fail here, not on a confusing count mismatch below, if setup breaks.
    assert ingest_result.exit_code == 0

    result = runner.invoke(app, ["list", "--db", str(populated_db), "--kind", "spell"])
    assert result.exit_code == 0
    lines = [line for line in result.output.splitlines() if line.strip()]
    assert len(lines) == 1
    assert json.loads(lines[0])["kind"] == "spell"
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# ---------- info / add / search --dynamic-threshold ----------
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def test_info_reports_metadata_and_counts(populated_db):
    """`info` emits one JSON object with path, model, dimension, schema and counts."""
    outcome = runner.invoke(app, ["info", "--db", str(populated_db)])
    assert outcome.exit_code == 0
    report = json.loads(outcome.output.strip())
    assert report["path"] == str(populated_db)
    assert report["model"]
    assert report["dimension"] > 0
    assert report["schema_version"] == 1
    # The fixture seeds two entries, both of kind "note".
    assert report["entry_count"] == 2
    assert report["kinds"] == {"note": 2}
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def test_info_rejects_non_grimoire_file(tmp_path):
    """A SQLite file that is not a grimoire makes `info` exit 1."""
    import sqlite3

    foreign_db = tmp_path / "stranger.db"
    # Build a valid SQLite database containing only an unrelated table.
    connection = sqlite3.connect(foreign_db)
    connection.execute("CREATE TABLE other (x INTEGER)")
    connection.commit()
    connection.close()

    outcome = runner.invoke(app, ["info", "--db", str(foreign_db)])
    assert outcome.exit_code == 1
    assert "No grimoire at" in outcome.output
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def test_info_reports_missing_path_with_friendly_error(tmp_path):
    """`info` on a non-existent path exits 1 with the "No grimoire at" message."""
    absent_db = tmp_path / "nope.db"
    outcome = runner.invoke(app, ["info", "--db", str(absent_db)])
    assert outcome.exit_code == 1
    assert "No grimoire at" in outcome.output
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def test_add_inserts_a_single_record(tmp_path):
    """`add` echoes the new entry and `list` then shows exactly that entry."""
    pytest.importorskip("fastembed")

    db_arg = str(tmp_path / "store.db")
    added = runner.invoke(
        app, ["add", "the moon is full", "--kind", "note", "--db", db_arg]
    )
    assert added.exit_code == 0
    echoed = json.loads(added.output.strip())
    assert echoed["content"] == "the moon is full"
    assert echoed["kind"] == "note"
    assert "id" in echoed

    listing = runner.invoke(app, ["list", "--db", db_arg])
    assert listing.exit_code == 0
    stored = [
        json.loads(text) for text in listing.output.splitlines() if text.strip()
    ]
    assert len(stored) == 1
    assert stored[0]["id"] == echoed["id"]
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def test_add_rejects_non_object_payload(tmp_path):
    """A --payload that is valid JSON but not an object makes `add` exit 1."""
    pytest.importorskip("fastembed")

    store_path = tmp_path / "store.db"
    argv = ["add", "hello", "--db", str(store_path), "--payload", '"just a string"']
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 1
    assert "JSON object" in outcome.output
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def test_add_rejects_invalid_payload_json(tmp_path):
    """A --payload that is not parseable JSON makes `add` exit 1."""
    pytest.importorskip("fastembed")

    store_path = tmp_path / "store.db"
    argv = ["add", "hello", "--db", str(store_path), "--payload", "{not json"]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 1
    assert "valid JSON" in outcome.output
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def test_search_dynamic_threshold_filters_results(tmp_path):
    """--dynamic-threshold drops hits that miss their stored threshold."""
    pytest.importorskip("fastembed")

    db_arg = str(tmp_path / "store.db")
    # Two entries both gated on a very tight threshold (0.0); only an
    # exact-match query should make it through.
    source = tmp_path / "records.jsonl"
    seed_rows = [
        {"kind": "note", "content": "the moon is full", "threshold": 0.0},
        {"kind": "note", "content": "dragons fly at midnight", "threshold": 0.0},
    ]
    source.write_text("".join(json.dumps(row) + "\n" for row in seed_rows))
    assert runner.invoke(app, ["ingest", str(source), "--db", db_arg]).exit_code == 0

    base_argv = ["search", "the moon is full", "--db", db_arg, "--k", "5"]

    # Without the flag both entries come back.
    ungated = runner.invoke(app, base_argv)
    assert ungated.exit_code == 0
    assert len([text for text in ungated.output.splitlines() if text.strip()]) == 2

    # With the flag only the exact match survives.
    gated = runner.invoke(app, [*base_argv, "--dynamic-threshold"])
    assert gated.exit_code == 0
    survivors = [text for text in gated.output.splitlines() if text.strip()]
    assert len(survivors) == 1
    assert json.loads(survivors[0])["content"] == "the moon is full"
|