4lt7ab-grimoire 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ .venv/
7
+ build/
8
+ dist/
9
+ .DS_Store
10
+ .idea/
11
+ .vscode/
12
+ *.swp
13
+ *.db
14
+ .grimoire/
15
+ .local/
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: 4lt7ab-grimoire
3
+ Version: 0.0.1
4
+ Summary: SQLite + sqlite-vec semantic search datastore
5
+ Requires-Python: >=3.14
6
+ Requires-Dist: python-ulid>=3.0
7
+ Requires-Dist: sqlite-vec>=0.1.6
8
+ Requires-Dist: typing-extensions>=4.0
9
+ Provides-Extra: fastembed
10
+ Requires-Dist: fastembed>=0.4; extra == 'fastembed'
11
+ Requires-Dist: socksio>=1.0; extra == 'fastembed'
@@ -0,0 +1,20 @@
1
+ [project]
2
+ name = "4lt7ab-grimoire"
3
+ version = "0.0.1"
4
+ description = "SQLite + sqlite-vec semantic search datastore"
5
+ requires-python = ">=3.14"
6
+ dependencies = [
7
+ "python-ulid>=3.0",
8
+ "sqlite-vec>=0.1.6",
9
+ "typing-extensions>=4.0",
10
+ ]
11
+
12
+ [project.optional-dependencies]
13
+ fastembed = ["fastembed>=0.4", "socksio>=1.0"]
14
+
15
+ [build-system]
16
+ requires = ["hatchling"]
17
+ build-backend = "hatchling.build"
18
+
19
+ [tool.hatch.build.targets.wheel]
20
+ packages = ["src/grimoire"]
@@ -0,0 +1,20 @@
1
+ from grimoire.core import Grimoire
2
+ from grimoire.embedder import Embedder
3
+ from grimoire.errors import (
4
+ GrimoireError,
5
+ GrimoireMismatch,
6
+ InvalidEmbedder,
7
+ SchemaVersionError,
8
+ )
9
+ from grimoire.models import Entry, Stats
10
+
11
+ __all__ = [
12
+ "Embedder",
13
+ "Entry",
14
+ "Grimoire",
15
+ "GrimoireError",
16
+ "GrimoireMismatch",
17
+ "InvalidEmbedder",
18
+ "SchemaVersionError",
19
+ "Stats",
20
+ ]
@@ -0,0 +1,209 @@
1
+ import json
2
+ import sqlite3
3
+ import struct
4
+ from pathlib import Path
5
+ from typing import Any, Self
6
+
7
+ import sqlite_vec
8
+ from ulid import ULID
9
+
10
+ from grimoire.embedder import Embedder
11
+ from grimoire.models import Entry, Stats
12
+ from grimoire.schema import bootstrap
13
+
14
+
15
class Grimoire:
    """A semantically-indexed datastore backed by one SQLite file.

    Every entry is a row in ``entries`` plus an embedding of its content in
    the ``vectors`` table; both are written and removed inside the same
    transaction. Instances are context managers: leaving the ``with`` block
    closes the underlying connection.
    """

    # The ``list`` method defined further down shadows the builtin inside this
    # class namespace, which is also where method annotations are resolved.
    # Binding the alias here, while ``list`` still names the builtin, keeps
    # those annotations from subscripting the method instead of the type.
    _EntryList = list[Entry]

    def __init__(self, *, conn: sqlite3.Connection, embedder: Embedder) -> None:
        """Wrap an already-bootstrapped connection; prefer :meth:`open`."""
        self._conn = conn
        self._embedder = embedder

    @classmethod
    def open(cls, path: str | Path, *, embedder: Embedder) -> Self:
        """Open (creating if needed) the grimoire stored at ``path``.

        The schema is bootstrapped against ``embedder``; any exception raised
        while doing so closes the connection before propagating.
        """
        conn = _open_conn(str(path))
        try:
            bootstrap(conn, embedder)
            return cls(conn=conn, embedder=embedder)
        except BaseException:
            conn.close()
            raise

    @classmethod
    def peek(cls, path: str | Path) -> Stats | None:
        """Read metadata and counts from a grimoire file without opening it for use.

        Returns None if the file does not exist or is not a grimoire database.
        Does not load sqlite-vec or require an embedder, so it is safe for
        inspection (CLI `info`, model auto-detect) before deciding how to open.
        The file is opened read-only, so peeking never creates or modifies it.
        """
        path = Path(path)
        if not path.exists():
            return None
        try:
            # ``mode=ro`` guarantees inspection cannot create the file (for
            # example if it vanishes after the existence check) or write to it.
            conn = sqlite3.connect(f"{path.absolute().as_uri()}?mode=ro", uri=True)
            try:
                row = conn.execute(
                    "SELECT model, dimension FROM grimoire WHERE id = 1"
                ).fetchone()
                if row is None:
                    return None
                version = conn.execute("PRAGMA user_version").fetchone()[0]
                count = conn.execute("SELECT COUNT(*) FROM entries").fetchone()[0]
                kind_rows = conn.execute(
                    "SELECT kind, COUNT(*) FROM entries GROUP BY kind ORDER BY kind"
                ).fetchall()
            finally:
                conn.close()
        except sqlite3.Error:
            # Not a database, missing tables, unreadable file: not a grimoire.
            return None
        return Stats(
            model=row[0],
            dimension=row[1],
            schema_version=version,
            entry_count=count,
            kinds=dict(kind_rows),
        )

    def add(
        self,
        *,
        kind: str,
        content: str,
        payload: dict[str, Any] | None = None,
        threshold: float | None = None,
    ) -> Entry:
        """Embed ``content`` and store it as a new entry.

        ``payload`` is serialized with ``json.dumps`` before storage, and the
        returned entry carries that JSON text (or None), exactly as later
        reads return it. The entry row and its vector are inserted in one
        transaction.
        """
        entry_id = str(ULID())
        vector = self._embedder.embed(content)
        payload_json = json.dumps(payload) if payload is not None else None

        with self._conn:
            self._conn.execute(
                """
                INSERT INTO entries (id, kind, content, payload, threshold)
                VALUES (?, ?, ?, ?, ?)
                """,
                (entry_id, kind, content, payload_json, threshold),
            )
            self._conn.execute(
                "INSERT INTO vectors (entry_id, kind, embedding) VALUES (?, ?, ?)",
                (entry_id, kind, _pack(vector)),
            )

        return Entry(
            id=entry_id,
            kind=kind,
            content=content,
            payload=payload_json,
            threshold=threshold,
        )

    def get(self, entry_id: str) -> Entry | None:
        """Return the entry with ``entry_id``, or None when no such row exists."""
        row = self._conn.execute(
            "SELECT id, kind, content, payload, threshold FROM entries WHERE id = ?",
            (entry_id,),
        ).fetchone()
        return _row_to_entry(row) if row is not None else None

    def list(
        self,
        *,
        kind: str | None = None,
        limit: int = 100,
        after_id: str | None = None,
    ) -> _EntryList:
        """Return up to ``limit`` entries ordered by id.

        ``kind`` restricts the result to one kind. ``after_id`` returns only
        entries whose id sorts after it, which lets callers page by passing
        the last id of the previous page.
        """
        sql = "SELECT id, kind, content, payload, threshold FROM entries"
        params: list[Any] = []
        clauses: list[str] = []
        if kind is not None:
            clauses.append("kind = ?")
            params.append(kind)
        if after_id is not None:
            clauses.append("id > ?")
            params.append(after_id)
        if clauses:
            sql += " WHERE " + " AND ".join(clauses)
        sql += " ORDER BY id LIMIT ?"
        params.append(limit)

        rows = self._conn.execute(sql, params).fetchall()
        return [_row_to_entry(r) for r in rows]

    def delete(self, entry_id: str) -> bool:
        """Remove an entry and its vector; return False if the id is unknown."""
        with self._conn:
            cursor = self._conn.execute("DELETE FROM entries WHERE id = ?", (entry_id,))
            if cursor.rowcount == 0:
                return False
            self._conn.execute("DELETE FROM vectors WHERE entry_id = ?", (entry_id,))
        return True

    def search(
        self,
        query: str,
        *,
        kind: str | None = None,
        k: int = 10,
        dynamic_threshold: bool = False,
    ) -> _EntryList:
        """Return the ``k`` nearest entries to ``query``, closest first.

        Each result has ``distance`` populated. ``kind`` restricts the search
        to one kind. With ``dynamic_threshold`` set, an entry that has a
        stored ``threshold`` is dropped when its distance exceeds that
        threshold; entries without a threshold are always kept.
        """
        vector = self._embedder.embed(query)

        sql = (
            "SELECT e.id, e.kind, e.content, e.payload, e.threshold, v.distance "
            "FROM vectors v JOIN entries e ON e.id = v.entry_id "
            "WHERE v.embedding MATCH ? AND k = ?"
        )
        params: list[Any] = [_pack(vector), k]
        if kind is not None:
            sql += " AND v.kind = ?"
            params.append(kind)
        sql += " ORDER BY v.distance"

        rows = self._conn.execute(sql, params).fetchall()
        results = [
            Entry(
                id=r[0],
                kind=r[1],
                content=r[2],
                payload=r[3],
                threshold=r[4],
                distance=r[5],
            )
            for r in rows
        ]
        if dynamic_threshold:
            results = [
                r for r in results if r.threshold is None or r.distance <= r.threshold
            ]
        return results

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()

    def __enter__(self) -> Self:
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()
187
+
188
+
189
def _open_conn(path: str) -> sqlite3.Connection:
    """Open ``path`` with the sqlite-vec extension loaded.

    Extension loading is enabled only for the duration of the load and then
    switched off again. If any setup step raises (extension loading not
    supported, sqlite-vec failing to load, the PRAGMA failing), the
    connection is closed before the exception propagates so no handle leaks.
    """
    conn = sqlite3.connect(path)
    try:
        conn.enable_load_extension(True)
        sqlite_vec.load(conn)
        conn.enable_load_extension(False)
        conn.execute("PRAGMA foreign_keys = ON")
    except BaseException:
        conn.close()
        raise
    return conn
196
+
197
+
198
+ def _pack(vector: list[float]) -> bytes:
199
+ return struct.pack(f"{len(vector)}f", *vector)
200
+
201
+
202
def _row_to_entry(row: tuple) -> Entry:
    """Build an Entry from a row shaped (id, kind, content, payload, threshold)."""
    columns = ("id", "kind", "content", "payload", "threshold")
    values = {name: row[position] for position, name in enumerate(columns)}
    return Entry(**values)
@@ -0,0 +1,14 @@
1
+ from typing import Protocol, runtime_checkable
2
+
3
+
4
@runtime_checkable
class Embedder(Protocol):
    """Structural interface for the text embedder a caller hands to a Grimoire.

    Any object exposing these three members satisfies the protocol; no
    inheritance is required.
    """

    @property
    def model(self) -> str:
        """Name identifying the embedding model."""
        ...

    @property
    def dimension(self) -> int:
        """Length of every vector returned by ``embed``."""
        ...

    def embed(self, text: str) -> list[float]:
        """Return the embedding of ``text`` as a list of floats."""
        ...
@@ -0,0 +1,3 @@
1
+ from grimoire.embedders.fastembed import FastembedEmbedder
2
+
3
+ __all__ = ["FastembedEmbedder"]
@@ -0,0 +1,49 @@
1
+ from pathlib import Path
2
+
3
+
4
+ class FastembedEmbedder:
5
+ """Embedder backed by Qdrant's `fastembed` library (ONNX Runtime).
6
+
7
+ Requires the optional extra: `pip install grimoire[fastembed]`.
8
+
9
+ `cache_folder` is required — the library does not pick a default
10
+ filesystem location on the caller's behalf.
11
+ """
12
+
13
+ def __init__(
14
+ self,
15
+ model_name: str = "BAAI/bge-small-en-v1.5",
16
+ *,
17
+ cache_folder: str | Path,
18
+ threads: int | None = None,
19
+ ) -> None:
20
+ try:
21
+ from fastembed import TextEmbedding
22
+ except ImportError as exc:
23
+ raise ImportError(
24
+ "FastembedEmbedder requires the `fastembed` extra. "
25
+ "Install with: pip install grimoire[fastembed]"
26
+ ) from exc
27
+
28
+ self._model_name = model_name
29
+ self._model = TextEmbedding(
30
+ model_name=model_name,
31
+ cache_dir=str(cache_folder),
32
+ threads=threads,
33
+ )
34
+ # Determine dimension by embedding a probe — works regardless of
35
+ # fastembed's internal model registry shape.
36
+ [probe] = list(self._model.embed(["dimension probe"]))
37
+ self._dimension = len(probe)
38
+
39
+ @property
40
+ def model(self) -> str:
41
+ return self._model_name
42
+
43
+ @property
44
+ def dimension(self) -> int:
45
+ return self._dimension
46
+
47
+ def embed(self, text: str) -> list[float]:
48
+ [vector] = list(self._model.embed([text]))
49
+ return vector.tolist()
@@ -0,0 +1,14 @@
1
class GrimoireError(Exception):
    """Root of the grimoire exception hierarchy; catch this to handle them all."""


class GrimoireMismatch(GrimoireError):
    """The supplied embedder's model or dimension differs from the stored one."""


class SchemaVersionError(GrimoireError):
    """The file's schema version is not the one this library expects."""


class InvalidEmbedder(GrimoireError):
    """The embedder reported a model or dimension that is not acceptable."""
@@ -0,0 +1,20 @@
1
+ from dataclasses import dataclass, field
2
+
3
+
4
+ @dataclass
5
+ class Entry:
6
+ id: str
7
+ kind: str
8
+ content: str
9
+ payload: str | None = None
10
+ threshold: float | None = None
11
+ distance: float | None = None
12
+
13
+
14
@dataclass
class Stats:
    """Summary of a grimoire file, as produced by peeking at it."""

    # Embedder model name and vector width recorded in the file.
    model: str
    dimension: int
    # Value of the file's ``user_version`` pragma.
    schema_version: int
    # Total number of entries, and the number of entries per kind.
    entry_count: int
    kinds: dict[str, int] = field(default_factory=dict)
@@ -0,0 +1,71 @@
1
+ import sqlite3
2
+
3
+ from grimoire.embedder import Embedder
4
+ from grimoire.errors import GrimoireMismatch, InvalidEmbedder, SchemaVersionError
5
+
6
+ SCHEMA_VERSION = 1
7
+
8
+
9
def bootstrap(conn: sqlite3.Connection, embedder: Embedder) -> None:
    """Create the schema in a fresh database, or verify an existing one.

    A database whose ``user_version`` is 0 is treated as new and gets the full
    schema. Otherwise the stored schema version and the recorded embedder
    model/dimension must match what the caller supplies.

    Raises:
        InvalidEmbedder: The embedder's model or dimension is malformed.
        SchemaVersionError: The file has a different schema version, or it
            lacks its grimoire metadata row.
        GrimoireMismatch: The embedder differs from the one recorded in the file.
    """
    # Validate first: the dimension is interpolated into DDL below.
    _validate_embedder(embedder)
    version = conn.execute("PRAGMA user_version").fetchone()[0]
    if version == 0:
        _create_schema(conn, embedder)
        return
    if version != SCHEMA_VERSION:
        raise SchemaVersionError(
            f"Database schema version is {version}, library expects {SCHEMA_VERSION}"
        )
    row = conn.execute("SELECT model, dimension FROM grimoire WHERE id = 1").fetchone()
    if row is None:
        raise SchemaVersionError("Database is missing its grimoire row")
    stored_model, stored_dim = row
    if stored_model != embedder.model or stored_dim != embedder.dimension:
        raise GrimoireMismatch(
            f"Embedder (model={embedder.model!r}, dim={embedder.dimension}) "
            f"does not match grimoire "
            f"(model={stored_model!r}, dim={stored_dim})"
        )


def _create_schema(conn: sqlite3.Connection, embedder: Embedder) -> None:
    """Create every table, record the embedder and stamp the version atomically.

    Everything runs in one transaction. If any step fails (for example the
    vec0 module is unavailable), nothing is left behind; otherwise leftover
    tables with ``user_version`` still 0 would make every later open fail on
    ``CREATE TABLE``.
    """
    statements = (
        """
        CREATE TABLE grimoire (
            id INTEGER PRIMARY KEY CHECK (id = 1),
            model TEXT NOT NULL,
            dimension INTEGER NOT NULL
        )
        """,
        """
        CREATE TABLE entries (
            id TEXT PRIMARY KEY,
            kind TEXT NOT NULL,
            content TEXT NOT NULL,
            payload TEXT,
            threshold REAL
        )
        """,
        "CREATE INDEX entries_kind ON entries(kind)",
        f"""
        CREATE VIRTUAL TABLE vectors USING vec0(
            entry_id TEXT PRIMARY KEY,
            kind TEXT partition key,
            embedding FLOAT[{embedder.dimension}]
        )
        """,
    )
    # Start a transaction only when the caller has none open, and finish it
    # with matching explicit statements so this works whatever transaction
    # control mode the connection uses.
    owns_transaction = not conn.in_transaction
    if owns_transaction:
        conn.execute("BEGIN")
    try:
        for statement in statements:
            conn.execute(statement)
        conn.execute(
            "INSERT INTO grimoire (id, model, dimension) VALUES (1, ?, ?)",
            (embedder.model, embedder.dimension),
        )
        conn.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
        if owns_transaction:
            conn.execute("COMMIT")
        else:
            conn.commit()
    except BaseException:
        if not owns_transaction:
            conn.rollback()
        elif conn.in_transaction:
            # SQLite may already have rolled back on its own for some errors.
            conn.execute("ROLLBACK")
        raise
56
+
57
+
58
def _validate_embedder(embedder: Embedder) -> None:
    """Reject embedders whose dimension or model has the wrong shape.

    The dimension must be a real ``int`` (``bool`` is excluded even though it
    subclasses ``int``) and positive; the model must be a non-empty string.

    Raises:
        InvalidEmbedder: On the first rule that fails.
    """
    dimension_is_int = isinstance(embedder.dimension, int) and not isinstance(
        embedder.dimension, bool
    )
    if not dimension_is_int:
        type_name = type(embedder.dimension).__name__
        raise InvalidEmbedder(f"Embedder dimension must be an int, got {type_name}")

    if embedder.dimension <= 0:
        raise InvalidEmbedder(
            f"Embedder dimension must be positive, got {embedder.dimension}"
        )

    model_is_valid = isinstance(embedder.model, str) and bool(embedder.model)
    if not model_is_valid:
        raise InvalidEmbedder(
            f"Embedder model must be a non-empty string, got {embedder.model!r}"
        )
@@ -0,0 +1,65 @@
1
+ """Integration tests for the FastembedEmbedder.
2
+
3
+ Skipped unless the `fastembed` extra is installed:
4
+ uv sync --package grimoire --extra fastembed
5
+ """
6
+
7
+ import os
8
+
9
+ import pytest
10
+
11
+ pytest.importorskip("fastembed")
12
+
13
+ from grimoire import Grimoire # noqa: E402
14
+ from grimoire.embedders import FastembedEmbedder # noqa: E402
15
+
16
+
17
@pytest.fixture
def cache_dir(tmp_path, monkeypatch):
    """Point HF_HOME at a per-test directory and hand that directory back."""
    sandbox = tmp_path / "fastembed_cache"
    monkeypatch.setenv("HF_HOME", str(sandbox))
    return sandbox
23
+
24
+
25
def test_default_model_dimension(cache_dir):
    """The default model reports its name and a 384-wide embedding."""
    embedder = FastembedEmbedder(cache_folder=cache_dir)
    assert embedder.model == "BAAI/bge-small-en-v1.5"
    assert embedder.dimension == 384
29
+
30
+
31
def test_embed_returns_correct_length_vector(cache_dir):
    """embed() yields `dimension` plain Python floats."""
    embedder = FastembedEmbedder(cache_folder=cache_dir)
    components = embedder.embed("hello world")
    assert len(components) == embedder.dimension
    non_floats = [c for c in components if not isinstance(c, float)]
    assert not non_floats
36
+
37
+
38
def test_round_trip_through_grimoire(tmp_path, cache_dir):
    """Stored text is found again, ranked ahead of unrelated text."""
    embedder = FastembedEmbedder(cache_folder=cache_dir)
    moon, dragons = "the moon is full tonight", "dragons fly at midnight"
    with Grimoire.open(tmp_path / "store.db", embedder=embedder) as store:
        for text in (moon, dragons):
            store.add(kind="note", content=text)

        hits = store.search(moon, k=2)
        assert len(hits) == 2
        best, runner_up = hits
        assert best.content == moon
        assert best.distance < runner_up.distance
48
+
49
+
50
def test_cache_folder_pass_through(tmp_path):
    """The model cache lands in the directory given as `cache_folder`."""
    cache = tmp_path / "models"
    e = FastembedEmbedder(cache_folder=cache)
    assert e.dimension == 384
    # Construction embeds a probe, so the model must already be loaded from
    # the cache by now. Checking `cache.parent` would prove nothing, since
    # `tmp_path` always exists; require the cache directory itself to exist
    # and to hold the downloaded files.
    assert cache.is_dir()
    assert any(cache.iterdir())
58
+
59
+
60
# HF_HOME as it was when this module was imported, i.e. before any fixture in
# this file had a chance to modify the environment.
_HF_HOME_AT_IMPORT = os.environ.get("HF_HOME")


# Isolation safety: ensure HF_HOME doesn't leak into other tests.
def test_env_isolation():
    """HF_HOME is back to its import-time value once the fixtures are torn down.

    Comparing against the captured baseline, rather than requiring the
    variable to be unset, keeps the check valid on machines where HF_HOME is
    legitimately set.
    """
    assert os.environ.get("HF_HOME") == _HF_HOME_AT_IMPORT
@@ -0,0 +1,283 @@
1
+ import hashlib
2
+
3
+ import pytest
4
+ from grimoire import Entry, Grimoire, GrimoireMismatch, InvalidEmbedder, Stats
5
+
6
+
7
class FakeEmbedder:
    """Deterministic stand-in embedder: vectors are derived from a SHA-256 digest."""

    def __init__(self, model: str = "fake-v1", dimension: int = 8) -> None:
        self._model, self._dimension = model, dimension

    @property
    def model(self) -> str:
        return self._model

    @property
    def dimension(self) -> int:
        return self._dimension

    def embed(self, text: str) -> list[float]:
        """Map the first `dimension` digest bytes of `text` into [-1.0, 1.0)."""
        leading = hashlib.sha256(text.encode()).digest()[: self._dimension]
        return [(byte - 128) / 128.0 for byte in leading]
23
+
24
+
25
def test_open_creates_file_idempotently(tmp_path):
    """Opening the same path twice succeeds and leaves the file in place."""
    store_path = tmp_path / "store.db"
    for _ in range(2):
        Grimoire.open(store_path, embedder=FakeEmbedder()).close()
    assert store_path.exists()
30
+
31
+
32
def test_embedder_model_mismatch_raises(tmp_path):
    """Reopening with a different model name is refused."""
    store_path = tmp_path / "store.db"
    original = Grimoire.open(store_path, embedder=FakeEmbedder(model="alpha"))
    original.close()
    with pytest.raises(GrimoireMismatch):
        Grimoire.open(store_path, embedder=FakeEmbedder(model="beta"))
37
+
38
+
39
def test_embedder_dimension_mismatch_raises(tmp_path):
    """Reopening with a different vector width is refused."""
    store_path = tmp_path / "store.db"
    original = Grimoire.open(store_path, embedder=FakeEmbedder(dimension=8))
    original.close()
    with pytest.raises(GrimoireMismatch):
        Grimoire.open(store_path, embedder=FakeEmbedder(dimension=16))
44
+
45
+
46
def test_add_returns_entry(tmp_path):
    """add() hands back an Entry echoing the kind and content given."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        created = store.add(kind="note", content="the moon is full")
        assert isinstance(created, Entry)
        assert created.kind == "note"
        assert created.content == "the moon is full"
52
+
53
+
54
def test_search_finds_exact_match_first(tmp_path):
    """Querying with stored text ranks that entry first at distance zero."""
    corpus = (
        "the moon is full",
        "dragons fly at midnight",
        "potions bubble in the cauldron",
    )
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        for text in corpus:
            store.add(kind="note", content=text)

        hits = store.search(corpus[0], k=3)
        assert len(hits) == 3
        top = hits[0]
        assert top.content == "the moon is full"
        assert top.distance == 0.0
64
+
65
+
66
def test_search_filters_by_kind(tmp_path):
    """A kind filter excludes identical content stored under another kind."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        for label in ("spell", "potion"):
            store.add(kind=label, content="lumos")

        hits = store.search("lumos", kind="spell", k=10)
        assert len(hits) == 1
        assert hits[0].kind == "spell"
74
+
75
+
76
def test_dynamic_threshold_drops_low_match(tmp_path):
    """With a zero threshold, gated search keeps only the exact match."""
    target = "the moon is full"
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        for text in (target, "dragons fly at midnight"):
            store.add(kind="note", content=text, threshold=0.0)

        ungated = store.search(target, k=10)
        assert len(ungated) == 2

        gated = store.search(target, k=10, dynamic_threshold=True)
        assert len(gated) == 1
        assert gated[0].content == "the moon is full"
87
+
88
+
89
def test_two_files_are_independent(tmp_path):
    """Content added to one file never shows up when searching another."""
    first_path, second_path = tmp_path / "a.db", tmp_path / "b.db"
    with Grimoire.open(first_path, embedder=FakeEmbedder()) as first:
        first.add(kind="note", content="alpha")
    with Grimoire.open(second_path, embedder=FakeEmbedder()) as second:
        second.add(kind="note", content="beta")
        hits = second.search("alpha", k=10)
        assert all(hit.content != "alpha" for hit in hits)
98
+
99
+
100
def test_data_persists_across_reopens(tmp_path):
    """An entry written in one session is searchable in the next."""
    store_path = tmp_path / "store.db"
    with Grimoire.open(store_path, embedder=FakeEmbedder()) as writer:
        writer.add(kind="note", content="the moon is full")

    with Grimoire.open(store_path, embedder=FakeEmbedder()) as reader:
        hits = reader.search("the moon is full", k=1)
        assert len(hits) == 1
        assert hits[0].content == "the moon is full"
109
+
110
+
111
def test_get_returns_entry(tmp_path):
    """get() returns the entry that add() just created."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        created = store.add(kind="note", content="lumos")
        found = store.get(created.id)
        assert found is not None
        assert found.id == created.id
        assert found.content == "lumos"
118
+
119
+
120
def test_get_returns_none_for_missing_id(tmp_path):
    """Looking up an id that was never stored yields None."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        missing = store.get("01HXXXXXXXXXXXXXXXXXXXXXXX")
        assert missing is None
123
+
124
+
125
def test_list_returns_all_entries_in_chronological_order(tmp_path):
    """list() returns entries in the order they were added."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        added_ids = [
            store.add(kind="note", content=text).id
            for text in ("first", "second", "third")
        ]
        listed_ids = [entry.id for entry in store.list()]
        assert listed_ids == added_ids
132
+
133
+
134
def test_list_filters_by_kind(tmp_path):
    """list(kind=...) returns only entries of that kind."""
    seeded = (
        ("spell", "lumos"),
        ("potion", "felix felicis"),
        ("spell", "alohomora"),
    )
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        for label, text in seeded:
            store.add(kind=label, content=text)

        spells = store.list(kind="spell")
        assert len(spells) == 2
        assert all(entry.kind == "spell" for entry in spells)
143
+
144
+
145
def test_list_paginates_via_after_id(tmp_path):
    """Feeding each page's last id back as after_id walks every entry once."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        ids = [store.add(kind="note", content=f"e{i}").id for i in range(5)]

        first = store.list(limit=2)
        assert [entry.id for entry in first] == [ids[0], ids[1]]

        second = store.list(limit=2, after_id=first[-1].id)
        assert [entry.id for entry in second] == [ids[2], ids[3]]

        third = store.list(limit=2, after_id=second[-1].id)
        assert [entry.id for entry in third] == [ids[4]]

        # Nothing sorts after the final id, so the next page is empty.
        fourth = store.list(limit=2, after_id=third[-1].id)
        assert fourth == []
160
+
161
+
162
def test_list_respects_limit(tmp_path):
    """list(limit=n) returns at most n entries."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        for index in range(5):
            store.add(kind="note", content=f"e{index}")
        limited = store.list(limit=3)
        assert len(limited) == 3
167
+
168
+
169
def test_delete_removes_entry_and_vector(tmp_path):
    """A deleted entry is gone from both lookup and search."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        doomed = store.add(kind="note", content="ephemeral")
        assert store.delete(doomed.id) is True
        assert store.get(doomed.id) is None

        # The vector must be gone too, or search would still surface the id.
        hits = store.search("ephemeral", k=10)
        assert all(hit.id != doomed.id for hit in hits)
178
+
179
+
180
def test_delete_returns_false_for_missing_id(tmp_path):
    """Deleting an unknown id reports False."""
    with Grimoire.open(tmp_path / "store.db", embedder=FakeEmbedder()) as store:
        outcome = store.delete("01HXXXXXXXXXXXXXXXXXXXXXXX")
        assert outcome is False
183
+
184
+
185
+ class _BadDimensionEmbedder:
186
+ @property
187
+ def model(self) -> str:
188
+ return "bad"
189
+
190
+ @property
191
+ def dimension(self): # not annotated, returns whatever
192
+ return "8); DROP TABLE entries; --"
193
+
194
+ def embed(self, text: str) -> list[float]:
195
+ return [0.0] * 8
196
+
197
+
198
+ class _NonPositiveDimensionEmbedder:
199
+ @property
200
+ def model(self) -> str:
201
+ return "bad"
202
+
203
+ @property
204
+ def dimension(self) -> int:
205
+ return 0
206
+
207
+ def embed(self, text: str) -> list[float]:
208
+ return []
209
+
210
+
211
+ class _EmptyModelEmbedder:
212
+ @property
213
+ def model(self) -> str:
214
+ return ""
215
+
216
+ @property
217
+ def dimension(self) -> int:
218
+ return 8
219
+
220
+ def embed(self, text: str) -> list[float]:
221
+ return [0.0] * 8
222
+
223
+
224
def test_embedder_with_non_int_dimension_rejected(tmp_path):
    """A non-int dimension is refused before any schema is written."""
    store_path = tmp_path / "store.db"
    with pytest.raises(InvalidEmbedder):
        Grimoire.open(store_path, embedder=_BadDimensionEmbedder())
    # Either no file was created, or it was created but never written to.
    assert not store_path.exists() or store_path.stat().st_size == 0
229
+
230
+
231
def test_embedder_with_zero_dimension_rejected(tmp_path):
    """A zero dimension is refused."""
    store_path = tmp_path / "store.db"
    with pytest.raises(InvalidEmbedder):
        Grimoire.open(store_path, embedder=_NonPositiveDimensionEmbedder())
234
+
235
+
236
def test_embedder_with_empty_model_rejected(tmp_path):
    """An empty model name is refused."""
    store_path = tmp_path / "store.db"
    with pytest.raises(InvalidEmbedder):
        Grimoire.open(store_path, embedder=_EmptyModelEmbedder())
239
+
240
+
241
+ # ---------- peek ----------
242
+
243
+
244
def test_peek_returns_none_for_missing_file(tmp_path):
    """Peeking at a path that does not exist yields None."""
    absent = tmp_path / "nope.db"
    assert Grimoire.peek(absent) is None
246
+
247
+
248
def test_peek_returns_none_for_non_grimoire_file(tmp_path):
    """A valid SQLite file without grimoire tables is reported as None."""
    import sqlite3

    foreign = tmp_path / "stranger.db"
    raw = sqlite3.connect(foreign)
    try:
        raw.execute("CREATE TABLE other (x INTEGER)")
        raw.commit()
    finally:
        raw.close()
    assert Grimoire.peek(foreign) is None
257
+
258
+
259
def test_peek_returns_stats_for_initialized_grimoire(tmp_path):
    """peek() reports model, dimension, schema version and per-kind counts."""
    store_path = tmp_path / "store.db"
    seeded = (("note", "alpha"), ("note", "beta"), ("spell", "lumos"))
    with Grimoire.open(
        store_path, embedder=FakeEmbedder(model="m1", dimension=8)
    ) as store:
        for label, text in seeded:
            store.add(kind=label, content=text)

    summary = Grimoire.peek(store_path)
    assert isinstance(summary, Stats)
    assert summary.model == "m1"
    assert summary.dimension == 8
    assert summary.schema_version == 1
    assert summary.entry_count == 3
    assert summary.kinds == {"note": 2, "spell": 1}
273
+
274
+
275
def test_peek_does_not_require_embedder_or_extension(tmp_path):
    """peek() works on a freshly created, empty grimoire."""
    # peek must be safe on a freshly-created file from another process,
    # without sqlite-vec or an embedder loaded.
    store_path = tmp_path / "store.db"
    fresh = Grimoire.open(store_path, embedder=FakeEmbedder())
    fresh.close()

    summary = Grimoire.peek(store_path)
    assert summary is not None
    assert summary.entry_count == 0
    assert summary.kinds == {}