nerva-mneme 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ DATABASE_URL=postgresql://postgres@localhost:5432/mneme
2
+
3
+ # Source of the documents to digest: a local path or a URL.
4
+ DATA_PATH=https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json
5
+
6
+ # API key for cloud providers. Leave empty for local backends (Ollama, vLLM).
7
+ API_KEY=
8
+
9
+ # Override defaults (Ollama localhost with bge-m3 / llama3).
10
+ # Works with any /v1/-compatible API (Ollama, vLLM, etc).
11
+ # EMBEDDER_URL=http://localhost:11434
12
+ # EMBEDDER_MODEL=bge-m3
13
+ # EMBEDDING_DIM=1024
14
+ # INFERENCE_URL=http://localhost:11434
15
+ # INFERENCE_MODEL=llama3:8b-instruct-q4_K_M
@@ -0,0 +1,32 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ build/
7
+ dist/
8
+ .venv/
9
+ venv/
10
+
11
+ # Env
12
+ .env
13
+ .env.*
14
+ !.env.example
15
+
16
+ # Editors
17
+ .idea/
18
+ .vscode/
19
+
20
+ # Logs
21
+ *.log
22
+
23
+ # Cache
24
+ .cache/
25
+
26
+ # Project-local notes
27
+ CLAUDE.md
28
+ .design/
29
+ .research/
30
+ .eval/
31
+ todo.md
32
+ tests/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Pavel Dolgov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.4
2
+ Name: nerva-mneme
3
+ Version: 0.1.0
4
+ Summary: Local-first RAG with built-in eval. Hybrid search, parameter sweep, one command.
5
+ Project-URL: Repository, https://github.com/nerva-project/mneme
6
+ Author-email: Pavel Dolgov <mikepromogratus@proton.me>
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Keywords: embeddings,eval,local-first,pgvector,rag,search
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: >=3.12
16
+ Requires-Dist: asyncpg>=0.29
17
+ Requires-Dist: click>=8.1
18
+ Requires-Dist: httpx>=0.27
19
+ Requires-Dist: numpy>=2.0
20
+ Requires-Dist: pgvector>=0.3
21
+ Requires-Dist: python-dotenv>=1.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
24
+ Requires-Dist: pytest>=8.0; extra == 'dev'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # Mneme
28
+
29
+ Local RAG pipeline with built-in evaluation. Postgres + pgvector for hybrid search, any /v1/-compatible LLM backend for embeddings and inference.
30
+
31
+ ## Setup
32
+
33
+ Requires Python 3.12+, [uv](https://docs.astral.sh/uv/), Postgres with the `pgvector` extension.
34
+
35
+ Copy `.env.example` to `.env` and fill in your values, then install:
36
+
37
+ ```bash
38
+ cp .env.example .env
39
+ uv sync
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ ```bash
45
+ uv run mneme digest # parse DATA_PATH source into cache
46
+ uv run mneme ingest <file.jsonl|dir>  # ingest a JSONL file or a directory
47
+ uv run mneme ask "query"
48
+ uv run mneme sweep <fast|medium|thorough> --limit 30
49
+ ```
50
+
51
+ ## Library
52
+
53
+ ```python
54
+ from mneme import Mneme, Config
55
+
56
+ cfg = Config(database_url="postgresql://...", api_key="sk-...")
57
+
58
+ async with Mneme(cfg) as m:
59
+ await m.ingest("./corpus")
60
+ answer = await m.ask("What is X?")
61
+
62
+ rows = await Mneme.sweep(cfg, "medium", limit=30)
63
+ ```
64
+
65
+ ## Input format
66
+
67
+ JSONL, one document per line:
68
+
69
+ ```json
70
+ {"content": "...", "source": "optional", "created_at": "2026-04-01T12:00:00Z", "metadata": {}}
71
+ ```
72
+
73
+ Only `content` is required. `source` falls back to the file stem, `created_at` to the current time, `metadata` to `{}`.
@@ -0,0 +1,47 @@
1
+ # Mneme
2
+
3
+ Local RAG pipeline with built-in evaluation. Postgres + pgvector for hybrid search, any /v1/-compatible LLM backend for embeddings and inference.
4
+
5
+ ## Setup
6
+
7
+ Requires Python 3.12+, [uv](https://docs.astral.sh/uv/), Postgres with the `pgvector` extension.
8
+
9
+ Copy `.env.example` to `.env` and fill in your values, then install:
10
+
11
+ ```bash
12
+ cp .env.example .env
13
+ uv sync
14
+ ```
15
+
16
+ ## Usage
17
+
18
+ ```bash
19
+ uv run mneme digest # parse DATA_PATH source into cache
20
+ uv run mneme ingest <file.jsonl|dir>  # ingest a JSONL file or a directory
21
+ uv run mneme ask "query"
22
+ uv run mneme sweep <fast|medium|thorough> --limit 30
23
+ ```
24
+
25
+ ## Library
26
+
27
+ ```python
28
+ from mneme import Mneme, Config
29
+
30
+ cfg = Config(database_url="postgresql://...", api_key="sk-...")
31
+
32
+ async with Mneme(cfg) as m:
33
+ await m.ingest("./corpus")
34
+ answer = await m.ask("What is X?")
35
+
36
+ rows = await Mneme.sweep(cfg, "medium", limit=30)
37
+ ```
38
+
39
+ ## Input format
40
+
41
+ JSONL, one document per line:
42
+
43
+ ```json
44
+ {"content": "...", "source": "optional", "created_at": "2026-04-01T12:00:00Z", "metadata": {}}
45
+ ```
46
+
47
+ Only `content` is required. `source` falls back to the file stem, `created_at` to the current time, `metadata` to `{}`.
@@ -0,0 +1,113 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+
5
+ import httpx
6
+
7
+ from .chunker import chunk
8
+ from .config import Config
9
+ from .db import Db
10
+ from .loader import load_docs
11
+ from .models import chat, embed
12
+ from .types import Chunk, SearchHit
13
+
14
+
15
class Mneme:
    """RAG engine with built-in eval.

    Lifecycle: ``Mneme(cfg)`` → ``open()`` → work → ``close()``, or use
    ``async with Mneme(cfg) as m:`` for automatic cleanup.
    """

    def __init__(self, cfg: Config) -> None:
        """Store the resolved configuration; no connections are made yet."""
        self.cfg = cfg.resolved()
        # Populated by open(); kept as None so close() is safe at any time.
        self.db = None
        self.http = None

    async def open(self) -> None:
        """Open the database, initialise its schema and create the HTTP client."""
        db = Db(self.cfg.database_url, self.cfg.embedding_dim)
        await db.open()
        try:
            await db.init_schema()
        except BaseException:
            # `async with` never calls __aexit__ when __aenter__ fails, so the
            # already-open database has to be released here.
            await db.close()
            raise
        self.db = db
        self.http = httpx.AsyncClient(timeout=60.0)

    async def close(self) -> None:
        """Release the database and the HTTP client.

        Safe to call on an engine that was never opened and safe to call
        twice. The HTTP client is closed even if closing the database raises.
        """
        db, self.db = self.db, None
        http, self.http = self.http, None
        try:
            if db is not None:
                await db.close()
        finally:
            if http is not None:
                await http.aclose()

    async def __aenter__(self) -> Mneme:
        await self.open()
        return self

    async def __aexit__(self, *_: object) -> None:
        await self.close()

    async def reset(self) -> None:
        """Truncate the database via ``Db.truncate()``."""
        await self.db.truncate()

    async def ingest(self, source_path: str) -> None:
        """Load documents, chunk them, embed every chunk and insert the result.

        Embeddings are computed from each chunk's ``overlapped()`` text while
        only the ``clean`` text is stored.

        Raises:
            ValueError: if the embedder returns a different number of vectors
                than the number of texts it was given.
        """
        docs = load_docs(source_path)

        pieces = [
            (doc, idx, raw)
            for doc in docs
            for idx, raw in enumerate(chunk(doc.content, self.cfg.chunk_size, self.cfg.overlap))
        ]
        if not pieces:
            # Nothing to embed: skip the embedding request and the insert.
            print(f"ingest done: 0 chunks from {len(docs)} docs")
            return

        texts = [raw.overlapped() for _, _, raw in pieces]
        vectors = await embed(self.cfg, self.http, texts)
        print(f"embedded {len(texts)} chunks in one call")

        chunks = [
            Chunk(
                # md5 is only a stable content id here, not a security primitive.
                id=hashlib.md5(
                    f"{doc.source}:{idx}:{raw.clean}".encode(),
                    usedforsecurity=False,
                ).hexdigest(),
                source=doc.source,
                chunk_index=idx,
                content=raw.clean,
                embedding=vector,
                metadata=doc.metadata,
                created_at=doc.created_at,
            )
            # strict=True turns a vector/text count mismatch into a clear error
            # instead of an IndexError or silently dropped vectors.
            for (doc, idx, raw), vector in zip(pieces, vectors, strict=True)
        ]
        await self.db.insert(chunks)
        print(f"ingest done: {len(chunks)} chunks from {len(docs)} docs")

    async def ask(self, query: str) -> str:
        """Embed ``query``, search the database and answer with the chat model.

        Falls back to a context-free answer when the search returns no hits.
        """
        vectors = await embed(self.cfg, self.http, [query])
        hits = await self.db.search(self.cfg, vectors[0], query)

        if hits:
            return await self._answer_with_context(query, hits)
        return await self._answer_without_context(query)

    async def _answer_with_context(self, query: str, hits: list[SearchHit]) -> str:
        """Ask the chat model to answer ``query`` from the numbered search hits."""
        prompt = "You are a personal knowledge assistant. You answer questions based ONLY on the provided context. If the context doesn't contain enough information, say so honestly. Answer in the same language as the question. Be concise and direct."

        parts: list[str] = []
        for number, hit in enumerate(hits, start=1):
            c = hit.chunk
            date = c.created_at.date().isoformat()
            parts.append(f"[{number}] ({date}, {c.source}, sim={hit.similarity:.3f})\n{c.content}")
        context = "\n\n".join(parts)

        return await chat(self.cfg, self.http, prompt, f"Context:\n{context}\n\nQuestion: {query}")

    async def _answer_without_context(self, query: str) -> str:
        """Ask the chat model to answer ``query`` from general knowledge only."""
        prompt = "You are a knowledge assistant. Answer the question directly based on your general knowledge. Answer in the same language as the question. Be concise and direct."
        return await chat(self.cfg, self.http, prompt, query)
99
+
100
+
101
+ from .digest import digest as _digest # noqa: E402
102
+ from .sweep import run_sweep, SweepRow, EvalMetrics, EvalResult # noqa: E402
103
+
104
# Expose the module-level helpers as static methods, so callers can write
# Mneme.digest(...) and Mneme.sweep(...). Their imports sit below the class
# definition (E402 is suppressed there) — presumably to avoid an import cycle.
setattr(Mneme, "digest", staticmethod(_digest))
setattr(Mneme, "sweep", staticmethod(run_sweep))

# Names re-exported as the package's public API.
__all__ = ["Mneme", "Config", "SweepRow", "EvalMetrics", "EvalResult"]
@@ -0,0 +1,50 @@
1
+ """File-based JSONL cache for expensive operations."""
2
+ import hashlib
3
+ import json
4
+ from pathlib import Path
5
+
6
+ CACHE_DIR = Path(".cache")
7
+
8
+
9
def corpus_hash(source_path: str) -> str:
    """Return a deterministic 12-hex-char hash of the source file(s) content.

    A file is hashed as-is. A directory is hashed as the concatenation of
    every ``*.jsonl`` file below it, in sorted path order. A path that is
    neither yields the hash of empty input.
    """
    # The digest is only a cache key; usedforsecurity=False keeps md5 usable
    # on builds that block it for security purposes.
    digest = hashlib.md5(usedforsecurity=False)
    root = Path(source_path)

    if root.is_file():
        files = [root]
    elif root.is_dir():
        # Skip directories that merely happen to be named "*.jsonl".
        files = sorted(f for f in root.rglob("*.jsonl") if f.is_file())
    else:
        files = []

    for file in files:
        # Stream in 1 MiB blocks so large corpora are never held in memory.
        with file.open("rb") as fh:
            while block := fh.read(1 << 20):
                digest.update(block)

    return digest.hexdigest()[:12]
19
+
20
+
21
+ class Cache:
22
+ """Generic JSONL cache. Doesn't know what it stores — just dicts."""
23
+
24
+ def __init__(self, **params: object) -> None:
25
+ raw = json.dumps(params, sort_keys=True, default=str)
26
+ key = hashlib.md5(raw.encode()).hexdigest()[:16]
27
+ self._path = CACHE_DIR / f"{key}.jsonl"
28
+
29
+ @property
30
+ def path(self) -> Path:
31
+ return self._path
32
+
33
+ def exists(self) -> bool:
34
+ return self._path.exists()
35
+
36
+ def load(self) -> list[dict] | None:
37
+ if not self._path.exists():
38
+ return None
39
+ try:
40
+ return [json.loads(line) for line in self._path.read_text().splitlines()]
41
+ except (json.JSONDecodeError, UnicodeDecodeError):
42
+ self._path.unlink(missing_ok=True)
43
+ print(f"cache corrupt, deleted: {self._path}")
44
+ return None
45
+
46
+ def save(self, rows: list[dict]) -> None:
47
+ self._path.parent.mkdir(parents=True, exist_ok=True)
48
+ with open(self._path, "w") as f:
49
+ for row in rows:
50
+ f.write(json.dumps(row) + "\n")
@@ -0,0 +1,69 @@
1
+ from dataclasses import dataclass
2
+
3
+ # Strongest separators first. Empty string is the final fallback (per-character).
4
+ SEPARATORS = ["\n\n", "\n", ". ", ", ", " ", ""]
5
+
6
+
7
@dataclass(frozen=True)
class ChunkData:
    """One chunk of text together with the overlap borrowed from its neighbors.

    `overlapped()` is the text meant for embedding, so the vector carries
    some neighboring context; `clean` alone is the text meant for storage,
    which keeps rows free of duplicated text.
    """

    # The chunk's own text.
    clean: str
    # Overlap taken from the end of the previous chunk ("" when there is none).
    head: str
    # Overlap taken from the start of the next chunk ("" when there is none).
    tail: str

    def overlapped(self) -> str:
        """Return head, clean and tail concatenated in that order."""
        return "".join((self.head, self.clean, self.tail))
18
+
19
+
20
def chunk(text: str, chunk_size: int, overlap: float) -> list[ChunkData]:
    """Split ``text`` into chunks of roughly ``chunk_size`` characters.

    The stripped text is split along the separator hierarchy and adjacent
    pieces are greedily merged. Each chunk then borrows up to
    ``int(chunk_size * overlap)`` characters from the end of its predecessor
    (``head``) and from the start of its successor (``tail``).
    """
    if not text:
        return []

    pieces = _merge(_separate(text.strip(), chunk_size), chunk_size)
    window = int(chunk_size * overlap)

    # No overlap requested: every chunk stands alone.
    if window <= 0:
        return [ChunkData(clean=body, head="", tail="") for body in pieces]

    final = len(pieces) - 1
    out: list[ChunkData] = []
    for pos, body in enumerate(pieces):
        before = pieces[pos - 1][-window:] if pos > 0 else ""
        after = pieces[pos + 1][:window] if pos < final else ""
        out.append(ChunkData(clean=body, head=before, tail=after))
    return out
36
+
37
+
38
+ def _separate(text: str, chunk_size: int, depth: int = 0) -> list[str]:
39
+ if depth >= len(SEPARATORS):
40
+ return [text]
41
+
42
+ sep = SEPARATORS[depth]
43
+ parts = text.split(sep)
44
+ result: list[str] = []
45
+
46
+ for i, part in enumerate(parts):
47
+ if len(part) < chunk_size:
48
+ # Reattach the separator as a suffix so _merge can reconstruct
49
+ # the original text without losing whitespace or newlines.
50
+ suffix = sep if i < len(parts) - 1 else ""
51
+ result.append(part + suffix)
52
+ else:
53
+ result.extend(_separate(part, chunk_size, depth + 1))
54
+
55
+ return result
56
+
57
+
58
+ def _merge(splits: list[str], chunk_size: int) -> list[str]:
59
+ raw: list[str] = []
60
+ for s in splits:
61
+ if not s:
62
+ continue
63
+
64
+ if raw and len(raw[-1]) + len(s) < chunk_size:
65
+ raw[-1] += s
66
+ else:
67
+ raw.append(s)
68
+
69
+ return raw
@@ -0,0 +1,66 @@
1
+ import asyncio
2
+
3
+ import click
4
+ from dotenv import load_dotenv
5
+
6
+ from . import Mneme
7
+ from .config import Config
8
+
9
+
10
def _mneme() -> Mneme:
    """Build a Mneme engine configured from the process environment."""
    env_cfg = Config.from_env()
    return Mneme(env_cfg)
12
+
13
+
14
@click.group(help="Mneme — RAG with built-in eval")
def app() -> None:
    """Root command group; subcommands attach themselves via ``@app.command``."""
17
+
18
+
19
@app.command(help="Digest raw source into cached JSONL.")
@click.argument("source", default="")
def digest(source: str) -> None:
    """Digest ``source``, or the configured ``data_path`` when none is given."""
    env_cfg = Config.from_env()
    target = source if source else env_cfg.data_path
    Mneme.digest(target)
25
+
26
+
27
@app.command(help="Ingest documents from a JSONL file or directory.")
@click.argument("source")
def ingest(source: str) -> None:
    """Open an engine, ingest ``source`` and close the engine again."""

    async def _job() -> None:
        async with _mneme() as engine:
            await engine.ingest(source)

    asyncio.run(_job())
34
+
35
+
36
@app.command(help="Ask a question against the ingested corpus.")
@click.argument("query")
def ask(query: str) -> None:
    """Answer ``query`` and print the answer framed by blank lines."""

    async def _job() -> None:
        async with _mneme() as engine:
            reply = await engine.ask(query)
            print(f"\n{reply}\n")

    asyncio.run(_job())
44
+
45
+
46
@app.command(help="Run an eval sweep across preset configurations.")
@click.argument("level")
@click.option("--limit", "-l", default=30, type=int, help="Number of sample chunks for eval")
def sweep(level: str, limit: int) -> None:
    """Run ``Mneme.sweep`` for preset ``level`` with the environment config."""

    async def _job() -> None:
        env_cfg = Config.from_env()
        await Mneme.sweep(env_cfg, level, limit)

    asyncio.run(_job())
54
+
55
+
56
def main() -> None:
    """CLI entry point: load ``.env`` via python-dotenv, then run the click app.

    ``RuntimeError`` and ``ValueError`` escaping a command are reported as a
    one-line ``error: ...`` message on stderr, and the process exits with
    status 1.
    """
    load_dotenv()
    try:
        app()
    except (RuntimeError, ValueError) as exc:
        # Diagnostics go to stderr so they never mix with command output
        # that a caller may be piping or capturing.
        click.echo(f"error: {exc}", err=True)
        raise SystemExit(1) from exc


if __name__ == "__main__":
    main()
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass, replace
5
+
6
+
7
@dataclass(frozen=True)
class Config:
    """Immutable settings for the engine.

    Empty strings and ``0`` mean "not set". ``resolved()`` fills the
    embedder/inference fields from ``DEFAULTS`` and validates the ranges.
    """

    # Required; resolved() rejects an empty value.
    database_url: str = ""
    data_path: str = ""
    api_key: str = ""
    embedder_url: str = ""
    embedder_model: str = ""
    embedding_dim: int = 0
    inference_url: str = ""
    inference_model: str = ""
    # Validated by resolved(): 100..10000.
    chunk_size: int = 600
    # Validated by resolved(): 0..0.5 (fraction of chunk_size).
    overlap: float = 0.0
    # Validated by resolved(): 0..1.
    alpha: float = 0.7
    # Validated by resolved(): 1..20.
    k: int = 5

    def resolved(self) -> Config:
        """Fill empty fields from defaults, validate, and return a new Config.

        Raises:
            ValueError: if ``database_url`` is empty or any numeric setting
                is outside its allowed range.
        """
        if not self.database_url:
            raise ValueError("config: database_url is required")

        fallback_fields = (
            "embedder_url",
            "embedder_model",
            "embedding_dim",
            "inference_url",
            "inference_model",
        )
        cfg = replace(
            self,
            **{name: getattr(self, name) or getattr(DEFAULTS, name) for name in fallback_fields},
        )

        # A negative dimension is truthy, so it would survive the fallback.
        if cfg.embedding_dim < 1:
            raise ValueError(f"config: embedding_dim must be positive, got {cfg.embedding_dim}")
        if not 100 <= cfg.chunk_size <= 10000:
            raise ValueError(f"config: chunk_size must be 100..10000, got {cfg.chunk_size}")
        if not 0 <= cfg.overlap <= 0.5:
            raise ValueError(f"config: overlap must be 0..0.5, got {cfg.overlap}")
        if not 0 <= cfg.alpha <= 1:
            raise ValueError(f"config: alpha must be 0..1, got {cfg.alpha}")
        if not 1 <= cfg.k <= 20:
            raise ValueError(f"config: k must be 1..20, got {cfg.k}")

        return cfg

    @classmethod
    def from_env(cls) -> Config:
        """Build a Config from environment variables; unset ones stay empty.

        Raises:
            ValueError: if ``EMBEDDING_DIM`` is set but is not an integer.
        """
        env = os.environ
        # "EMBEDDING_DIM=" (set but empty) must mean "not set", like the
        # string settings, rather than crash inside int("").
        raw_dim = env.get("EMBEDDING_DIM", "").strip()
        try:
            embedding_dim = int(raw_dim) if raw_dim else 0
        except ValueError:
            raise ValueError(
                f"config: EMBEDDING_DIM must be an integer, got {raw_dim!r}"
            ) from None

        return cls(
            database_url=env.get("DATABASE_URL", ""),
            data_path=env.get("DATA_PATH", ""),
            api_key=env.get("API_KEY", ""),
            embedder_url=env.get("EMBEDDER_URL", ""),
            embedder_model=env.get("EMBEDDER_MODEL", ""),
            embedding_dim=embedding_dim,
            inference_url=env.get("INFERENCE_URL", ""),
            inference_model=env.get("INFERENCE_MODEL", ""),
        )


# Fallback values used by Config.resolved() for fields left empty.
DEFAULTS = Config(
    embedder_url="http://localhost:11434",
    embedder_model="bge-m3",
    embedding_dim=1024,
    inference_url="http://localhost:11434",
    inference_model="llama3:8b-instruct-q4_K_M",
)
@@ -0,0 +1,34 @@
1
+ """Built-in SQuAD parser. Used by digest when DATA_PATH is a URL."""
2
+ import json
3
+ from urllib.error import URLError
4
+ from urllib.request import urlopen
5
+
6
SQUAD_LIMIT = 200


def download_squad(url: str, limit: int = SQUAD_LIMIT, timeout: float = 30.0) -> list[dict]:
    """Download a SQuAD-format JSON file and return its paragraphs as documents.

    Args:
        url: Location of the SQuAD JSON file.
        limit: Maximum number of documents to return.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        Dicts with ``content`` (the stripped paragraph), ``source`` (the
        article title) and ``metadata``. Paragraphs shorter than 50
        characters are skipped.

    Raises:
        RuntimeError: if the download fails, the body is not valid JSON, or
            the JSON does not have the SQuAD layout.
    """
    print(f"downloading from {url}...")
    try:
        # The context manager closes the response even when parsing fails.
        with urlopen(url, timeout=timeout) as response:
            raw = json.load(response)
    except (OSError, ValueError) as exc:
        # OSError covers URLError and socket timeouts; ValueError covers
        # JSONDecodeError and urlopen's "unknown url type".
        raise RuntimeError(f"failed to download {url}: {exc}") from exc

    try:
        docs = _collect_paragraphs(raw, limit)
    except (KeyError, TypeError, AttributeError) as exc:
        raise RuntimeError(f"unexpected SQuAD layout in {url}: {exc!r}") from exc

    print(f"downloaded {len(docs)} paragraphs")
    return docs


def _collect_paragraphs(raw: dict, limit: int) -> list[dict]:
    """Walk the parsed SQuAD JSON and collect at most ``limit`` paragraph docs."""
    docs: list[dict] = []
    if limit <= 0:
        return docs

    for article in raw["data"]:
        title = article["title"]
        for para in article["paragraphs"]:
            context = para["context"].strip()
            if len(context) < 50:
                continue
            docs.append({
                "content": context,
                "source": title,
                "metadata": {"dataset": "squad-v2"},
            })
            if len(docs) >= limit:
                return docs
    return docs