epochdb 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
epochdb-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jeff
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
epochdb-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: epochdb
3
+ Version: 0.1.0
4
+ Summary: An agentic memory engine designed for lossless, tiered verbatim storage and multi-hop retrieval.
5
+ Author: Jeff
6
+ License: MIT License
7
+ Project-URL: Homepage, https://github.com/jeff/epochdb
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy
15
+ Requires-Dist: pyarrow
16
+ Requires-Dist: hnswlib
17
+ Dynamic: license-file
18
+
19
+ # EpochDB
20
+
21
+ **EpochDB** is an agentic memory engine designed for lossless, tiered verbatim storage and multi-hop retrieval.
22
+
23
+ ## Why
24
+ I had this idea while playing with LMDB. I wanted to create a memory system that could store conversations in a hybrid way, using in-memory for the most recent conversations and on-disk for older conversations. So, in order to have immutable data, I decided to use Parquet files for the on-disk storage.
25
+
26
+ ## Overview
27
+ Traditional AI memory systems compress conversations through destructive summarization. EpochDB bypasses this constraint by storing "Unified Memory Atoms"—the raw text intrinsically paired with dense embeddings.
28
+
29
+ EpochDB uses a tiered architecture reminiscent of CPU caching:
30
+ 1. **L1: Working Memory**: Sub-millisecond HNSW vector index in RAM.
31
+ 2. **L2: Historical Archive**: Cold storage in immutable, time-partitioned `.parquet` files via PyArrow.
32
+
33
+ It uniquely handles multi-hop retrieval over time-partitioned data using a **Global Entity Index**.
34
+
35
+ ## How It Works
36
+ See [`how_it_works.md`](how_it_works.md) for a detailed technical dive into the architecture.
37
+
38
+ ## Benchmarks & Examples
39
+ See [`benchmark.md`](benchmark.md) for traces of EpochDB successfully integrated via `LangGraph`. Check out [`example_langgraph.py`](example_langgraph.py) for the source code.
@@ -0,0 +1,21 @@
1
+ # EpochDB
2
+
3
+ **EpochDB** is an agentic memory engine designed for lossless, tiered verbatim storage and multi-hop retrieval.
4
+
5
+ ## Why
6
+ I had this idea while playing with LMDB. I wanted to create a memory system that could store conversations in a hybrid way, using in-memory for the most recent conversations and on-disk for older conversations. So, in order to have immutable data, I decided to use Parquet files for the on-disk storage.
7
+
8
+ ## Overview
9
+ Traditional AI memory systems compress conversations through destructive summarization. EpochDB bypasses this constraint by storing "Unified Memory Atoms"—the raw text intrinsically paired with dense embeddings.
10
+
11
+ EpochDB uses a tiered architecture reminiscent of CPU caching:
12
+ 1. **L1: Working Memory**: Sub-millisecond HNSW vector index in RAM.
13
+ 2. **L2: Historical Archive**: Cold storage in immutable, time-partitioned `.parquet` files via PyArrow.
14
+
15
+ It uniquely handles multi-hop retrieval over time-partitioned data using a **Global Entity Index**.
16
+
17
+ ## How It Works
18
+ See [`how_it_works.md`](how_it_works.md) for a detailed technical dive into the architecture.
19
+
20
+ ## Benchmarks & Examples
21
+ See [`benchmark.md`](benchmark.md) for traces of EpochDB successfully integrated via `LangGraph`. Check out [`example_langgraph.py`](example_langgraph.py) for the source code.
@@ -0,0 +1,4 @@
1
+ from .engine import EpochDB
2
+ from .atom import UnifiedMemoryAtom
3
+
4
+ __all__ = ["EpochDB", "UnifiedMemoryAtom"]
@@ -0,0 +1,46 @@
1
+ import uuid
2
+ import time
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, List, Tuple
5
+ import numpy as np
6
+
7
@dataclass
class UnifiedMemoryAtom:
    """A single lossless memory record: the raw payload intrinsically
    paired with its dense embedding, plus relational and lifecycle metadata.

    Fields:
        id: uuid4 string, generated per atom.
        payload: arbitrary memory content, stored verbatim.
        embedding: dense vector for semantic search (empty array by default).
        triples: (subject, predicate, object) facts extracted from the payload.
        created_at: unix timestamp of creation.
        access_count: how many times retrieval has returned this atom.
        epoch_id: the epoch this atom belongs to ("active" until assigned).
    """
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    payload: Any = None
    embedding: np.ndarray = field(default_factory=lambda: np.array([]))
    triples: List[Tuple[str, str, str]] = field(default_factory=list)
    created_at: float = field(default_factory=time.time)
    access_count: int = 0
    epoch_id: str = "active"

    def calculate_saliency(self) -> float:
        """
        S = R / (T + 1)
        where R is access_count and T is time since creation in hours.
        """
        age_hours = (time.time() - self.created_at) / 3600.0
        return self.access_count / (age_hours + 1.0)

    def to_dict(self):
        """Serialize to JSON-friendly builtins (embedding becomes a list)."""
        serialized = dict(
            id=self.id,
            payload=self.payload,
            embedding=self.embedding.tolist(),
            triples=self.triples,
            created_at=self.created_at,
            access_count=self.access_count,
        )
        serialized["epoch_id"] = self.epoch_id
        return serialized

    @classmethod
    def from_dict(cls, data):
        """Rebuild an atom from `to_dict` output, restoring numpy/tuple types."""
        restored_embedding = np.array(data["embedding"], dtype=np.float32)
        restored_triples = [tuple(t) for t in data["triples"]]
        return cls(
            id=data["id"],
            payload=data["payload"],
            embedding=restored_embedding,
            triples=restored_triples,
            created_at=data["created_at"],
            access_count=data["access_count"],
            epoch_id=data["epoch_id"],
        )
@@ -0,0 +1,81 @@
1
+ import os
2
+ import pyarrow as pa
3
+ import pyarrow.parquet as pq
4
+ import numpy as np
5
+ from typing import List, Dict
6
+ from .atom import UnifiedMemoryAtom
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class ColdTier:
    """
    L2 Historical Archive. Resides on Disk.
    Uses Parquet format: one immutable file per expired epoch, named
    "epoch_<epoch_id>.parquet" inside storage_dir.
    """
    def __init__(self, storage_dir: str):
        self.storage_dir = storage_dir
        os.makedirs(self.storage_dir, exist_ok=True)

    def _epoch_path(self, epoch_id: str) -> str:
        """Single definition of the on-disk naming scheme for an epoch."""
        return os.path.join(self.storage_dir, f"epoch_{epoch_id}.parquet")

    def serialize_epoch(self, epoch_id: str, atoms: List[UnifiedMemoryAtom]):
        """Flushes hot partition to Parquet blocks.

        No-op for an empty atom list. Payloads are coerced to str and
        triples persisted via their repr (see load_epoch) — both are lossy
        for non-string payloads.
        """
        if not atoms:
            return

        file_path = self._epoch_path(epoch_id)

        # Columnar layout for pyarrow.
        ids = [a.id for a in atoms]
        payloads = [str(a.payload) for a in atoms]
        embeddings = [a.embedding.tolist() for a in atoms]
        created_ats = [a.created_at for a in atoms]
        access_counts = [a.access_count for a in atoms]
        triples_str = [str(a.triples) for a in atoms]

        table = pa.table({
            "id": ids,
            "payload": payloads,
            "embedding": embeddings,
            "triples": triples_str,
            "created_at": created_ats,
            "access_count": access_counts,
            "epoch_id": [epoch_id] * len(atoms)
        })

        pq.write_table(table, file_path)
        logger.info(f"Serialized {len(atoms)} atoms to {file_path}")

    def load_epoch(self, epoch_id: str) -> List[UnifiedMemoryAtom]:
        """Load every atom of one epoch; returns [] for unknown epochs."""
        import ast  # local import: only needed to hydrate triples

        file_path = self._epoch_path(epoch_id)
        if not os.path.exists(file_path):
            return []

        table = pq.read_table(file_path)
        rows = table.to_pylist()

        atoms = []
        for row in rows:
            try:
                # Triples were persisted via str(); literal_eval restores the
                # list of tuples without executing arbitrary code.
                triples = ast.literal_eval(row['triples'])
            except (ValueError, SyntaxError):
                # Malformed repr: degrade to "no relational data" rather than
                # failing the whole epoch load. (Was a bare except, which
                # also swallowed KeyboardInterrupt/SystemExit.)
                triples = []

            atom = UnifiedMemoryAtom(
                id=row['id'],
                payload=row['payload'],
                embedding=np.array(row['embedding'], dtype=np.float32),
                triples=triples,
                created_at=row['created_at'],
                access_count=row['access_count'],
                epoch_id=row['epoch_id']
            )
            atoms.append(atom)
        return atoms

    def get_all_epochs(self) -> List[str]:
        """List archived epoch ids, sorted for deterministic iteration."""
        epochs = []
        for f in os.listdir(self.storage_dir):
            if f.startswith("epoch_") and f.endswith(".parquet"):
                epoch_id = f[len("epoch_"):-len(".parquet")]
                epochs.append(epoch_id)
        # os.listdir order is platform-dependent; sort so repeated scans
        # visit epochs in a stable order.
        return sorted(epochs)
@@ -0,0 +1,132 @@
1
+ import os
2
+ import time
3
+ import json
4
+ import logging
5
+ from typing import List, Optional, Dict
6
+ import numpy as np
7
+
8
+ from .atom import UnifiedMemoryAtom
9
+ from .hot_tier import HotTier
10
+ from .cold_tier import ColdTier
11
+ from .transaction import WriteAheadLog, FileLock, MultiIndexTransaction
12
+ from .retrieval import RetrievalManager
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
class EpochDB:
    """The main client for the Agentic Memory Engine.

    Composes the storage tiers and coordinates their lifecycle:
      - L1 HotTier (RAM, HNSW index) holds the current epoch's atoms.
      - L2 ColdTier (Parquet on disk) receives expired epochs.
      - A JSON-persisted "global KG" maps entity strings to
        [atom_id, epoch_id] pairs so retrieval can hop across epochs.

    A file lock is taken for the lifetime of the instance, so only one
    EpochDB may own a storage_dir at a time; call close() when done.
    """

    def __init__(self, storage_dir: str = "./.epochdb_data", dim: int = 384, epoch_duration_secs: int = 3600, saliency_threshold: float = 0.1):
        # NOTE(review): saliency_threshold is stored but never read anywhere
        # in this class — confirm whether another component uses it.
        self.storage_dir = os.path.abspath(storage_dir)
        self.dim = dim
        self.epoch_duration_secs = epoch_duration_secs
        self.saliency_threshold = saliency_threshold

        os.makedirs(self.storage_dir, exist_ok=True)

        # Concurrency Lock — raises RuntimeError if another process holds it.
        self.lock = FileLock(os.path.join(self.storage_dir, ".lock"))
        self.lock.acquire()

        # WAL — crash-recovery record of hot-tier writes.
        self.wal = WriteAheadLog(os.path.join(self.storage_dir, "wal.jsonl"))

        # Global Entity Index: entity -> list of [atom_id, epoch_id].
        self.global_kg_file = os.path.join(self.storage_dir, "global_kg.json")
        self.global_kg: Dict[str, List[List[str]]] = {}
        if os.path.exists(self.global_kg_file):
            try:
                with open(self.global_kg_file, "r") as f:
                    self.global_kg = json.load(f)
            except Exception as e:
                # Best-effort load: a corrupt KG file degrades retrieval
                # (no relational expansion) but does not block startup.
                logger.error(f"Failed to load global KG: {e}")

        # Epoch State — ids are wall-clock based: "epoch_<unix_ts>".
        # NOTE(review): ColdTier prefixes file names with "epoch_" again, so
        # archives land as "epoch_epoch_<ts>.parquet"; confirm this double
        # prefix is intended and that readers account for it.
        self.current_epoch_id = f"epoch_{int(time.time())}"
        self.epoch_start_time = time.time()

        # Tiers
        self.hot_tier = HotTier(dim=self.dim)
        self.cold_tier = ColdTier(self.storage_dir)

        # Retrieval
        # We pass self.global_kg explicitly to the retriever now; the dict is
        # shared by reference, so later KG mutations are visible to it.
        self.retriever = RetrievalManager(self.hot_tier, self.cold_tier, self.global_kg)

    def _save_global_kg(self):
        # Persist the entity index; called after every write and on close().
        with open(self.global_kg_file, "w") as f:
            json.dump(self.global_kg, f)

    def add_memory(self, payload: any, embedding: np.ndarray, triples: Optional[List[tuple]] = None) -> str:
        """Agent adds a new memory atom.

        Args:
            payload: arbitrary memory content, stored verbatim.
            embedding: dense vector; expected to match self.dim (the hot
                tier skips indexing mis-sized embeddings).
            triples: optional (subject, predicate, object) facts used to
                update the global entity index.

        Returns:
            The new atom's id (uuid4 string).
        """
        if triples is None:
            triples = []

        atom = UnifiedMemoryAtom(
            payload=payload,
            embedding=embedding,
            triples=triples,
            epoch_id=self.current_epoch_id
        )

        # ACID Multi-Index Transaction: WAL append first, hot tier on commit.
        with MultiIndexTransaction(self.wal, self.hot_tier) as tx:
            tx.add(atom)

        # Update Global Index — both endpoints of every triple point back at
        # this atom so either entity can seed a multi-hop expansion.
        for subj, pred, obj in triples:
            if subj not in self.global_kg: self.global_kg[subj] = []
            if obj not in self.global_kg: self.global_kg[obj] = []

            # Subj/Obj -> [atom.id, epoch_id]
            self.global_kg[subj].append([atom.id, self.current_epoch_id])
            self.global_kg[obj].append([atom.id, self.current_epoch_id])

        self._save_global_kg()

        self._check_epoch_expiry()
        return atom.id

    def recall(self, query_emb: np.ndarray, top_k: int = 5) -> List[UnifiedMemoryAtom]:
        """Agent queries memory.

        Delegates to the RetrievalManager (semantic hook + relational
        expansion + saliency re-rank) and piggybacks an epoch-expiry check.
        """
        results = self.retriever.search(query_emb, top_k=top_k)
        self._check_epoch_expiry()
        return results

    def _check_epoch_expiry(self):
        """Lifecycle Management: Hot -> Cold"""
        # Expiry is checked lazily on reads/writes; there is no timer thread.
        if time.time() - self.epoch_start_time > self.epoch_duration_secs:
            self._checkpoint()

    def _checkpoint(self):
        """Epoch Checkpoint: Flush to disk, clear memory."""
        logger.info(f"Triggering Epoch Checkpoint for {self.current_epoch_id}")

        # Gather atoms
        atoms = list(self.hot_tier.atoms.values())

        if atoms:
            self.cold_tier.serialize_epoch(self.current_epoch_id, atoms)

        # Clear Hot Tier & WAL — safe only after serialization, since the
        # WAL is the crash-recovery copy of exactly these atoms.
        self.hot_tier.clear()
        self.wal.clear()

        # Start new Epoch
        self.current_epoch_id = f"epoch_{int(time.time())}"
        self.epoch_start_time = time.time()

    def force_checkpoint(self):
        """Manually trigger checkpoint for testing."""
        self._checkpoint()

    def close(self):
        """Flush the entity index, close the WAL, and release the lock."""
        self._save_global_kg()
        self.wal.close()
        self.lock.release()

    def __del__(self):
        # Last-resort lock release if the caller forgot close(). The WAL and
        # KG are deliberately not flushed here (interpreter may be tearing
        # down). Broad except: __del__ must never raise.
        try:
            self.lock.release()
        except:
            pass
@@ -0,0 +1,72 @@
1
+ import hnswlib
2
+ import numpy as np
3
+ import logging
4
+ from typing import Dict, List, Optional
5
+ from .atom import UnifiedMemoryAtom
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class HotTier:
    """
    L1 Working Memory. Resides in RAM.
    Houses Active Partition, vector index. (Global KG is now in engine).

    Atoms are stored by uuid string; a parallel uuid<->int mapping adapts
    them to hnswlib's integer labels.
    """
    def __init__(self, dim: int, max_elements: int = 10000):
        self.dim = dim
        self.max_elements = max_elements

        # HNSW Index for Vectors
        self.vector_index = self._new_index()

        # Atom storage: id -> UnifiedMemoryAtom
        self.atoms: Dict[str, UnifiedMemoryAtom] = {}

        # map string uuid to integer for hnsw
        self.uuid_to_int = {}
        self.int_to_uuid = {}
        self._next_int_id = 0

    def _new_index(self):
        """Build an empty HNSW cosine index sized for this tier."""
        index = hnswlib.Index(space='cosine', dim=self.dim)
        index.init_index(max_elements=self.max_elements, ef_construction=200, M=16)
        return index

    def _add_atom(self, atom: UnifiedMemoryAtom):
        """Internal method called by MIT transaction. Idempotent per atom id."""
        if atom.id in self.atoms:
            return  # Already exists

        int_id = self._next_int_id
        self.uuid_to_int[atom.id] = int_id
        self.int_to_uuid[int_id] = atom.id
        self._next_int_id += 1

        # 1. Add to Vector space. Atoms with a missing or mis-sized
        # embedding are NOT indexed, but are still stored below so they
        # remain fetchable by id.
        if atom.embedding is not None and len(atom.embedding) == self.dim:
            self.vector_index.add_items([atom.embedding], [int_id])
        else:
            logger.warning(f"Atom {atom.id} has no valid embedding for dim {self.dim}.")

        # 2. Store payload
        self.atoms[atom.id] = atom

    def query_vector(self, query_emb: np.ndarray, top_k: int = 5) -> List[UnifiedMemoryAtom]:
        """k-NN search over the in-RAM index; returns the matching atoms."""
        # Bound k by the number of vectors actually indexed, not by
        # len(self.atoms): atoms without a valid embedding live in
        # self.atoms but not in the index, so the previous
        # min(top_k, len(self.atoms)) could request more neighbors than
        # the index holds and hnswlib would raise.
        indexed = self.vector_index.get_current_count()
        if indexed == 0:
            return []

        actual_k = min(top_k, indexed)
        # knn_query returns (labels, distances)
        labels, distances = self.vector_index.knn_query([query_emb], k=actual_k)

        results = []
        for int_lbl in labels[0]:
            if int_lbl in self.int_to_uuid:
                uuid_str = self.int_to_uuid[int_lbl]
                results.append(self.atoms[uuid_str])

        return results

    def clear(self):
        """Called upon Epoch Expiry after serialization."""
        self.atoms.clear()
        self.uuid_to_int.clear()
        self.int_to_uuid.clear()
        self._next_int_id = 0
        # hnswlib has no reset; rebuild a fresh index instead.
        self.vector_index = self._new_index()
@@ -0,0 +1,88 @@
1
+ import numpy as np
2
+ from typing import List, Dict, Set
3
+ from .atom import UnifiedMemoryAtom
4
+ from .hot_tier import HotTier
5
+ from .cold_tier import ColdTier
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class RetrievalManager:
    """Multi-stage retrieval process with Global KG.

    Stage 1: semantic similarity against the hot tier (HNSW) and every cold
    epoch (brute-force cosine). Stage 2: relational expansion through the
    global entity index. Stage 3: saliency re-ranking.
    """
    def __init__(self, hot_tier: HotTier, cold_tier: ColdTier, global_kg: Dict[str, List[List[str]]]):
        self.hot_tier = hot_tier
        self.cold_tier = cold_tier
        # Shared by reference with the engine; reflects live KG updates.
        self.global_kg = global_kg

    def _fetch_atom_by_id(self, atom_id: str, epoch_id: str) -> UnifiedMemoryAtom:
        """Targeted fetch: hot tier first, then the atom's epoch file.

        Returns None when the atom is in neither tier.
        """
        # Check Hot Tier
        if atom_id in self.hot_tier.atoms:
            return self.hot_tier.atoms[atom_id]

        # Check Cold Tier targeted by epoch. The KG stores the engine's
        # epoch id verbatim (e.g. "epoch_<ts>"), and ColdTier was handed
        # that exact string at serialization time (producing
        # "epoch_epoch_<ts>.parquet"), so it must be passed through
        # unmodified. The previous code stripped the "epoch_" prefix here,
        # which pointed load_epoch at a file name ColdTier never wrote and
        # made every cross-epoch hop silently return nothing.
        atoms = self.cold_tier.load_epoch(epoch_id)
        for a in atoms:
            if a.id == atom_id:
                return a
        return None

    def search(self, query_emb: np.ndarray, top_k: int = 5, expand_hops: int = 1) -> List[UnifiedMemoryAtom]:
        """Hybrid search over both tiers plus KG expansion.

        Returns up to 2*top_k atoms sorted by descending saliency; every
        returned atom has its access_count incremented.
        """
        candidates: Dict[str, UnifiedMemoryAtom] = {}

        # 1. Semantic Hook (Hot Tier)
        hot_hits = self.hot_tier.query_vector(query_emb, top_k=top_k)
        for atom in hot_hits:
            candidates[atom.id] = atom

        # Cold tier Semantic Hook: brute-force cosine over every archived
        # epoch — O(total archived atoms) per query.
        epochs = self.cold_tier.get_all_epochs()
        for epoch in epochs:
            cold_atoms = self.cold_tier.load_epoch(epoch)
            if cold_atoms:
                embeddings = np.array([a.embedding for a in cold_atoms])
                # Guard against zero-norm vectors before dividing.
                norms = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_emb)
                norms = np.where(norms == 0, 1e-10, norms)
                dots = np.dot(embeddings, query_emb)
                sims = dots / norms

                best_idx = np.argsort(sims)[-top_k:][::-1]
                for idx in best_idx:
                    if sims[idx] > 0.0:
                        atom = cold_atoms[idx]
                        candidates[atom.id] = atom

        # 2. Relational Expansion (Global KG): breadth-first, one frontier
        # per hop; only newly discovered atoms seed the next hop.
        if expand_hops > 0:
            expansion_set = set(candidates.keys())
            for _ in range(expand_hops):
                new_neighbors = set()
                # Step B: Identify Entities mentioned by frontier atoms.
                for a_id in expansion_set:
                    atom = candidates.get(a_id)
                    if not atom: continue
                    entities = set()
                    for subj, _pred, obj in atom.triples:
                        entities.add(subj)
                        entities.add(obj)

                    # Step C: Query KG for neighbors
                    for ent in entities:
                        if ent in self.global_kg:
                            for neighbor_atom_id, epoch_id in self.global_kg[ent]:
                                if neighbor_atom_id not in candidates:
                                    # Step D: Targeted Fetch
                                    n_atom = self._fetch_atom_by_id(neighbor_atom_id, epoch_id)
                                    if n_atom:
                                        new_neighbors.add(n_atom.id)
                                        candidates[n_atom.id] = n_atom
                expansion_set = new_neighbors

        # 3. Temporal Re-ranking and Update Access
        results = list(candidates.values())
        for r in results:
            # Reading a memory reinforces it (raises future saliency).
            # NOTE(review): for cold-tier atoms this increment only lives in
            # RAM — it is not written back to the parquet file.
            r.access_count += 1

        results.sort(key=lambda x: x.calculate_saliency(), reverse=True)
        return results[:top_k * 2]
@@ -0,0 +1,73 @@
1
+ import os
2
+ import json
3
+ import logging
4
+ from typing import Dict, Any, Optional
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class FileLock:
    """A simple file-based lock to prevent concurrent epochdb instances from writing.

    The lock file holds the owning process's pid for debugging. Note this is
    advisory only: a crashed owner leaves a stale lock file behind.
    """
    def __init__(self, lock_path: str):
        self.lock_path = lock_path

    def acquire(self):
        """Take the lock, raising RuntimeError if it is already held.

        Uses O_CREAT|O_EXCL so the existence check and file creation are a
        single atomic syscall — the previous exists()-then-open() sequence
        let two processes race through the gap and both "acquire" the lock.
        """
        try:
            fd = os.open(self.lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            raise RuntimeError(f"Database is locked by another process: {self.lock_path}")
        try:
            os.write(fd, str(os.getpid()).encode())
        finally:
            os.close(fd)

    def release(self):
        """Drop the lock; a no-op if it is not currently held."""
        try:
            os.remove(self.lock_path)
        except FileNotFoundError:
            pass
22
+
23
+
24
class WriteAheadLog:
    """Append-only JSONL log for crash recovery.

    Each record is one JSON object per line: {"op": ..., "data": ...}.
    Every append is flushed and fsync'd so it survives a process crash.
    """
    def __init__(self, wal_path: str):
        self.wal_path = wal_path
        self._file = open(self.wal_path, "a")

    def append(self, operation: str, data: Dict[str, Any]):
        """Durably write one record: flush to the OS, then fsync to disk."""
        entry = {"op": operation, "data": data}
        self._file.write(json.dumps(entry) + "\n")
        self._file.flush()
        os.fsync(self._file.fileno())

    def close(self):
        """Close the underlying file handle."""
        self._file.close()

    def clear(self):
        """Called upon successful Epoch Checkpoint."""
        # Truncate by reopening in write mode, then resume append mode.
        self._file.close()
        with open(self.wal_path, "w"):
            pass
        self._file = open(self.wal_path, "a")
44
+
45
+
46
class MultiIndexTransaction:
    """
    Context manager to ensure an atom is written to the WAL,
    the Vector Index, and the Knowledge Graph atomically.

    Protocol: add() stages the atom and journals it to the WAL immediately;
    the hot tier is only touched on a clean exit, after which a COMMIT
    marker is journaled. On an exception a ROLLBACK marker is journaled,
    nothing reaches the hot tier, and the exception propagates.
    """
    def __init__(self, wal: WriteAheadLog, hot_tier):
        self.wal = wal
        self.hot_tier = hot_tier
        self.pending_atoms = []

    def __enter__(self):
        # Fresh staging area per transaction; the instance may be reused.
        self.pending_atoms = []
        return self

    def add(self, atom):
        """Stage an atom: journal it now, apply it to the hot tier on commit."""
        self.pending_atoms.append(atom)
        self.wal.append("ADD", atom.to_dict())

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            # Failure path: journal the abort and let the exception propagate.
            logger.error(f"Transaction failed, rolling back. Reason: {exc_val}")
            self.wal.append("ROLLBACK", {})
            return False

        # Success path: apply every staged atom, then journal the commit.
        for staged in self.pending_atoms:
            self.hot_tier._add_atom(staged)
        self.wal.append("COMMIT", {})
        return True
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: epochdb
3
+ Version: 0.1.0
4
+ Summary: An agentic memory engine designed for lossless, tiered verbatim storage and multi-hop retrieval.
5
+ Author: Jeff
6
+ License: MIT License
7
+ Project-URL: Homepage, https://github.com/jeff/epochdb
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy
15
+ Requires-Dist: pyarrow
16
+ Requires-Dist: hnswlib
17
+ Dynamic: license-file
18
+
19
+ # EpochDB
20
+
21
+ **EpochDB** is an agentic memory engine designed for lossless, tiered verbatim storage and multi-hop retrieval.
22
+
23
+ ## Why
24
+ I had this idea while playing with LMDB. I wanted to create a memory system that could store conversations in a hybrid way, using in-memory for the most recent conversations and on-disk for older conversations. So, in order to have immutable data, I decided to use Parquet files for the on-disk storage.
25
+
26
+ ## Overview
27
+ Traditional AI memory systems compress conversations through destructive summarization. EpochDB bypasses this constraint by storing "Unified Memory Atoms"—the raw text intrinsically paired with dense embeddings.
28
+
29
+ EpochDB uses a tiered architecture reminiscent of CPU caching:
30
+ 1. **L1: Working Memory**: Sub-millisecond HNSW vector index in RAM.
31
+ 2. **L2: Historical Archive**: Cold storage in immutable, time-partitioned `.parquet` files via PyArrow.
32
+
33
+ It uniquely handles multi-hop retrieval over time-partitioned data using a **Global Entity Index**.
34
+
35
+ ## How It Works
36
+ See [`how_it_works.md`](how_it_works.md) for a detailed technical dive into the architecture.
37
+
38
+ ## Benchmarks & Examples
39
+ See [`benchmark.md`](benchmark.md) for traces of EpochDB successfully integrated via `LangGraph`. Check out [`example_langgraph.py`](example_langgraph.py) for the source code.
@@ -0,0 +1,15 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ epochdb/__init__.py
5
+ epochdb/atom.py
6
+ epochdb/cold_tier.py
7
+ epochdb/engine.py
8
+ epochdb/hot_tier.py
9
+ epochdb/retrieval.py
10
+ epochdb/transaction.py
11
+ epochdb.egg-info/PKG-INFO
12
+ epochdb.egg-info/SOURCES.txt
13
+ epochdb.egg-info/dependency_links.txt
14
+ epochdb.egg-info/requires.txt
15
+ epochdb.egg-info/top_level.txt
@@ -0,0 +1,3 @@
1
+ numpy
2
+ pyarrow
3
+ hnswlib
@@ -0,0 +1 @@
1
+ epochdb
@@ -0,0 +1,27 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "epochdb"
7
+ version = "0.1.0"
8
+ description = "An agentic memory engine designed for lossless, tiered verbatim storage and multi-hop retrieval."
9
+ readme = "README.md"
10
+ license = { text = "MIT License" }
11
+ authors = [
12
+ { name = "Jeff" }
13
+ ]
14
+ requires-python = ">=3.8"
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ ]
20
+ dependencies = [
21
+ "numpy",
22
+ "pyarrow",
23
+ "hnswlib"
24
+ ]
25
+
26
+ [project.urls]
27
+ "Homepage" = "https://github.com/jeff/epochdb"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+