coremem 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coremem/__init__.py +21 -0
- coremem/backends/__init__.py +5 -0
- coremem/backends/base.py +44 -0
- coremem/backends/chroma.py +130 -0
- coremem/backends/hybrid.py +185 -0
- coremem/core.py +103 -0
- coremem/heuristics.py +150 -0
- coremem/ingest.py +73 -0
- coremem/layers.py +88 -0
- coremem/types.py +51 -0
- coremem-0.1.0.dist-info/METADATA +170 -0
- coremem-0.1.0.dist-info/RECORD +14 -0
- coremem-0.1.0.dist-info/WHEEL +4 -0
- coremem-0.1.0.dist-info/licenses/LICENSE +21 -0
coremem/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""coremem — Zero-LLM memory for AI agents.
|
|
2
|
+
|
|
3
|
+
Dual-backend architecture:
|
|
4
|
+
- ChromaBackend: Pure ChromaDB (baseline, 95%+ LongMemEval target)
|
|
5
|
+
- HybridBackend: HybridDB (SQLite+FTS5+ChromaDB, >95% target)
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from coremem import MemoryCore
|
|
9
|
+
from coremem.backends.chroma import ChromaBackend
|
|
10
|
+
|
|
11
|
+
core = MemoryCore(backend=ChromaBackend(path="./memory"))
|
|
12
|
+
results = core.search("How many model kits?")
|
|
13
|
+
context = core.wake_up(user_id="alice")
|
|
14
|
+
core.ingest("user", "I built a Spitfire model kit")
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from coremem.core import MemoryCore
|
|
18
|
+
from coremem.heuristics import SearchHeuristics
|
|
19
|
+
from coremem.types import Memory, SearchQuery, SearchResult
|
|
20
|
+
|
|
21
|
+
__all__ = ["MemoryCore", "Memory", "SearchResult", "SearchQuery", "SearchHeuristics"]
|
coremem/backends/base.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Abstract backend interface."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from coremem.types import Memory, SearchQuery, SearchResult
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StoreBackend(ABC):
    """Contract every coremem storage/retrieval backend has to fulfil.

    Backends shipped in this package:
        - ChromaBackend: pure ChromaDB
        - HybridBackend: HybridDB (SQLite + FTS5 + ChromaDB)

    All six methods are abstract, so a subclass cannot be instantiated
    until it overrides every one of them.
    """

    @abstractmethod
    def ingest(self, memory: Memory) -> str:
        """Persist one memory and return the ID it was stored under."""

    @abstractmethod
    def ingest_batch(self, memories: list[Memory]) -> list[str]:
        """Persist several memories in one batch and return their storage IDs."""

    @abstractmethod
    def search(self, query: SearchQuery) -> list[SearchResult]:
        """Look up memories matching the text (and filters) of *query*."""

    @abstractmethod
    def get_recent(self, limit: int = 10) -> list[Memory]:
        """Return up to *limit* of the most recently stored memories."""

    @abstractmethod
    def count(self) -> int:
        """Return how many memories are currently stored."""

    @abstractmethod
    def clear(self) -> None:
        """Remove every stored memory."""
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Pure ChromaDB backend — baseline, zero LLM."""
|
|
2
|
+
|
|
3
|
+
from coremem.backends.base import StoreBackend
|
|
4
|
+
from coremem.types import Memory, SearchQuery, SearchResult
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ChromaBackend(StoreBackend):
    """ChromaDB-only backend for semantic search.

    Stores message text verbatim in a single persistent Chroma collection
    configured for cosine distance and retrieves it by embedding similarity.
    ``search`` can optionally scope results by ``wing``/``room`` metadata.

    NOTE(review): ``ingest_batch`` only writes ``role``, ``session_id`` and
    ``ts`` metadata, so wing/room filters can only match rows whose
    wing/room metadata was written by some other producer.
    """

    # Name of the one collection that holds every memory.
    _COLLECTION_NAME = "mempalace_drawers"

    def __init__(self, path: str):
        """Open (or create) the persistent collection stored under *path*.

        Raises:
            ImportError: if the ``chromadb`` package is not installed.
        """
        try:
            import chromadb
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "chromadb is required for ChromaBackend. "
                "Install with: pip install chromadb"
            ) from err
        self._client = chromadb.PersistentClient(path=path)
        self._collection = self._open_collection()

    def _open_collection(self):
        """Return the memory collection, creating it with cosine distance if missing."""
        return self._client.get_or_create_collection(
            name=self._COLLECTION_NAME,
            metadata={"hnsw:space": "cosine"},
        )

    @staticmethod
    def _to_memory(mid: str, doc: str, meta: dict | None, score: float = 0.0) -> Memory:
        """Rebuild a ``Memory`` from one Chroma row.

        Restores the timestamp written by ``ingest_batch`` (ISO-8601 text in
        the ``ts`` metadata key) and maps the empty-string session
        placeholder back to ``None``.
        """
        from datetime import datetime

        meta = meta or {}
        ts = None
        raw_ts = meta.get("ts")
        if raw_ts:
            try:
                ts = datetime.fromisoformat(raw_ts)
            except (ValueError, TypeError):
                # Unparseable timestamp: keep the row, just without a ts.
                ts = None
        return Memory(
            id=mid,
            content=doc,
            role=meta.get("role", "user"),
            ts=ts,
            session_id=meta.get("session_id") or None,
            score=score,
        )

    def ingest(self, memory: Memory) -> str:
        """Store one memory; returns its storage ID (``""`` if nothing was stored)."""
        ids = self.ingest_batch([memory])
        return ids[0] if ids else ""

    def ingest_batch(self, memories: list[Memory]) -> list[str]:
        """Store *memories* with a single ``collection.add`` call.

        A memory's own ``id`` is used when it is non-empty; otherwise the
        first 12 characters of a random UUID4 string are used. Returns the
        IDs in input order; an empty input returns ``[]`` without touching
        the collection.
        """
        import uuid

        if not memories:
            return []

        ids = [m.id or str(uuid.uuid4())[:12] for m in memories]
        documents = [m.content for m in memories]
        metadatas = [
            {
                "role": m.role,
                # A missing session is stored as "".
                "session_id": m.session_id or "",
                **({"ts": m.ts.isoformat()} if m.ts else {}),
            }
            for m in memories
        ]
        self._collection.add(ids=ids, documents=documents, metadatas=metadatas)
        return ids

    def search(self, query: SearchQuery) -> list[SearchResult]:
        """Run a semantic query and return results scored as ``1 - distance``.

        ``query.wing`` and ``query.room`` become metadata equality filters
        when set. Every result is tagged ``source="semantic"``.
        """
        filters = []
        if query.wing:
            filters.append({"wing": query.wing})
        if query.room:
            filters.append({"room": query.room})

        kwargs: dict[str, object] = {"n_results": query.limit}
        if len(filters) == 1:
            kwargs["where"] = filters[0]
        elif filters:
            # Chroma accepts only one top-level key in `where`; several
            # conditions must be combined under an explicit $and.
            kwargs["where"] = {"$and": filters}

        results = self._collection.query(
            query_texts=[query.text],
            include=["documents", "metadatas", "distances"],
            **kwargs,
        )

        search_results = []
        # Results are nested one level per query text; only one text is sent.
        if results["ids"] and results["ids"][0]:
            for i, mid in enumerate(results["ids"][0]):
                doc = results["documents"][0][i] if results["documents"] else ""
                meta = results["metadatas"][0][i] if results["metadatas"] else {}
                dist = results["distances"][0][i] if results["distances"] else 1.0
                score = 1.0 - dist
                memory = self._to_memory(mid, doc, meta, score=score)
                search_results.append(
                    SearchResult(memory=memory, score=score, source="semantic")
                )

        return search_results

    def get_recent(self, limit: int = 10) -> list[Memory]:
        """Return up to *limit* stored memories.

        NOTE(review): no ordering is requested from Chroma here, so whether
        these are really the most recent rows depends on the order ``get``
        returns them in — confirm.
        """
        results = self._collection.get(
            limit=limit,
            include=["documents", "metadatas"],
        )
        memories = []
        if results["ids"]:
            for i, mid in enumerate(results["ids"]):
                doc = results["documents"][i] if results["documents"] else ""
                meta = results["metadatas"][i] if results["metadatas"] else {}
                memories.append(self._to_memory(mid, doc, meta))
        return memories

    def count(self) -> int:
        """Return the number of rows in the collection."""
        return self._collection.count()

    def clear(self) -> None:
        """Drop the collection and recreate it empty."""
        self._client.delete_collection(self._COLLECTION_NAME)
        self._collection = self._open_collection()
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""HybridDB backend — SQLite + FTS5 + ChromaDB via hybriddb.
|
|
2
|
+
|
|
3
|
+
Leverages all three HybridDB layers:
|
|
4
|
+
- SQLite: structured storage, metadata, timestamps
|
|
5
|
+
- FTS5: exact keyword/term matching with BM25-like scoring
|
|
6
|
+
- ChromaDB: semantic similarity via all-MiniLM-L6-v2 embeddings
|
|
7
|
+
|
|
8
|
+
The fused ranking formula:
|
|
9
|
+
fused_score = semantic_score * (1 + fts_weight * keyword_overlap)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
|
|
15
|
+
from coremem.backends.base import StoreBackend
|
|
16
|
+
from coremem.types import Memory, SearchQuery, SearchResult
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class HybridBackend(StoreBackend):
    """HybridDB backend combining semantic + keyword + structured search.

    Rows are written to a ``messages`` table through the HybridDB handle and
    read back through its ``search`` call. Per the module description the
    fused score is ``semantic * (1 + fts_weight * keyword_overlap)``; that
    fusion happens inside HybridDB, not in this class.
    """

    def __init__(self, path: str):
        """Open the HybridDB store at *path* and make sure the table exists."""
        from hybriddb import HybridDB

        self._path = path
        self._db = HybridDB(path=path)
        self._ensure_tables()

    def _ensure_tables(self) -> None:
        """Create the ``messages`` table, ignoring any failure."""
        try:
            self._db.create_table(
                "messages",
                {
                    "id": "TEXT PRIMARY KEY",
                    "content": "LONGTEXT",
                    "role": "TEXT",
                    "metadata": "TEXT",
                    "ts": "TEXT",
                },
            )
        except Exception:
            # Best-effort by design. Presumably create_table raises when the
            # table already exists — TODO confirm against hybriddb; any other
            # failure is hidden here as well.
            pass

    @staticmethod
    def _parse_metadata(raw_meta) -> dict:
        """Return the row's metadata as a dict (``{}`` if absent or malformed)."""
        if not raw_meta:
            return {}
        if isinstance(raw_meta, dict):
            return raw_meta
        if isinstance(raw_meta, str):
            try:
                parsed = json.loads(raw_meta)
            except (json.JSONDecodeError, TypeError):
                return {}
            # Metadata is written as a JSON object; ignore anything else.
            return parsed if isinstance(parsed, dict) else {}
        return {}

    @staticmethod
    def _parse_ts(ts_str) -> datetime | None:
        """Parse an ISO-8601 timestamp string; ``None`` if empty or invalid."""
        if not ts_str:
            return None
        try:
            return datetime.fromisoformat(ts_str)
        except (ValueError, TypeError):
            return None

    def ingest(self, memory: Memory) -> str:
        """Store one memory; returns its storage ID (``""`` if nothing was stored)."""
        ids = self.ingest_batch([memory])
        return ids[0] if ids else ""

    def ingest_batch(self, memories: list[Memory]) -> list[str]:
        """Insert *memories* into ``messages`` with one ``insert_batch`` call.

        A memory's own ``id`` is used when non-empty (matching ChromaBackend);
        otherwise the first 12 characters of a random UUID4 string are used.
        Returns the IDs in input order.
        """
        import uuid

        if not memories:
            return []

        rows = []
        ids = []
        for m in memories:
            mid = m.id or str(uuid.uuid4())[:12]
            ids.append(mid)
            # Compute the timestamp once so the `ts` column and the JSON
            # metadata can never disagree.
            ts_iso = (m.ts or datetime.now()).isoformat()
            metadata = {
                "role": m.role,
                "session_id": m.session_id or "",
                "ts": ts_iso,
            }
            if m.workspace_id:
                # search()/get_recent() read this key back.
                metadata["workspace_id"] = m.workspace_id
            rows.append({
                "id": mid,
                "content": m.content,
                "role": m.role,
                "metadata": json.dumps(metadata),
                "ts": ts_iso,
            })
        self._db.insert_batch("messages", rows)
        return ids

    def search(self, query: SearchQuery) -> list[SearchResult]:
        """Search ``messages.content`` and return at most one hit per session.

        Three times ``query.limit`` rows are requested so that dropping
        repeated sessions still leaves enough candidates. Every result is
        tagged ``source="hybrid"`` and scored with the row's ``_score``.
        """
        import sys

        # Look for SearchMode in the module that defines the DB class; if it
        # is absent the search runs without the hybrid-mode arguments.
        db_module = sys.modules.get(type(self._db).__module__)
        search_mode = getattr(db_module, "SearchMode", None) if db_module else None

        fetch_limit = query.limit * 3
        fts_weight = 0.5

        kwargs = {
            "table": "messages",
            "column": "content",
            "query": query.text,
            "limit": fetch_limit,
        }
        if search_mode is not None:
            kwargs["mode"] = search_mode.HYBRID
            kwargs["fts_weight"] = fts_weight
            kwargs["recency_weight"] = 0.3
            kwargs["recency_column"] = "ts"

        raw_rows = self._db.search(**kwargs)

        seen_sessions: set[str] = set()
        results = []
        for row in raw_rows:
            meta_dict = self._parse_metadata(row.get("metadata"))

            sid = meta_dict.get("session_id", "")
            ws_id = meta_dict.get("workspace_id", "")
            # Keep only the first row of each session; rows without a
            # session are never deduplicated.
            if sid and sid in seen_sessions:
                continue
            if sid:
                seen_sessions.add(sid)

            score = float(row.get("_score", 0.0))
            results.append(SearchResult(
                memory=Memory(
                    id=str(row.get("id", "")),
                    content=row.get("content", ""),
                    role=row.get("role", ""),
                    ts=self._parse_ts(row.get("ts", "")),
                    session_id=sid or None,
                    workspace_id=ws_id or None,
                    score=score,
                ),
                score=score,
                source="hybrid",
            ))

            if len(results) >= query.limit:
                break

        return results

    def get_recent(self, limit: int = 10) -> list[Memory]:
        """Return up to *limit* rows ordered by ``ts`` descending.

        Session, workspace and timestamp are restored from the row so that
        callers filtering on ``session_id`` can match. Any backend error
        yields an empty list.
        """
        try:
            rows = self._db.search(
                table="messages",
                column="content",
                query="",
                limit=limit,
                order_by="ts DESC",
            )
        except Exception:
            return []

        memories = []
        for row in rows:
            meta = self._parse_metadata(row.get("metadata"))
            memories.append(Memory(
                id=str(row.get("id", "")),
                content=row.get("content", ""),
                role=row.get("role", "user"),
                ts=self._parse_ts(row.get("ts", "")),
                session_id=meta.get("session_id") or None,
                workspace_id=meta.get("workspace_id") or None,
            ))
        return memories

    def count(self) -> int:
        """Return the row count of ``messages``; 0 on error or a non-int result."""
        try:
            result = self._db.count("messages")
            return result if isinstance(result, int) else 0
        except Exception:
            return 0

    def clear(self) -> None:
        """Delete every row from ``messages``, ignoring any failure."""
        try:
            self._db.raw_query("DELETE FROM messages")
        except Exception:
            # NOTE(review): a failed delete is silent, so callers cannot tell
            # whether the store was really emptied.
            pass
|
coremem/core.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""MemoryCore — the main entry point for coremem.
|
|
2
|
+
|
|
3
|
+
Wraps any StoreBackend with heuristics, wake-up context, and ingestion.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from coremem.backends.base import StoreBackend
|
|
7
|
+
from coremem.heuristics import SearchHeuristics
|
|
8
|
+
from coremem.ingest import ingest_batch, ingest_message
|
|
9
|
+
from coremem.layers import WakeUpContext
|
|
10
|
+
from coremem.types import SearchQuery, SearchResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MemoryCore:
    """Zero-LLM memory facade for AI agents.

    Wraps any ``StoreBackend`` and exposes one API for ingestion, heuristic
    search and wake-up context, whichever backend is plugged in:
        - ChromaBackend: Pure ChromaDB (baseline)
        - HybridBackend: HybridDB SQLite+FTS5+ChromaDB (enhanced)

    Usage:
        core = MemoryCore(backend=ChromaBackend(path="./memory"))
        core.ingest("user", "I built a Spitfire model kit")
        results = core.search("How many model kits?")
        context = core.wake_up(user_id="alice")
    """

    def __init__(self, backend: StoreBackend):
        self._backend = backend
        self._wakeup = WakeUpContext(backend)
        self._heuristics = SearchHeuristics()

    @property
    def backend(self) -> StoreBackend:
        """The storage backend this core delegates to."""
        return self._backend

    def ingest(self, role: str, content: str, session_id: str | None = None) -> str:
        """Store a single message exactly as given and return its storage ID.

        The text is handed to the ingestion pipeline unchanged — no
        extraction, no summarisation.
        """
        return ingest_message(
            backend=self._backend,
            role=role,
            content=content,
            session_id=session_id,
        )

    def ingest_many(self, messages: list[dict], session_id: str | None = None) -> list[str]:
        """Store several messages exactly as given and return their storage IDs."""
        return ingest_batch(
            backend=self._backend,
            messages=messages,
            session_id=session_id,
        )

    def search(self, query: str, limit: int = 10) -> list[SearchResult]:
        """Search memories, re-score them with the heuristics, return the top *limit*.

        Steps:
            1. Ask the backend for ``limit * 3`` candidates.
            2. Replace each candidate's score with the heuristic-adjusted score.
            3. Sort by that score, highest first, and keep the first *limit*.

        No step calls an LLM.
        """
        candidates = self._backend.search(SearchQuery(text=query, limit=limit * 3))

        for hit in candidates:
            stamp = hit.memory.ts
            hit.score = SearchHeuristics.apply_all(
                query=query,
                content=hit.memory.content,
                score=hit.score,
                ts=stamp.isoformat() if stamp else None,
            )

        candidates.sort(key=lambda hit: hit.score, reverse=True)
        return candidates[:limit]

    def wake_up(self, user_id: str = "default", session_id: str | None = None) -> str:
        """Build the always-on L0+L1 context, plus L2 when a session is given.

        The L2 block is appended after a blank line, and only when
        *session_id* is set and the session layer returns something.
        """
        context = self._wakeup.essential(user_id=user_id)
        if not session_id:
            return context

        session_block = self._wakeup.session(session_id=session_id)
        if session_block:
            context = f"{context}\n\n{session_block}"
        return context

    def deep_search_context(self, query: str, limit: int = 10) -> str | None:
        """Run the L3 deep search and return its formatted text.

        Returns None when the search finds nothing.
        """
        return self._wakeup.deep_search(query=query, limit=limit)

    def count(self) -> int:
        """Return how many memories the backend reports as stored."""
        return self._backend.count()

    def clear(self) -> None:
        """Ask the backend to remove every stored memory."""
        self._backend.clear()
|
coremem/heuristics.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Deterministic post-retrieval heuristics — shared by all backends.
|
|
2
|
+
|
|
3
|
+
All heuristics are zero-LLM, purely pattern-based.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SearchHeuristics:
|
|
10
|
+
"""Post-retrieval scoring heuristics based on MemPalace's proven patterns.
|
|
11
|
+
|
|
12
|
+
Each heuristic applies a deterministic multiplier to results from
|
|
13
|
+
the backend's raw search. Heuristics are additive — they boost or
|
|
14
|
+
penalize scores without replacing the embedding ranking.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
KEYWORD_OVERLAP_WEIGHT = 1.0
|
|
18
|
+
TEMPORAL_BOOST_FACTOR = 0.15
|
|
19
|
+
PERSON_NAME_BOOST = 0.40
|
|
20
|
+
QUOTED_PHRASE_BOOST = 0.60
|
|
21
|
+
COUNTING_QUESTION_SNIPPET_LENGTH = 3000
|
|
22
|
+
RECENCY_DECAY_WEIGHT = 0.1
|
|
23
|
+
RECENCY_DECAY_HALF_LIFE_DAYS = 30
|
|
24
|
+
STOP_WORDS = {
|
|
25
|
+
"the", "a", "an", "is", "are", "was", "were", "be", "been",
|
|
26
|
+
"have", "has", "had", "do", "does", "did", "will", "would",
|
|
27
|
+
"could", "should", "may", "might", "can", "shall",
|
|
28
|
+
"to", "of", "in", "for", "on", "with", "at", "by", "from",
|
|
29
|
+
"and", "or", "but", "not", "so", "if", "as", "than", "that",
|
|
30
|
+
"this", "these", "those", "it", "its", "i", "me", "my", "we",
|
|
31
|
+
"our", "you", "your", "he", "she", "they", "them", "their",
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
@classmethod
|
|
35
|
+
def keyword_overlap(cls, query: str, content: str, score: float) -> float:
|
|
36
|
+
"""Boost score when query keywords appear in content.
|
|
37
|
+
|
|
38
|
+
fused = score * (1 + weight * keyword_overlap_ratio)
|
|
39
|
+
"""
|
|
40
|
+
q_words = {w.lower() for w in re.findall(r"\w+", query) if len(w) > 2}
|
|
41
|
+
q_words -= cls.STOP_WORDS
|
|
42
|
+
if not q_words:
|
|
43
|
+
return score
|
|
44
|
+
|
|
45
|
+
c_words = set(re.findall(r"\w+", content.lower()))
|
|
46
|
+
overlap = len(q_words & c_words) / len(q_words)
|
|
47
|
+
return score * (1 + cls.KEYWORD_OVERLAP_WEIGHT * overlap)
|
|
48
|
+
|
|
49
|
+
@classmethod
|
|
50
|
+
def temporal_boost(cls, query: str, content_ts: str | None, score: float) -> float:
|
|
51
|
+
"""Boost recent memories when query contains temporal cues.
|
|
52
|
+
|
|
53
|
+
Detects patterns like 'current', 'latest', 'now', 'this year',
|
|
54
|
+
'recently', 'these days' and boosts newer content.
|
|
55
|
+
"""
|
|
56
|
+
temporal_cues = {
|
|
57
|
+
"current", "latest", "now", "recently", "recent",
|
|
58
|
+
"lately", "new", "newest", "these days", "this year",
|
|
59
|
+
"nowadays", "updated", "today",
|
|
60
|
+
}
|
|
61
|
+
q_lower = query.lower()
|
|
62
|
+
if not any(cue in q_lower for cue in temporal_cues):
|
|
63
|
+
return score
|
|
64
|
+
|
|
65
|
+
if not content_ts:
|
|
66
|
+
return score
|
|
67
|
+
|
|
68
|
+
try:
|
|
69
|
+
from datetime import datetime
|
|
70
|
+
ts = datetime.fromisoformat(content_ts)
|
|
71
|
+
age_days = (datetime.now() - ts).days
|
|
72
|
+
if age_days < 30:
|
|
73
|
+
return score * (1 + cls.TEMPORAL_BOOST_FACTOR)
|
|
74
|
+
except (ValueError, TypeError):
|
|
75
|
+
pass
|
|
76
|
+
return score
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def recency_decay(cls, content_ts: str | None, score: float) -> float:
|
|
80
|
+
"""Unconditional mild recency boost — applied to every result.
|
|
81
|
+
|
|
82
|
+
Uses exponential decay: score * (1 + weight * e^(-age_days / half_life))
|
|
83
|
+
Very recent content gets ~10% boost, 30-day-old ~3.7%, 60-day ~1.4%.
|
|
84
|
+
Always applied regardless of query content.
|
|
85
|
+
"""
|
|
86
|
+
if not content_ts:
|
|
87
|
+
return score
|
|
88
|
+
|
|
89
|
+
try:
|
|
90
|
+
from datetime import datetime
|
|
91
|
+
ts = datetime.fromisoformat(content_ts)
|
|
92
|
+
age_days = max(0, (datetime.now() - ts).days)
|
|
93
|
+
import math
|
|
94
|
+
factor = cls.RECENCY_DECAY_WEIGHT * math.exp(-age_days / cls.RECENCY_DECAY_HALF_LIFE_DAYS)
|
|
95
|
+
return score * (1 + factor)
|
|
96
|
+
except (ValueError, TypeError):
|
|
97
|
+
pass
|
|
98
|
+
return score
|
|
99
|
+
|
|
100
|
+
@classmethod
|
|
101
|
+
def person_name_boost(cls, content: str, score: float) -> float:
|
|
102
|
+
"""Boost content containing proper names (capitalized multi-word)."""
|
|
103
|
+
names = re.findall(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2})\b", content)
|
|
104
|
+
if names:
|
|
105
|
+
return score * (1 + cls.PERSON_NAME_BOOST)
|
|
106
|
+
return score
|
|
107
|
+
|
|
108
|
+
@classmethod
|
|
109
|
+
def quoted_phrase_boost(cls, query: str, content: str, score: float) -> float:
|
|
110
|
+
"""Boost when a quoted phrase from the query appears verbatim in content."""
|
|
111
|
+
quoted = re.findall(r'"([^"]+)"', query)
|
|
112
|
+
if not quoted:
|
|
113
|
+
return score
|
|
114
|
+
for phrase in quoted:
|
|
115
|
+
if phrase.lower() in content.lower():
|
|
116
|
+
return score * (1 + cls.QUOTED_PHRASE_BOOST)
|
|
117
|
+
return score
|
|
118
|
+
|
|
119
|
+
@classmethod
|
|
120
|
+
def is_counting_question(cls, query: str) -> bool:
|
|
121
|
+
"""Detect 'how many' / 'how much total' questions."""
|
|
122
|
+
q = query.lower()
|
|
123
|
+
return q.startswith("how many") or "how much total" in q
|
|
124
|
+
|
|
125
|
+
@classmethod
|
|
126
|
+
def extract_date_cues(cls, query: str) -> str | None:
|
|
127
|
+
"""Extract a date reference from the query for temporal scoping."""
|
|
128
|
+
year_match = re.search(r"\b(20\d{2})\b", query)
|
|
129
|
+
if year_match:
|
|
130
|
+
return year_match.group(1)
|
|
131
|
+
|
|
132
|
+
month_match = re.search(
|
|
133
|
+
r"\b(january|february|march|april|may|june|july|"
|
|
134
|
+
r"august|september|october|november|december)\b",
|
|
135
|
+
query, re.IGNORECASE,
|
|
136
|
+
)
|
|
137
|
+
if month_match:
|
|
138
|
+
return month_match.group(1)
|
|
139
|
+
|
|
140
|
+
return None
|
|
141
|
+
|
|
142
|
+
@classmethod
|
|
143
|
+
def apply_all(cls, query: str, content: str, score: float, ts: str | None = None) -> float:
|
|
144
|
+
"""Apply all applicable heuristics to a single result."""
|
|
145
|
+
s = cls.keyword_overlap(query, content, score)
|
|
146
|
+
s = cls.recency_decay(ts, s)
|
|
147
|
+
s = cls.temporal_boost(query, ts, s)
|
|
148
|
+
s = cls.person_name_boost(content, s)
|
|
149
|
+
s = cls.quoted_phrase_boost(query, content, s)
|
|
150
|
+
return s
|
coremem/ingest.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Verbatim message ingestion — shared pipeline.
|
|
2
|
+
|
|
3
|
+
All backends ingest raw text without LLM extraction.
|
|
4
|
+
The only processing is deterministic: assign IDs, timestamps,
|
|
5
|
+
and attach metadata (role, session_id).
|
|
6
|
+
|
|
7
|
+
No fact extraction. No summarization. No LLM calls.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from datetime import UTC, datetime
|
|
11
|
+
|
|
12
|
+
from coremem.backends.base import StoreBackend
|
|
13
|
+
from coremem.types import Memory
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def ingest_message(
    backend: StoreBackend,
    role: str,
    content: str,
    session_id: str | None = None,
    ts: datetime | None = None,
) -> str:
    """Ingest a single message verbatim.

    Args:
        backend: The storage backend (ChromaBackend or HybridBackend).
        role: Message role (user, assistant, tool, system).
        content: Raw message text — stored as-is, no extraction.
        session_id: Optional session/thread identifier.
        ts: Optional timestamp (defaults to the current UTC time).

    Returns:
        The storage ID assigned to this memory, or ``""`` when *content* is
        missing, empty or whitespace-only (nothing is stored in that case).
    """
    # Guard against None as well as blank text: chat transcripts can carry
    # messages whose content is absent.
    if not content or not content.strip():
        return ""

    memory = Memory(
        id="",  # left empty here so the backend chooses the storage ID
        content=content,
        role=role,
        ts=ts or datetime.now(UTC),
        session_id=session_id,
    )
    return backend.ingest(memory)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def ingest_batch(
    backend: StoreBackend,
    messages: list[dict],
    session_id: str | None = None,
) -> list[str]:
    """Ingest a batch of messages verbatim with one backend call.

    Args:
        backend: The storage backend.
        messages: List of {"role": str, "content": str} dicts. A missing or
            None role defaults to "user"; messages whose content is
            missing, None, empty or whitespace-only are skipped.
        session_id: Optional session/thread identifier applied to every
            message.

    Returns:
        List of storage IDs for the messages that were stored.
    """
    memories = []
    for msg in messages:
        # `or ""` also covers an explicit {"content": None}.
        content = msg.get("content") or ""
        if not content.strip():
            continue
        memories.append(Memory(
            id="",  # left empty here so the backend chooses the storage ID
            content=content,
            role=msg.get("role") or "user",
            ts=datetime.now(UTC),
            session_id=session_id,
        ))

    if not memories:
        return []

    # One batched write instead of one backend round-trip per message.
    return [mid for mid in backend.ingest_batch(memories) if mid]
|
coremem/layers.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""L0-L3 wake-up context stack.
|
|
2
|
+
|
|
3
|
+
Inspired by MemPalace's four-layer memory stack and validated against
|
|
4
|
+
the human memory multi-store model (Mujawar et al., 2021).
|
|
5
|
+
|
|
6
|
+
L0: Identity — user profile text (~100 tokens, always loaded)
|
|
7
|
+
L1: Essential — top-ranked recent memories (~500 chars, always loaded)
|
|
8
|
+
L2: On-Demand — session-specific context (~200-500 chars, on detect)
|
|
9
|
+
L3: Deep Search — full hybrid/semantic search (per explicit query)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from coremem.backends.base import StoreBackend
|
|
13
|
+
from coremem.heuristics import SearchHeuristics
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class WakeUpContext:
    """Build the L0-L3 context stack from a memory backend.

    Usage:
        ctx = WakeUpContext(backend)
        l0_l1 = ctx.essential(user_id="alice")        # always loaded
        l2 = ctx.session(session_id="sess_5")         # on session detect
        l3 = ctx.deep_search("model kits", limit=5)   # explicit tool call
    """

    # Snippet length for L1/L2 lines and for ordinary L3 results.
    _SNIPPET_LENGTH = 200
    _DEEP_SEARCH_SNIPPET_LENGTH = 500

    def __init__(self, backend: StoreBackend):
        self._backend = backend

    @staticmethod
    def _clip(text: str, max_len: int) -> str:
        """Return *text* cut to *max_len* characters, with "..." appended if cut."""
        if len(text) > max_len:
            return text[:max_len] + "..."
        return text

    def essential(self, user_id: str = "default") -> str:
        """Build the L0 + L1 context.

        L0 is a single identity line for *user_id*. L1 lists the first three
        memories returned by ``backend.get_recent``, each clipped to 200
        characters; it is omitted when the backend returns nothing.
        """
        parts = [f"[L0: Identity] User: {user_id}"]

        recent = self._backend.get_recent(limit=10)
        if recent:
            snippets = [
                f"  - [{m.role}] {self._clip(m.content, self._SNIPPET_LENGTH)}"
                for m in recent[:3]
            ]
            parts.append("[L1: Essential] Recent context:\n" + "\n".join(snippets))

        return "\n".join(parts)

    def session(self, session_id: str) -> str | None:
        """Build L2 context for one session.

        Looks only at the 20 memories returned by ``backend.get_recent`` and
        keeps up to five whose ``session_id`` equals *session_id*. Returns
        None when none of them match.
        """
        recent = self._backend.get_recent(limit=20)
        session_memories = [m for m in recent if m.session_id == session_id]
        if not session_memories:
            return None

        lines = [f"[L2: On-Demand] Session {session_id}:"]
        lines.extend(
            f"  - [{m.role}] {self._clip(m.content, self._SNIPPET_LENGTH)}"
            for m in session_memories[:5]
        )
        return "\n".join(lines)

    def deep_search(self, query: str, limit: int = 10) -> str | None:
        """Build L3 context from a full backend search.

        Results are listed in the order the backend returns them. Content is
        clipped to 500 characters, or to
        ``SearchHeuristics.COUNTING_QUESTION_SNIPPET_LENGTH`` for counting
        questions. Returns None when nothing is found.
        """
        from coremem.types import SearchQuery

        results = self._backend.search(SearchQuery(text=query, limit=limit))
        if not results:
            return None

        # One limit per query: counting questions get the longer snippet for
        # every result, not only for content above the longer limit.
        if SearchHeuristics.is_counting_question(query):
            max_len = SearchHeuristics.COUNTING_QUESTION_SNIPPET_LENGTH
        else:
            max_len = self._DEEP_SEARCH_SNIPPET_LENGTH

        lines = [f"[L3: Deep Search] Results for '{query}':"]
        for r in results:
            content = self._clip(r.memory.content, max_len)
            lines.append(
                f"  {r.memory.id[:12]} [{r.memory.role}] "
                f"(score={r.score:.2f}): {content}"
            )

        return "\n".join(lines)
|
coremem/types.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Core types for coremem."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class Memory:
|
|
9
|
+
"""A single memory entry — one message or conversation turn."""
|
|
10
|
+
id: str
|
|
11
|
+
content: str
|
|
12
|
+
role: str = "user"
|
|
13
|
+
ts: datetime | None = None
|
|
14
|
+
session_id: str | None = None
|
|
15
|
+
workspace_id: str | None = None
|
|
16
|
+
score: float = 0.0
|
|
17
|
+
|
|
18
|
+
def dict(self) -> dict:
|
|
19
|
+
return {
|
|
20
|
+
"id": self.id,
|
|
21
|
+
"content": self.content,
|
|
22
|
+
"role": self.role,
|
|
23
|
+
"ts": self.ts.isoformat() if self.ts else None,
|
|
24
|
+
"session_id": self.session_id,
|
|
25
|
+
"score": self.score,
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class SearchResult:
    """One hit produced by a memory search."""

    # The stored memory this hit refers to.
    memory: Memory
    # Ranking score for this hit.
    score: float = 0.0
    # Retrieval path that produced the hit; the bundled backends use
    # "semantic" (ChromaBackend) and "hybrid" (HybridBackend).
    source: str = "semantic"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
|
|
38
|
+
class SearchQuery:
|
|
39
|
+
"""A search query with optional filters."""
|
|
40
|
+
text: str
|
|
41
|
+
limit: int = 10
|
|
42
|
+
wing: str | None = None
|
|
43
|
+
room: str | None = None
|
|
44
|
+
|
|
45
|
+
def dict(self) -> dict:
|
|
46
|
+
return {
|
|
47
|
+
"text": self.text,
|
|
48
|
+
"limit": self.limit,
|
|
49
|
+
"wing": self.wing,
|
|
50
|
+
"room": self.room,
|
|
51
|
+
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: coremem
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Zero-LLM memory retrieval for AI agents — semantic search and deterministic heuristics
|
|
5
|
+
Author: Eddy Vinck
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: ai-agents,llm,memory,retrieval,semantic-search
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Requires-Dist: chromadb>=0.5.0
|
|
17
|
+
Requires-Dist: numpy>=1.24.0
|
|
18
|
+
Requires-Dist: pyyaml>=6.0
|
|
19
|
+
Requires-Dist: sentence-transformers>=2.0.0
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
23
|
+
Provides-Extra: hybrid
|
|
24
|
+
Requires-Dist: hybriddb>=0.2.0; extra == 'hybrid'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# CoreMem
|
|
28
|
+
|
|
29
|
+
> **Zero-LLM memory retrieval for AI agents.** CoreMem gives agents instant access to conversation history — semantic search plus deterministic retrieval heuristics, all without a single API call. Scores **98.0% R@5 on LongMemEval (500 questions)** in the Executive Assistant retrieval stack — no LLM, no tuning, no cloud.
|
|
30
|
+
|
|
31
|
+
> **Embedded. Local. Open source.** No external APIs, no vector DB services, no internet connection required. Runs entirely on-device with ChromaDB or HybridDB + sentence-transformers. Ships as a single Python package with zero infrastructure dependencies.
|
|
32
|
+
|
|
33
|
+
**Dual-backend architecture.** Drop-in backends (ChromaDB baseline, HybridDB enhanced) with the same API. Ranking pipeline: backend retrieval → deterministic heuristics → recency-aware rescoring → session-aware retrieval.
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from coremem import MemoryCore
|
|
37
|
+
from coremem.backends.chroma import ChromaBackend
|
|
38
|
+
|
|
39
|
+
core = MemoryCore(backend=ChromaBackend(path="./memory"))
|
|
40
|
+
|
|
41
|
+
# Ingest conversation turns
|
|
42
|
+
core.ingest("user", "I visited the Museum of Modern Art today")
|
|
43
|
+
core.ingest("assistant", "That sounds wonderful! How was it?")
|
|
44
|
+
core.ingest("user", "I went to an Ancient Civilizations exhibition at the Natural History Museum")
|
|
45
|
+
|
|
46
|
+
# Search with deterministic heuristic reranking
|
|
47
|
+
results = core.search("When did I visit art museums?")
|
|
48
|
+
|
|
49
|
+
for r in results:
|
|
50
|
+
print(f"[{r.memory.ts}] [{r.memory.role}] {r.memory.content}")
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Why CoreMem?
|
|
54
|
+
|
|
55
|
+
Every AI agent needs memory. But cloud-based vector search is expensive, slow, and doesn't work offline. Pure embedding similarity misses keyword matches and temporal context. LLM-based memory systems cost tokens per query.
|
|
56
|
+
|
|
57
|
+
CoreMem solves all three:
|
|
58
|
+
|
|
59
|
+
| Component | What it does |
|
|
60
|
+
|-----------|-------------|
|
|
61
|
+
| **Semantic search** | Embedding similarity via ChromaDB or HybridDB |
|
|
62
|
+
| **Deterministic heuristics** | Keyword overlap, temporal recency, person-name boost, quoted-phrase matching |
|
|
63
|
+
| **Session deduplication** | One result per conversation, with full context retrieval |
|
|
64
|
+
|
|
65
|
+
## LongMemEval Results (500 questions, no LLM, no tuning)
|
|
66
|
+
|
|
67
|
+
| Metric | Score |
|
|
68
|
+
|--------|-------|
|
|
69
|
+
| R@5 | **98.0%** |
|
|
70
|
+
| R@10 | **98.4%** |
|
|
71
|
+
| MRR | 0.944 |
|
|
72
|
+
| P@5 | 0.592 |
|
|
73
|
+
| F1@5 | 0.684 |
|
|
74
|
+
| Selectivity | 11.5% haystack scanned |
|
|
75
|
+
| Rank distribution | #1: 91.8%, #2-3: 5.0%, #4-5: 1.2%, #6-10: 0.4%, >10: 1.6% |
|
|
76
|
+
|
|
77
|
+
Outperforms MemPalace raw (96.6%) and matches their hybrid v4 held-out (98.4%) — with zero tuning, zero dev-set peeking.
|
|
78
|
+
|
|
79
|
+
## Installation
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install coremem
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
With HybridDB backend for enhanced FTS5 + vector hybrid search:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install coremem[hybrid]
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Core Concepts
|
|
92
|
+
|
|
93
|
+
### Backends
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
# ChromaDB baseline — pure vector search
|
|
97
|
+
from coremem.backends.chroma import ChromaBackend
|
|
98
|
+
core = MemoryCore(backend=ChromaBackend(path="./data"))
|
|
99
|
+
|
|
100
|
+
# HybridDB enhanced — FTS5 + vector hybrid search
|
|
101
|
+
from coremem.backends.hybrid import HybridBackend
|
|
102
|
+
core = MemoryCore(backend=HybridBackend(path="./data"))
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Ingestion
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
# Simple ingestion
|
|
109
|
+
core.ingest("user", "I built a Spitfire model kit", session_id="conv_001")
|
|
110
|
+
|
|
111
|
+
# Batch ingestion
|
|
112
|
+
from coremem.ingest import ingest_batch
|
|
113
|
+
ingest_batch(core, [
|
|
114
|
+
("user", "What's the weather today?"),
|
|
115
|
+
("assistant", "Sunny with a high of 72°F"),
|
|
116
|
+
], session_id="conv_001")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Search
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
# Basic search
|
|
123
|
+
results = core.search("How many model kits?", limit=10)
|
|
124
|
+
|
|
125
|
+
# Limit results
|
|
126
|
+
results = core.search("model building projects", limit=5)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Heuristics
|
|
130
|
+
|
|
131
|
+
Deterministic, zero-LLM score adjustments applied to every result:
|
|
132
|
+
|
|
133
|
+
| Heuristic | What it catches |
|
|
134
|
+
|-----------|----------------|
|
|
135
|
+
| `keyword_overlap` | Exact word matches between query and content |
|
|
136
|
+
| `temporal_boost` | Queries with "latest", "current", "recently" |
|
|
137
|
+
| `recency_decay` | Unconditional exponential decay (30-day half-life) |
|
|
138
|
+
| `person_name_boost` | Proper name mentions in content |
|
|
139
|
+
| `quoted_phrase_boost` | Exact phrase matches in quotes |
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from coremem import SearchHeuristics
|
|
143
|
+
|
|
144
|
+
# Apply all heuristics to a single result
|
|
145
|
+
score = SearchHeuristics.apply_all(
|
|
146
|
+
query="latest project",
|
|
147
|
+
content="Just finished the Q3 project report",
|
|
148
|
+
score=0.75,
|
|
149
|
+
ts="2026-05-28T10:00:00Z",
|
|
150
|
+
)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Wake-Up Context
|
|
154
|
+
|
|
155
|
+
Give the agent instant situational awareness:
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
context = core.wake_up(user_id="alice")
|
|
159
|
+
# Returns a compact string with L0 identity and L1 recent context.
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT — see [LICENSE](LICENSE).
|
|
165
|
+
|
|
166
|
+
## Author
|
|
167
|
+
|
|
168
|
+
Eddy Vinck
|
|
169
|
+
|
|
170
|
+
CoreMem is the retrieval engine behind the [Executive Assistant](https://github.com/open-assistants-lab) agent system. Pairs with [HybridDB](https://github.com/open-assistants-lab) for storage and ConnectKit for real-time sync.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
coremem/__init__.py,sha256=_Kg-RdBeYZY-YdCjwN4FtBR2ffAqFNmiNK82i6hRVyM,752
|
|
2
|
+
coremem/core.py,sha256=NngEo5rYO39BniE5eACxN4UnDYttOkns5n608oG6TNQ,3642
|
|
3
|
+
coremem/heuristics.py,sha256=H_vWIDT9-0e02lmLA_Wo32-UPCye6-5hA8z1MTwi-wg,5646
|
|
4
|
+
coremem/ingest.py,sha256=OwfWybRlcMLTwj-Eg-tb7FA9wx8I_fhW8F7DQVOlVUs,1865
|
|
5
|
+
coremem/layers.py,sha256=uwRyHouDti1FrdnjC99OzrhBZCiIzW8kGmQznockSUI,3180
|
|
6
|
+
coremem/types.py,sha256=K3EZHRIMwyBCiIBsrqOvPCTTGYa-DlST29XNsTY0_bc,1150
|
|
7
|
+
coremem/backends/__init__.py,sha256=xrnV5akT7X_sN9p2fmPHQa2kyLc55kk_ufwerVV3xak,119
|
|
8
|
+
coremem/backends/base.py,sha256=VzZUZo2eYSRIHybFSHBfmxJt-dAkpyjowjAtN3rDxuA,1144
|
|
9
|
+
coremem/backends/chroma.py,sha256=PswyxsyonuEVjqUj5WEbVXfyfiq6CEC8yNQ-SBT9G4k,4442
|
|
10
|
+
coremem/backends/hybrid.py,sha256=gSicnJJ6JiLLoBhU1WaGVUfiyEgse02-IJLKXizXU88,5732
|
|
11
|
+
coremem-0.1.0.dist-info/METADATA,sha256=x960_5Xe_T3QF7dkaR9JtYGi_jdNbnjFnbd4rkSAgJo,5628
|
|
12
|
+
coremem-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
13
|
+
coremem-0.1.0.dist-info/licenses/LICENSE,sha256=0VqBFOKolO6nkZLiHzpMlKlFlodudmA0C8xu-sf-Z7M,1067
|
|
14
|
+
coremem-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Eddy Vinck
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|