AbstractMemory 0.0.1-py3-none-any.whl → 0.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,41 +1,15 @@ abstractmemory/__init__.py
- """
- AbstractMemory - PLACEHOLDER PROJECT
-
- This is a placeholder package to reserve the 'AbstractMemory' name on PyPI.
-
- AbstractMemory will be a memory system designed to transform stateless LLMs
- into stateful LLMs, with primary integration planned for AbstractLLM.
-
- The actual implementation is currently part of AbstractLLM and will be
- modularized into this separate package in the future to enable:
- - Cleaner separation of concerns
- - Better evolution and maintenance over time
- - Reusability across different LLM frameworks
-
- WARNING: This is a placeholder. Do not use in production.
- """
-
- __version__ = "0.0.1"
- __author__ = "AbstractMemory Team"
- __email__ = "contact@example.com"
-
- # Placeholder exception to prevent accidental usage
- class PlaceholderError(Exception):
-     """Raised when attempting to use placeholder functionality."""
-     pass
-
- def placeholder_warning():
-     """
-     Warn users that this is a placeholder package.
-
-     Raises:
-         PlaceholderError: Always raised to prevent usage
-     """
-     raise PlaceholderError(
-         "AbstractMemory is currently a placeholder package. "
-         "The actual memory system implementation is part of AbstractLLM. "
-         "This package reserves the name for future modularization."
-     )
-
- # Make it clear this is a placeholder
- __all__ = ["placeholder_warning", "PlaceholderError", "__version__"]
+ from .models import TripleAssertion
+ from .embeddings import AbstractGatewayTextEmbedder, TextEmbedder
+ from .in_memory_store import InMemoryTripleStore
+ from .lancedb_store import LanceDBTripleStore
+ from .store import TripleStore, TripleQuery
+
+ __all__ = [
+     "AbstractGatewayTextEmbedder",
+     "InMemoryTripleStore",
+     "LanceDBTripleStore",
+     "TextEmbedder",
+     "TripleAssertion",
+     "TripleQuery",
+     "TripleStore",
+ ]
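With 0.0.2 the placeholder API (`placeholder_warning`, `PlaceholderError`, `__version__`) is gone and the package exports the triple-store primitives directly. A quick smoke test of the new surface (a sketch, assuming 0.0.2 is installed):

```python
import abstractmemory

# The seven public names re-exported by __init__.py above.
print(abstractmemory.__all__)
```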
@@ -0,0 +1,96 @@ abstractmemory/embeddings.py
+ from __future__ import annotations
+
+ import json
+ from typing import List, Protocol, Sequence
+ from urllib.error import HTTPError, URLError
+ from urllib.request import Request, urlopen
+
+
+ class TextEmbedder(Protocol):
+     """Minimal text embedding interface used by AbstractMemory stores."""
+
+     def embed_texts(self, texts: Sequence[str]) -> List[List[float]]: ...
+
+
+ class AbstractGatewayTextEmbedder:
+     """Text embedder that calls AbstractGateway's embeddings API.
+
+     AbstractMemory intentionally does not depend on AbstractCore directly. The gateway is responsible for:
+     - selecting the embedding provider/model (singleton per gateway instance)
+     - generating embeddings via AbstractRuntime+AbstractCore integration
+     - enforcing a stable embedding space
+     """
+
+     def __init__(
+         self,
+         *,
+         base_url: str,
+         auth_token: str | None = None,
+         endpoint_path: str = "/api/gateway/embeddings",
+         timeout_s: float = 30.0,
+     ) -> None:
+         root = str(base_url or "").strip().rstrip("/")
+         if not root:
+             raise ValueError("base_url is required")
+         path = str(endpoint_path or "").strip()
+         if not path.startswith("/"):
+             path = "/" + path
+         self._url = root + path
+         self._timeout_s = float(timeout_s)
+         self._headers = {"Content-Type": "application/json"}
+         if isinstance(auth_token, str) and auth_token.strip():
+             self._headers["Authorization"] = f"Bearer {auth_token.strip()}"
+
+     def embed_texts(self, texts: Sequence[str]) -> List[List[float]]:
+         items = [str(t or "") for t in texts]
+         payload = {"input": items}
+         req = Request(
+             self._url,
+             data=json.dumps(payload, ensure_ascii=False).encode("utf-8"),
+             headers=dict(self._headers),
+             method="POST",
+         )
+         try:
+             with urlopen(req, timeout=self._timeout_s) as resp:
+                 raw = resp.read().decode("utf-8")
+         except HTTPError as e:
+             detail = ""
+             try:
+                 detail = e.read().decode("utf-8")
+             except Exception:
+                 detail = ""
+             hint = ""
+             if int(getattr(e, "code", 0) or 0) == 401:
+                 hint = (
+                     " (Set `ABSTRACTGATEWAY_AUTH_TOKEN` / `ABSTRACTFLOW_GATEWAY_AUTH_TOKEN` "
+                     "for the caller process, or pass a Bearer token to the gateway embeddings endpoint.)"
+                 )
+             raise RuntimeError(f"Gateway embeddings HTTP {e.code}: {detail or e.reason}{hint}") from e
+         except URLError as e:
+             raise RuntimeError(f"Gateway embeddings request failed: {e}") from e
+
+         try:
+             data = json.loads(raw)
+         except Exception as e:
+             raise RuntimeError(f"Gateway embeddings returned invalid JSON: {e}") from e
+
+         rows = data.get("data") if isinstance(data, dict) else None
+         if not isinstance(rows, list):
+             raise RuntimeError("Gateway embeddings response missing 'data' list")
+
+         # Preserve order via `index` when present.
+         parsed: list[tuple[int, List[float]]] = []
+         for i, row_any in enumerate(rows):
+             row = row_any if isinstance(row_any, dict) else {}
+             idx = row.get("index")
+             try:
+                 index = int(idx) if idx is not None else i
+             except Exception:
+                 index = i
+             emb = row.get("embedding")
+             if not isinstance(emb, list):
+                 raise RuntimeError("Gateway embeddings response contains non-list embedding")
+             parsed.append((index, [float(x) for x in emb]))
+
+         parsed.sort(key=lambda t: t[0])
+         return [v for _, v in parsed]
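The wire format above mirrors the common OpenAI-style embeddings shape: it POSTs `{"input": [...]}` and reads `data[*].index` / `data[*].embedding` back, re-sorting by `index`. A minimal usage sketch, assuming a hypothetical gateway at `http://localhost:8000` with a dev token (both placeholders, not real deployment values):

```python
from abstractmemory import AbstractGatewayTextEmbedder

# URL and token below are illustrative placeholders.
embedder = AbstractGatewayTextEmbedder(
    base_url="http://localhost:8000",
    auth_token="dev-token",  # optional; sent as an "Authorization: Bearer ..." header
)

vectors = embedder.embed_texts(["scrooge related_to christmas"])
print(len(vectors), len(vectors[0]))  # one row per input text, one float per dimension
```

Failures surface as `RuntimeError` carrying the HTTP status and response body, with an extra auth hint on 401.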
@@ -0,0 +1,186 @@ abstractmemory/in_memory_store.py
+ from __future__ import annotations
+
+ import math
+ import uuid
+ from typing import Any, Iterable, List, Optional, Sequence
+
+ from .embeddings import TextEmbedder
+ from .models import TripleAssertion, normalize_term
+ from .store import TripleQuery
+
+
+ def _canonical_text(a: TripleAssertion) -> str:
+     base = f"{a.subject} {a.predicate} {a.object}".strip()
+     attrs = a.attributes if isinstance(a.attributes, dict) else {}
+
+     parts: list[str] = [base]
+     st = attrs.get("subject_type")
+     ot = attrs.get("object_type")
+     if isinstance(st, str) and st.strip():
+         parts.append(f"subject_type: {st.strip()}")
+     if isinstance(ot, str) and ot.strip():
+         parts.append(f"object_type: {ot.strip()}")
+
+     eq = attrs.get("evidence_quote")
+     if isinstance(eq, str) and eq.strip():
+         parts.append(f"evidence: {eq.strip()}")
+
+     ctx = attrs.get("original_context")
+     if isinstance(ctx, str) and ctx.strip():
+         ctx2 = ctx.strip()
+         if len(ctx2) > 400:
+             ctx2 = ctx2[:400] + "…"
+         parts.append(f"context: {ctx2}")
+
+     return "\n".join(parts)
+
+
+ def _cosine(a: Sequence[float], b: Sequence[float]) -> float:
+     # Defensive: handle empty vectors.
+     if not a or not b:
+         return 0.0
+     n = min(len(a), len(b))
+     dot = 0.0
+     na = 0.0
+     nb = 0.0
+     for i in range(n):
+         ax = float(a[i])
+         bx = float(b[i])
+         dot += ax * bx
+         na += ax * ax
+         nb += bx * bx
+     if na <= 0.0 or nb <= 0.0:
+         return 0.0
+     return dot / (math.sqrt(na) * math.sqrt(nb))
+
+
+ class InMemoryTripleStore:
+     """A dependency-free triple store (best-effort).
+
+     Notes:
+     - Intended for tests/dev and hosts without LanceDB installed.
+     - Append-only: updates are represented as new assertions.
+     - Vector search is optional and stores vectors in-memory only.
+     """
+
+     def __init__(
+         self,
+         *,
+         embedder: Optional[TextEmbedder] = None,
+         vector_column: str = "vector",
+     ) -> None:
+         self._embedder = embedder
+         self._vector_column = str(vector_column or "vector")
+         self._rows: list[dict[str, Any]] = []
+
+     def close(self) -> None:
+         return None
+
+     def add(self, assertions: Iterable[TripleAssertion]) -> List[str]:
+         pending: list[TripleAssertion] = [a for a in assertions]
+         if not pending:
+             return []
+
+         vectors: Optional[List[List[float]]] = None
+         if self._embedder is not None:
+             vectors = self._embedder.embed_texts([_canonical_text(a) for a in pending])
+
+         ids: list[str] = []
+         for i, a in enumerate(pending):
+             assertion_id = str(uuid.uuid4())
+             ids.append(assertion_id)
+             row: dict[str, Any] = {"assertion_id": assertion_id, "assertion": a}
+             if vectors is not None and i < len(vectors):
+                 row[self._vector_column] = vectors[i]
+             self._rows.append(row)
+         return ids
+
+     def query(self, q: TripleQuery) -> List[TripleAssertion]:
+         raw_limit = int(q.limit) if isinstance(q.limit, int) else 100
+         limit: Optional[int]
+         if raw_limit <= 0:
+             limit = None
+         else:
+             limit = max(1, raw_limit)
+
+         def _match(a: TripleAssertion) -> bool:
+             if q.subject and normalize_term(a.subject) != normalize_term(q.subject):
+                 return False
+             if q.predicate and normalize_term(a.predicate) != normalize_term(q.predicate):
+                 return False
+             if q.object and normalize_term(a.object) != normalize_term(q.object):
+                 return False
+             if q.scope and a.scope != q.scope:
+                 return False
+             if q.owner_id and (a.owner_id or "") != q.owner_id:
+                 return False
+             if q.since and (a.observed_at or "") < q.since:
+                 return False
+             if q.until and (a.observed_at or "") > q.until:
+                 return False
+             if q.active_at:
+                 at = q.active_at
+                 if a.valid_from and a.valid_from > at:
+                     return False
+                 if a.valid_until and a.valid_until <= at:
+                     return False
+             return True
+
+         rows = [r for r in self._rows if isinstance(r, dict) and isinstance(r.get("assertion"), TripleAssertion)]
+         filtered: list[dict[str, Any]] = []
+         for r in rows:
+             a = r["assertion"]
+             if _match(a):
+                 filtered.append(r)
+
+         query_vector: Optional[Sequence[float]] = None
+         if q.query_vector:
+             query_vector = q.query_vector
+         elif q.query_text:
+             if self._embedder is None:
+                 raise ValueError("query_text requires a configured embedder (vector search); keyword fallback is disabled")
+             query_vector = self._embedder.embed_texts([q.query_text])[0]
+
+         if query_vector is not None:
+             ranked: list[tuple[float, TripleAssertion]] = []
+             for r in filtered:
+                 v = r.get(q.vector_column or self._vector_column)
+                 if not isinstance(v, list):
+                     continue
+                 try:
+                     score = _cosine(query_vector, v)
+                 except Exception:
+                     score = 0.0
+                 if q.min_score is not None and score < float(q.min_score):
+                     continue
+                 ranked.append((score, r["assertion"]))
+             ranked.sort(key=lambda t: t[0], reverse=True)
+
+             out: list[TripleAssertion] = []
+             for score, a in (ranked if limit is None else ranked[:limit]):
+                 attrs = dict(a.attributes) if isinstance(a.attributes, dict) else {}
+                 retrieval = attrs.get("_retrieval") if isinstance(attrs.get("_retrieval"), dict) else {}
+                 retrieval2 = dict(retrieval)
+                 retrieval2["score"] = float(score)
+                 retrieval2.setdefault("metric", "cosine")
+                 attrs["_retrieval"] = retrieval2
+                 out.append(
+                     TripleAssertion(
+                         subject=a.subject,
+                         predicate=a.predicate,
+                         object=a.object,
+                         scope=a.scope,
+                         owner_id=a.owner_id,
+                         observed_at=a.observed_at,
+                         valid_from=a.valid_from,
+                         valid_until=a.valid_until,
+                         confidence=a.confidence,
+                         provenance=dict(a.provenance),
+                         attributes=attrs,
+                     )
+                 )
+             return out
+
+         out: list[TripleAssertion] = [r["assertion"] for r in filtered]
+         out.sort(key=lambda a: a.observed_at or "", reverse=(str(q.order).lower() != "asc"))
+         return out if limit is None else out[:limit]
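Any object with a compatible `embed_texts` method satisfies the `TextEmbedder` protocol, so the semantic path can be exercised without a gateway. A toy sketch with a hypothetical keyword-count embedder:

```python
from abstractmemory import InMemoryTripleStore, TripleAssertion, TripleQuery

class ToyEmbedder:
    # Hypothetical stand-in for a real embedder: 2-d keyword counts.
    def embed_texts(self, texts):
        return [[float(t.count("christmas")), float(t.count("money"))] for t in texts]

store = InMemoryTripleStore(embedder=ToyEmbedder())
store.add([
    TripleAssertion(subject="Scrooge", predicate="related_to", object="Christmas"),
    TripleAssertion(subject="Scrooge", predicate="loves", object="money"),
])

hits = store.query(TripleQuery(query_text="christmas", min_score=0.5))
print(hits[0].object)                             # "christmas"
print(hits[0].attributes["_retrieval"]["score"])  # 1.0 (cosine similarity)
```

Note how the retrieval score is surfaced on a copy of the assertion under `attributes["_retrieval"]` rather than mutating the stored row, consistent with the append-only design.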
@@ -0,0 +1,274 @@ abstractmemory/lancedb_store.py
+ from __future__ import annotations
+
+ import json
+ import uuid
+ from pathlib import Path
+ from typing import Any, Dict, Iterable, List, Optional, Sequence
+
+ from .embeddings import TextEmbedder
+ from .models import TripleAssertion, normalize_term
+ from .store import TripleQuery
+
+
+ def _import_lancedb():
+     try:
+         import lancedb  # type: ignore
+
+         return lancedb
+     except Exception as e:  # pragma: no cover
+         raise ImportError(
+             "LanceDB support requires `lancedb` (and its dependencies). "
+             "Install it in your environment (offline/local install is fine), e.g. `pip install lancedb`."
+         ) from e
+
+
+ def _escape_sql_string(value: str) -> str:
+     # LanceDB uses SQL-like filter strings; escape single quotes.
+     return str(value).replace("'", "''")
+
+
+ def _build_where_clause(q: TripleQuery) -> str:
+     parts: list[str] = []
+
+     if q.subject:
+         parts.append(f"lower(subject) = '{_escape_sql_string(normalize_term(q.subject))}'")
+     if q.predicate:
+         parts.append(f"lower(predicate) = '{_escape_sql_string(normalize_term(q.predicate))}'")
+     if q.object:
+         parts.append(f"lower(object) = '{_escape_sql_string(normalize_term(q.object))}'")
+     if q.scope:
+         parts.append(f"scope = '{_escape_sql_string(q.scope)}'")
+     if q.owner_id:
+         parts.append(f"owner_id = '{_escape_sql_string(q.owner_id)}'")
+
+     if q.since:
+         parts.append(f"observed_at >= '{_escape_sql_string(q.since)}'")
+     if q.until:
+         parts.append(f"observed_at <= '{_escape_sql_string(q.until)}'")
+
+     if q.active_at:
+         at = _escape_sql_string(q.active_at)
+         parts.append(f"(valid_from IS NULL OR valid_from <= '{at}')")
+         parts.append(f"(valid_until IS NULL OR valid_until > '{at}')")
+
+     return " AND ".join(parts)
+
+
+ def _canonical_text(a: TripleAssertion) -> str:
+     # Stable, information-rich representation for embedding retrieval.
+     #
+     # Why include more than "s p o":
+     # - semantic queries often refer to details that aren't present in the triple surface form
+     # - extractor-provided evidence/context improves retrieval selectivity without requiring
+     #   a separate episodic document store in v0
+     base = f"{a.subject} {a.predicate} {a.object}".strip()
+     attrs = a.attributes if isinstance(a.attributes, dict) else {}
+
+     parts: list[str] = [base]
+     st = attrs.get("subject_type")
+     ot = attrs.get("object_type")
+     if isinstance(st, str) and st.strip():
+         parts.append(f"subject_type: {st.strip()}")
+     if isinstance(ot, str) and ot.strip():
+         parts.append(f"object_type: {ot.strip()}")
+
+     eq = attrs.get("evidence_quote")
+     if isinstance(eq, str) and eq.strip():
+         parts.append(f"evidence: {eq.strip()}")
+
+     ctx = attrs.get("original_context")
+     if isinstance(ctx, str) and ctx.strip():
+         ctx2 = ctx.strip()
+         if len(ctx2) > 400:
+             ctx2 = ctx2[:400] + "…"
+         parts.append(f"context: {ctx2}")
+
+     return "\n".join(parts)
+
+
+ def _loads_json(raw: object) -> dict:
+     if not isinstance(raw, str) or not raw:
+         return {}
+     try:
+         parsed = json.loads(raw)
+         return parsed if isinstance(parsed, dict) else {}
+     except Exception:
+         return {}
+
+
+ class LanceDBTripleStore:
+     """LanceDB-backed append-only triple store with optional vector search.
+
+     Notes:
+     - Append-only: updates are represented as new assertions.
+     - Vector search is optional and requires `embedder` (for query_text) or query_vector.
+     """
+
+     def __init__(
+         self,
+         uri: str | Path,
+         *,
+         table_name: str = "triple_assertions",
+         embedder: Optional[TextEmbedder] = None,
+         vector_column: str = "vector",
+     ):
+         self._lancedb = _import_lancedb()
+         self._db = self._lancedb.connect(str(uri))
+         self._table_name = str(table_name)
+         self._vector_column = str(vector_column or "vector")
+         self._embedder = embedder
+
+         self._table = None
+         try:
+             # `table_names()` is deprecated upstream but is stable and sufficient for local stores.
+             if self._table_name in set(self._db.table_names()):
+                 self._table = self._db.open_table(self._table_name)
+         except Exception:
+             self._table = None
+
+     def close(self) -> None:
+         # LanceDB tables/connections are managed by the library; nothing required here.
+         return None
+
+     def add(self, assertions: Iterable[TripleAssertion]) -> List[str]:
+         rows: list[dict[str, Any]] = []
+         ids: List[str] = []
+         pending: List[TripleAssertion] = []
+
+         for a in assertions:
+             pending.append(a)
+
+         if not pending:
+             return []
+
+         # Always store a canonical text column (useful for debugging and future indexing).
+         texts: List[str] = [_canonical_text(a) for a in pending]
+         vectors: Optional[List[List[float]]] = None
+         if self._embedder is not None:
+             vectors = self._embedder.embed_texts(texts)
+
+         for idx, a in enumerate(pending):
+             assertion_id = str(uuid.uuid4())
+             ids.append(assertion_id)
+             row: Dict[str, Any] = {
+                 "assertion_id": assertion_id,
+                 "subject": a.subject,
+                 "predicate": a.predicate,
+                 "object": a.object,
+                 "scope": a.scope,
+                 "owner_id": a.owner_id,
+                 "observed_at": a.observed_at,
+                 "valid_from": a.valid_from,
+                 "valid_until": a.valid_until,
+                 "confidence": a.confidence,
+                 "provenance_json": json.dumps(a.provenance, ensure_ascii=False, separators=(",", ":")),
+                 "attributes_json": json.dumps(a.attributes, ensure_ascii=False, separators=(",", ":")),
+                 "text": texts[idx],
+             }
+
+             if vectors is not None and idx < len(vectors):
+                 row[self._vector_column] = vectors[idx]
+
+             # Keep JSON compact (omit nulls).
+             row = {k: v for k, v in row.items() if v is not None}
+             rows.append(row)
+
+         if self._table is None:
+             # Create on first insert so we can infer vector dimensionality from real data.
+             self._table = self._db.create_table(self._table_name, data=rows, mode="create")
+         else:
+             self._table.add(rows)
+         return ids
+
+     def query(self, q: TripleQuery) -> List[TripleAssertion]:
+         if self._table is None:
+             return []
+
+         raw_limit = int(q.limit) if isinstance(q.limit, int) else 100
+         limit: Optional[int]
+         if raw_limit <= 0:
+             limit = None
+         else:
+             limit = max(1, raw_limit)
+
+         where = _build_where_clause(q)
+
+         query_vector: Optional[Sequence[float]] = None
+         if q.query_vector:
+             query_vector = q.query_vector
+         elif q.query_text:
+             if self._embedder is None:
+                 raise ValueError("query_text requires a configured embedder (vector search); keyword fallback is disabled")
+             query_vector = self._embedder.embed_texts([q.query_text])[0]
+
+         qb = None
+         if query_vector is not None:
+             # Use cosine metric so `min_score` can be expressed as cosine similarity.
+             qb = self._table.search(query_vector, vector_column_name=q.vector_column or self._vector_column).metric("cosine")
+         if qb is None:
+             qb = self._table.search()
+
+         if where:
+             qb = qb.where(where)
+
+         if query_vector is None:
+             # LanceDB does not currently expose an order_by API on query builders. For deterministic
+             # observed_at ordering (and correct limit semantics), fetch all matching rows then sort
+             # in Python and apply the limit after sorting.
+             rows = qb.to_list()
+         else:
+             rows = qb.limit(limit).to_list() if limit is not None else qb.to_list()
+
+         out: List[TripleAssertion] = []
+         for r in rows:
+             if not isinstance(r, dict):
+                 continue
+             provenance = _loads_json(r.get("provenance_json"))
+             attributes = _loads_json(r.get("attributes_json"))
+
+             # Attach retrieval metadata for semantic queries.
+             # LanceDB returns `_distance` for vector searches; with metric=cosine, similarity = 1 - distance.
+             if query_vector is not None:
+                 dist_raw = r.get("_distance")
+                 dist: Optional[float] = None
+                 try:
+                     dist = float(dist_raw) if dist_raw is not None else None
+                 except Exception:
+                     dist = None
+                 score: Optional[float] = None
+                 if dist is not None:
+                     score = 1.0 - dist
+
+                 if q.min_score is not None and score is not None and score < float(q.min_score):
+                     continue
+
+                 retrieval = attributes.get("_retrieval") if isinstance(attributes.get("_retrieval"), dict) else {}
+                 retrieval2 = dict(retrieval)
+                 if score is not None:
+                     retrieval2["score"] = score
+                 if dist is not None:
+                     retrieval2["distance"] = dist
+                 retrieval2.setdefault("metric", "cosine")
+                 attributes = dict(attributes)
+                 attributes["_retrieval"] = retrieval2
+             out.append(
+                 TripleAssertion(
+                     subject=str(r.get("subject") or ""),
+                     predicate=str(r.get("predicate") or ""),
+                     object=str(r.get("object") or ""),
+                     scope=str(r.get("scope") or "run"),
+                     owner_id=str(r.get("owner_id")) if r.get("owner_id") is not None else None,
+                     observed_at=str(r.get("observed_at") or ""),
+                     valid_from=str(r.get("valid_from")) if r.get("valid_from") is not None else None,
+                     valid_until=str(r.get("valid_until")) if r.get("valid_until") is not None else None,
+                     confidence=r.get("confidence") if isinstance(r.get("confidence"), (int, float)) else None,
+                     provenance=provenance,
+                     attributes=attributes,
+                 )
+             )
+
+         # For non-semantic queries, keep compatibility with SQLite semantics: order by observed_at.
+         # For semantic queries, LanceDB already returns similarity-ranked results.
+         if query_vector is None:
+             out.sort(key=lambda a: a.observed_at or "", reverse=(str(q.order).lower() != "asc"))
+         return out if limit is None else out[:limit]
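For illustration, here is the SQL-like filter string a `TripleQuery` compiles to. `_build_where_clause` is a module-private helper imported here purely to show the output; LanceDB itself is not needed to run this (the import is lazy):

```python
from abstractmemory import TripleQuery
from abstractmemory.lancedb_store import _build_where_clause  # private; shown for illustration

q = TripleQuery(subject="Scrooge", scope="Session", active_at="2024-12-25T00:00:00+00:00")
print(_build_where_clause(q))
# One line, wrapped here for readability:
#   lower(subject) = 'scrooge' AND scope = 'session'
#   AND (valid_from IS NULL OR valid_from <= '2024-12-25T00:00:00+00:00')
#   AND (valid_until IS NULL OR valid_until > '2024-12-25T00:00:00+00:00')
```

Note that canonicalization happens in `TripleQuery.__post_init__`, so `"Scrooge"` and `"Session"` are already lowercased before the clause is built.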
@@ -0,0 +1,126 @@ abstractmemory/models.py
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from datetime import datetime, timezone
+ from typing import Any, Dict, Optional
+
+
+ def utc_now_iso_seconds() -> str:
+     return datetime.now(timezone.utc).isoformat(timespec="seconds")
+
+
+ def canonicalize_term(value: str) -> str:
+     """Canonicalize a KG term for stable matching.
+
+     Policy (v0):
+     - trim surrounding whitespace
+     - lowercase (avoids missed matches due to casing drift)
+     """
+     return str(value or "").strip().lower()
+
+
+ def normalize_term(value: str) -> str:
+     """Normalize a KG term for case-insensitive matching (query-time)."""
+     # `canonicalize_term` already lowercases; keep this for clarity/compatibility.
+     return canonicalize_term(value).lower()
+
+
+ @dataclass(frozen=True)
+ class TripleAssertion:
+     """An append-only semantic assertion with temporal and provenance metadata."""
+
+     subject: str
+     predicate: str
+     object: str
+     scope: str = "run"  # run|session|global
+     owner_id: Optional[str] = None  # scope owner identifier (e.g. run_id, session_memory_*, global_memory)
+     observed_at: str = field(default_factory=utc_now_iso_seconds)
+
+     valid_from: Optional[str] = None
+     valid_until: Optional[str] = None
+     confidence: Optional[float] = None
+
+     provenance: Dict[str, Any] = field(default_factory=dict)
+     attributes: Dict[str, Any] = field(default_factory=dict)
+
+     def __post_init__(self) -> None:
+         # Canonicalize KG terms (trim + lower) for stable matching.
+         object.__setattr__(self, "subject", canonicalize_term(self.subject))
+         object.__setattr__(self, "predicate", canonicalize_term(self.predicate))
+         object.__setattr__(self, "object", canonicalize_term(self.object))
+
+         # Keep scope canonical (it is part of the partitioning key).
+         object.__setattr__(self, "scope", str(self.scope or "").strip().lower() or "run")
+
+         # Defensive trimming for timestamps/ids without altering semantics.
+         if isinstance(self.owner_id, str):
+             oid = self.owner_id.strip()
+             object.__setattr__(self, "owner_id", oid if oid else None)
+         object.__setattr__(self, "observed_at", str(self.observed_at or "").strip() or utc_now_iso_seconds())
+         if isinstance(self.valid_from, str):
+             vf = self.valid_from.strip()
+             object.__setattr__(self, "valid_from", vf if vf else None)
+         if isinstance(self.valid_until, str):
+             vu = self.valid_until.strip()
+             object.__setattr__(self, "valid_until", vu if vu else None)
+
+     def to_dict(self) -> Dict[str, Any]:
+         out: Dict[str, Any] = {
+             "subject": self.subject,
+             "predicate": self.predicate,
+             "object": self.object,
+             "scope": self.scope,
+             "owner_id": self.owner_id,
+             "observed_at": self.observed_at,
+             "valid_from": self.valid_from,
+             "valid_until": self.valid_until,
+             "confidence": self.confidence,
+             "provenance": dict(self.provenance),
+             "attributes": dict(self.attributes),
+         }
+         # Keep JSON compact (omit nulls).
+         return {k: v for k, v in out.items() if v is not None}
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> "TripleAssertion":
+         if not isinstance(data, dict):
+             raise TypeError("TripleAssertion.from_dict expects a dict")
+
+         subject = data.get("subject")
+         predicate = data.get("predicate")
+         obj = data.get("object")
+         if not isinstance(subject, str) or not subject.strip():
+             raise ValueError("TripleAssertion.subject must be a non-empty string")
+         if not isinstance(predicate, str) or not predicate.strip():
+             raise ValueError("TripleAssertion.predicate must be a non-empty string")
+         if not isinstance(obj, str) or not obj.strip():
+             raise ValueError("TripleAssertion.object must be a non-empty string")
+
+         scope = data.get("scope") if isinstance(data.get("scope"), str) else "run"
+         owner_id = data.get("owner_id") if isinstance(data.get("owner_id"), str) else None
+         observed_at = data.get("observed_at") if isinstance(data.get("observed_at"), str) else utc_now_iso_seconds()
+
+         provenance = data.get("provenance") if isinstance(data.get("provenance"), dict) else {}
+         attributes = data.get("attributes") if isinstance(data.get("attributes"), dict) else {}
+
+         confidence_raw = data.get("confidence")
+         confidence: Optional[float] = None
+         if confidence_raw is not None:
+             try:
+                 confidence = float(confidence_raw)
+             except Exception:
+                 confidence = None
+
+         return cls(
+             subject=subject,
+             predicate=predicate,
+             object=obj,
+             scope=scope,
+             owner_id=owner_id.strip() if isinstance(owner_id, str) and owner_id.strip() else None,
+             observed_at=observed_at.strip() or utc_now_iso_seconds(),
+             valid_from=data.get("valid_from") if isinstance(data.get("valid_from"), str) else None,
+             valid_until=data.get("valid_until") if isinstance(data.get("valid_until"), str) else None,
+             confidence=confidence,
+             provenance=dict(provenance),
+             attributes=dict(attributes),
+         )
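A short round-trip sketch showing the term canonicalization and the null-omitting serialization defined above:

```python
from abstractmemory import TripleAssertion

a = TripleAssertion(subject="  Scrooge ", predicate="Related_To", object="CHRISTMAS")
d = a.to_dict()
print(d["subject"], d["predicate"], d["object"])  # scrooge related_to christmas
assert "valid_from" not in d  # None-valued fields are dropped by to_dict()

b = TripleAssertion.from_dict(d)
assert b == a  # frozen-dataclass equality survives a lossless round-trip
```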
@@ -0,0 +1,90 @@ abstractmemory/store.py
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Iterable, List, Optional, Protocol
+
+ from .models import TripleAssertion, canonicalize_term
+
+
+ @dataclass(frozen=True)
+ class TripleQuery:
+     subject: Optional[str] = None
+     predicate: Optional[str] = None
+     object: Optional[str] = None
+     scope: Optional[str] = None  # run|session|global
+     owner_id: Optional[str] = None  # owner identifier within the selected scope
+
+     since: Optional[str] = None  # observed_at >= since
+     until: Optional[str] = None  # observed_at <= until
+     active_at: Optional[str] = None  # valid_from/valid_until window intersection
+
+     # Optional semantic search:
+     # - query_text requires a store-configured embedder
+     # - query_vector bypasses embedding generation
+     query_text: Optional[str] = None
+     query_vector: Optional[List[float]] = None
+     vector_column: str = "vector"
+     min_score: Optional[float] = None  # cosine similarity threshold (semantic queries)
+
+     limit: int = 100
+     order: str = "desc"  # asc|desc by observed_at
+
+     def __post_init__(self) -> None:
+         # Canonicalize KG terms once (trim + lower; stable exact match).
+         if isinstance(self.subject, str):
+             s = canonicalize_term(self.subject)
+             object.__setattr__(self, "subject", s if s else None)
+         if isinstance(self.predicate, str):
+             p = canonicalize_term(self.predicate)
+             object.__setattr__(self, "predicate", p if p else None)
+         if isinstance(self.object, str):
+             o = canonicalize_term(self.object)
+             object.__setattr__(self, "object", o if o else None)
+
+         if isinstance(self.scope, str):
+             sc = str(self.scope or "").strip().lower()
+             object.__setattr__(self, "scope", sc if sc else None)
+
+         # Keep metadata trimmed without changing semantics.
+         if isinstance(self.owner_id, str):
+             oid = self.owner_id.strip()
+             object.__setattr__(self, "owner_id", oid if oid else None)
+         if isinstance(self.since, str):
+             s = self.since.strip()
+             object.__setattr__(self, "since", s if s else None)
+         if isinstance(self.until, str):
+             u = self.until.strip()
+             object.__setattr__(self, "until", u if u else None)
+         if isinstance(self.active_at, str):
+             a = self.active_at.strip()
+             object.__setattr__(self, "active_at", a if a else None)
+
+         # For semantic retrieval, normalize text input once.
+         if isinstance(self.query_text, str):
+             qt = str(self.query_text or "").strip()
+             object.__setattr__(self, "query_text", qt if qt else None)
+
+         if isinstance(self.vector_column, str):
+             vc = self.vector_column.strip() or "vector"
+             object.__setattr__(self, "vector_column", vc)
+
+         if self.min_score is not None:
+             try:
+                 ms = float(self.min_score)
+             except Exception:
+                 ms = None
+             if ms is None or not (ms == ms):  # NaN
+                 object.__setattr__(self, "min_score", None)
+             else:
+                 object.__setattr__(self, "min_score", ms)
+
+
+ class TripleStore(Protocol):
+     def add(self, assertions: Iterable[TripleAssertion]) -> List[str]: ...
+
+     def query(self, q: TripleQuery) -> List[TripleAssertion]: ...
+
+     def close(self) -> None: ...
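Because `__post_init__` normalizes every field in place, two queries that differ only in casing or surrounding whitespace compare equal. A quick sketch:

```python
from abstractmemory import TripleQuery

q1 = TripleQuery(subject=" Scrooge ", scope="Session", order="DESC")
q2 = TripleQuery(subject="scrooge", scope="session", order="desc")
assert q1 == q2  # terms trimmed + lowercased once, at construction time
```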
@@ -0,0 +1,113 @@ abstractmemory-0.0.2.dist-info/METADATA
+ Metadata-Version: 2.4
+ Name: AbstractMemory
+ Version: 0.0.2
+ Summary: AbstractMemory: temporal, provenance-aware semantic memory primitives for AbstractFramework.
+ Project-URL: AbstractFramework (monorepo), https://github.com/lpalbou/abstractframework
+ Author: Laurent-Philippe Albou
+ License: MIT
+ License-File: LICENSE
+ Keywords: agents,knowledge-graph,memory,temporal,triples
+ Classifier: Development Status :: 1 - Planning
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Python: >=3.10
+ Provides-Extra: all
+ Requires-Dist: lancedb; extra == 'all'
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
+ Provides-Extra: lancedb
+ Requires-Dist: lancedb; extra == 'lancedb'
+ Description-Content-Type: text/markdown
+
+ # AbstractMemory (early / WIP)
+
+ AbstractMemory is a small Python library for **append-only, temporal, provenance-aware triple assertions** with a **deterministic query API** and optional **vector/semantic retrieval**.
+
+ ## Status
+ - This package is still early: API and storage details may change.
+ - Implemented today: `TripleAssertion`, `TripleQuery`, `InMemoryTripleStore`, `LanceDBTripleStore`, `AbstractGatewayTextEmbedder`.
+ - Source of truth for exports: [`src/abstractmemory/__init__.py`](src/abstractmemory/__init__.py)
+ - Requires Python 3.10+ (see [`pyproject.toml`](pyproject.toml))
+
+ ## Install
+
+ From PyPI (when published):
+
+ ```bash
+ python -m pip install AbstractMemory
+ ```
+
+ Optional persistent backend + vector search:
+
+ ```bash
+ python -m pip install "AbstractMemory[lancedb]"
+ ```
+
+ Note: the distribution name is `AbstractMemory` (pip is case-insensitive). The import name is `abstractmemory`.
+
+ From source (recommended for this monorepo package):
+
+ ```bash
+ python -m pip install -e .
+ ```
+
+ Optional persistent backend + vector search:
+
+ ```bash
+ python -m pip install -e ".[lancedb]"
+ ```
+
+ ## Quick example
+
+ ```python
+ from abstractmemory import InMemoryTripleStore, TripleAssertion, TripleQuery
+
+ store = InMemoryTripleStore()
+ store.add(
+     [
+         TripleAssertion(
+             subject="Scrooge",
+             predicate="related_to",
+             object="Christmas",
+             scope="session",
+             owner_id="sess-1",
+             provenance={"span_id": "span_123"},
+         )
+     ]
+ )
+
+ hits = store.query(TripleQuery(subject="scrooge", scope="session", owner_id="sess-1"))
+ assert hits[0].object == "christmas"  # terms are canonicalized (trim + lowercase)
+ ```
+
+ ## Documentation
+
+ - Getting started: [`docs/getting-started.md`](docs/getting-started.md)
+ - FAQ: [`docs/faq.md`](docs/faq.md)
+ - Architecture (with diagrams): [`docs/architecture.md`](docs/architecture.md)
+ - Stores/backends: [`docs/stores.md`](docs/stores.md)
+ - API reference: [`docs/api.md`](docs/api.md)
+ - Development: [`docs/development.md`](docs/development.md)
+
+ ## Project
+
+ - Changelog: [`CHANGELOG.md`](CHANGELOG.md)
+ - Contributing: [`CONTRIBUTING.md`](CONTRIBUTING.md)
+ - Security: [`SECURITY.md`](SECURITY.md)
+ - License: [`LICENSE`](LICENSE)
+ - Acknowledgments: [`ACKNOWLEDGMENTS.md`](ACKNOWLEDGMENTS.md)
+
+ ## Design principles (v0)
+
+ - **Triples-first** representation with temporal fields (`observed_at`, `valid_from`, `valid_until`).
+   - Implemented in `TripleAssertion`: [`src/abstractmemory/models.py`](src/abstractmemory/models.py)
+ - **Append-only**: represent updates by adding a new assertion with fresh provenance.
+   - Implemented by both stores: [`src/abstractmemory/in_memory_store.py`](src/abstractmemory/in_memory_store.py), [`src/abstractmemory/lancedb_store.py`](src/abstractmemory/lancedb_store.py)
+ - **No direct AbstractCore dependency**: embeddings can be obtained via an AbstractGateway HTTP API.
+   - Implemented by `AbstractGatewayTextEmbedder`: [`src/abstractmemory/embeddings.py`](src/abstractmemory/embeddings.py)
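To make the append-only principle above concrete: an "update" closes the old fact's validity window via a new assertion rather than mutating a stored row. A sketch with illustrative timestamps:

```python
from abstractmemory import InMemoryTripleStore, TripleAssertion, TripleQuery

store = InMemoryTripleStore()
# Original fact, valid until the start of 2025 (illustrative ISO timestamps).
store.add([TripleAssertion(subject="scrooge", predicate="attitude", object="miserly",
                           valid_until="2025-01-01T00:00:00+00:00")])
# The "update" is simply a newer assertion; the old one is never modified.
store.add([TripleAssertion(subject="scrooge", predicate="attitude", object="generous",
                           valid_from="2025-01-01T00:00:00+00:00")])

hits = store.query(TripleQuery(subject="scrooge", active_at="2025-06-01T00:00:00+00:00"))
assert [h.object for h in hits] == ["generous"]  # only the currently valid fact matches
```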
@@ -0,0 +1,10 @@ abstractmemory-0.0.2.dist-info/RECORD
+ abstractmemory/__init__.py,sha256=diBIm5YRxFmn-rKSu6CVW6rZGEuiDFWi1mwK8Jdwj84,425
+ abstractmemory/embeddings.py,sha256=QpBDNNTr1TVzhZoSPoLNa5XASAcQfUvAFKrHlEdo9Zw,3741
+ abstractmemory/in_memory_store.py,sha256=ZkQ7KNznVkJqHgzZDAg_OCDtZi1DUJnCFkqxWGtRCEo,6861
+ abstractmemory/lancedb_store.py,sha256=4D6hiabFjbB65VLyIjX9CA57NyvMU9p3GMGdXr3s4vY,10650
+ abstractmemory/models.py,sha256=35dAm8ofIZVzn6mtd28SfRYesHL40AJ3Z3UfZHWUsvs,5335
+ abstractmemory/store.py,sha256=rG_DtBL_InnTgxybfzmLH6R6GdvVeEE7dwnI1g-ksu0,3534
+ abstractmemory-0.0.2.dist-info/METADATA,sha256=sWmn-qhXOAkcNqZChs7g7xftb-gAhdIEXUCJf0wvNSw,4091
+ abstractmemory-0.0.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ abstractmemory-0.0.2.dist-info/licenses/LICENSE,sha256=7t9ARuV3s_QVhJahnkFBnX_E5GPLW8BE9geplO1CGko,1079
+ abstractmemory-0.0.2.dist-info/RECORD,,
@@ -1,5 +1,4 @@ abstractmemory-0.0.2.dist-info/WHEEL
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: hatchling 1.28.0
  Root-Is-Purelib: true
  Tag: py3-none-any
-
@@ -1,6 +1,6 @@ abstractmemory-0.0.2.dist-info/licenses/LICENSE
  MIT License

- Copyright (c) 2025 AbstractMemory Team
+ Copyright (c) 2024 Laurent-Philippe Albou

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -1,94 +0,0 @@ abstractmemory-0.0.1.dist-info/METADATA
- Metadata-Version: 2.4
- Name: AbstractMemory
- Version: 0.0.1
- Summary: PLACEHOLDER: Memory system for transforming stateless LLMs into stateful LLMs - primarily designed for AbstractLLM integration
- Author-email: AbstractMemory Team <contact@example.com>
- Maintainer-email: AbstractMemory Team <contact@example.com>
- License-Expression: MIT
- Project-URL: Homepage, https://github.com/abstractmemory/abstractmemory
- Project-URL: Documentation, https://github.com/abstractmemory/abstractmemory#readme
- Project-URL: Repository, https://github.com/abstractmemory/abstractmemory
- Project-URL: Bug Reports, https://github.com/abstractmemory/abstractmemory/issues
- Keywords: llm,memory,stateful,ai,placeholder
- Classifier: Development Status :: 1 - Planning
- Classifier: Intended Audience :: Developers
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
- Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Dynamic: license-file
-
- # AbstractMemory - PLACEHOLDER PROJECT
-
- ⚠️ **WARNING: This is a placeholder package** ⚠️
-
- ## Overview
-
- AbstractMemory is a placeholder package that reserves the name on PyPI for a future memory system designed to transform stateless LLMs into stateful LLMs.
-
- ## Current Status
-
- **This package is currently a PLACEHOLDER and should NOT be used in production.**
-
- The actual memory system implementation is currently integrated within the AbstractLLM project. This separate package exists to:
-
- 1. **Reserve the PyPI name** for future modularization
- 2. **Enable clean separation of concerns** when the code is extracted from AbstractLLM
- 3. **Facilitate better evolution and maintenance** of the memory system as a standalone component
- 4. **Allow reusability** across different LLM frameworks in the future
-
- ## Future Vision
-
- AbstractMemory will provide:
-
- - **Stateful Memory Management**: Transform stateless LLMs into stateful systems
- - **Primary AbstractLLM Integration**: Seamless integration with AbstractLLM
- - **Modular Architecture**: Clean separation from core LLM functionality
- - **Extensible Framework**: Support for various memory strategies and backends
-
- ## Installation
-
- ```bash
- pip install AbstractMemory
- ```
-
- ## Usage
-
- Currently, attempting to use any functionality will raise a `PlaceholderError`:
-
- ```python
- import abstractmemory
-
- # This will raise PlaceholderError
- abstractmemory.placeholder_warning()
- ```
-
- ## Development Timeline
-
- The actual implementation will be extracted and modularized from AbstractLLM when:
- - The AbstractLLM memory system reaches sufficient maturity
- - Clean interfaces are established
- - Comprehensive testing framework is in place
-
- ## Contributing
-
- This is a placeholder project. For memory-related contributions, please refer to the AbstractLLM project until the code is modularized.
-
- ## License
-
- MIT License - See LICENSE file for details.
-
- ## Contact
-
- For questions about future development plans, please refer to the AbstractLLM project documentation.
-
- ---
-
- **Remember: This is a placeholder. The real implementation is coming soon!**
@@ -1,6 +0,0 @@ abstractmemory-0.0.1.dist-info/RECORD
- abstractmemory/__init__.py,sha256=56ZUm3WI5FFZRBoPwwAC4Zj0lK1vglE7RTj3a6yAFFY,1362
- abstractmemory-0.0.1.dist-info/licenses/LICENSE,sha256=lPn7wpbMAgGyv2OAhgHtW_bZHq4Hnm7Gx28-8m82Nt4,1076
- abstractmemory-0.0.1.dist-info/METADATA,sha256=2kDPyAPoE1e7XGR0_zvRWYp2FmbkHDJRN3vqv17m2XA,3420
- abstractmemory-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- abstractmemory-0.0.1.dist-info/top_level.txt,sha256=ALpTfrLlzRBy55aam5M8YuOi-4i_shfsq1DcgTR6_NQ,15
- abstractmemory-0.0.1.dist-info/RECORD,,
@@ -1 +0,0 @@ abstractmemory-0.0.1.dist-info/top_level.txt
- abstractmemory