AbstractMemory 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractmemory/__init__.py +15 -41
- abstractmemory/embeddings.py +96 -0
- abstractmemory/in_memory_store.py +186 -0
- abstractmemory/lancedb_store.py +274 -0
- abstractmemory/models.py +126 -0
- abstractmemory/store.py +90 -0
- abstractmemory-0.0.2.dist-info/METADATA +113 -0
- abstractmemory-0.0.2.dist-info/RECORD +10 -0
- {abstractmemory-0.0.1.dist-info → abstractmemory-0.0.2.dist-info}/WHEEL +1 -2
- {abstractmemory-0.0.1.dist-info → abstractmemory-0.0.2.dist-info}/licenses/LICENSE +1 -1
- abstractmemory-0.0.1.dist-info/METADATA +0 -94
- abstractmemory-0.0.1.dist-info/RECORD +0 -6
- abstractmemory-0.0.1.dist-info/top_level.txt +0 -1
abstractmemory/__init__.py
CHANGED
|
@@ -1,41 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
__version__ = "0.0.1"
|
|
19
|
-
__author__ = "AbstractMemory Team"
|
|
20
|
-
__email__ = "contact@example.com"
|
|
21
|
-
|
|
22
|
-
# Placeholder exception to prevent accidental usage
|
|
23
|
-
class PlaceholderError(Exception):
    """Signal that placeholder-only functionality was invoked."""
|
|
26
|
-
|
|
27
|
-
def placeholder_warning():
    """Refuse to run, because this distribution only reserves the package name.

    Raises:
        PlaceholderError: Unconditionally, so the placeholder cannot be used by accident.
    """
    message = (
        "AbstractMemory is currently a placeholder package. "
        "The actual memory system implementation is part of AbstractLLM. "
        "This package reserves the name for future modularization."
    )
    raise PlaceholderError(message)
|
|
39
|
-
|
|
40
|
-
# Make it clear this is a placeholder
|
|
41
|
-
__all__ = ["placeholder_warning", "PlaceholderError", "__version__"]
|
|
1
|
+
from .models import TripleAssertion
|
|
2
|
+
from .embeddings import AbstractGatewayTextEmbedder, TextEmbedder
|
|
3
|
+
from .in_memory_store import InMemoryTripleStore
|
|
4
|
+
from .lancedb_store import LanceDBTripleStore
|
|
5
|
+
from .store import TripleStore, TripleQuery
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"AbstractGatewayTextEmbedder",
|
|
9
|
+
"InMemoryTripleStore",
|
|
10
|
+
"LanceDBTripleStore",
|
|
11
|
+
"TextEmbedder",
|
|
12
|
+
"TripleAssertion",
|
|
13
|
+
"TripleQuery",
|
|
14
|
+
"TripleStore",
|
|
15
|
+
]
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from typing import List, Protocol, Sequence
|
|
5
|
+
from urllib.error import HTTPError, URLError
|
|
6
|
+
from urllib.request import Request, urlopen
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TextEmbedder(Protocol):
    """Structural interface for objects that turn texts into embedding vectors.

    This single method is the only thing AbstractMemory stores require of an embedder.
    """

    def embed_texts(self, texts: Sequence[str]) -> List[List[float]]:
        """Return embeddings for ``texts`` as lists of floats."""
        ...
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AbstractGatewayTextEmbedder:
    """Text embedder that calls AbstractGateway's embeddings API.

    AbstractMemory intentionally does not depend on AbstractCore directly. The gateway is responsible for:
    - selecting the embedding provider/model (singleton per gateway instance)
    - generating embeddings via AbstractRuntime+AbstractCore integration
    - enforcing a stable embedding space
    """

    def __init__(
        self,
        *,
        base_url: str,
        auth_token: str | None = None,
        endpoint_path: str = "/api/gateway/embeddings",
        timeout_s: float = 30.0,
    ) -> None:
        """Store the endpoint URL, timeout and request headers.

        Args:
            base_url: Gateway root URL; surrounding whitespace and trailing slashes are removed.
            auth_token: Optional token sent as ``Authorization: Bearer <token>`` when non-blank.
            endpoint_path: Path appended to ``base_url``; a leading ``/`` is added if missing.
            timeout_s: Timeout in seconds passed to ``urlopen``.

        Raises:
            ValueError: If ``base_url`` is empty after trimming.
        """
        root = str(base_url or "").strip().rstrip("/")
        if not root:
            raise ValueError("base_url is required")
        path = str(endpoint_path or "").strip()
        if not path.startswith("/"):
            path = "/" + path
        self._url = root + path
        self._timeout_s = float(timeout_s)
        self._headers = {"Content-Type": "application/json"}
        if isinstance(auth_token, str) and auth_token.strip():
            self._headers["Authorization"] = f"Bearer {auth_token.strip()}"

    def embed_texts(self, texts: Sequence[str]) -> List[List[float]]:
        """POST ``texts`` to the gateway and return one embedding per text, in input order.

        Falsy entries are sent as empty strings. An empty ``texts`` returns ``[]``
        without contacting the gateway.

        Raises:
            RuntimeError: On HTTP errors, transport failures (including timeouts),
                invalid JSON, a malformed payload, or an embedding count that does
                not match the number of inputs.
        """
        items = [str(t or "") for t in texts]
        if not items:
            # Nothing to embed: skip the network round trip entirely.
            return []

        payload = {"input": items}
        req = Request(
            self._url,
            data=json.dumps(payload, ensure_ascii=False).encode("utf-8"),
            headers=dict(self._headers),
            method="POST",
        )
        try:
            with urlopen(req, timeout=self._timeout_s) as resp:
                raw = resp.read()
        except HTTPError as e:
            detail = ""
            try:
                detail = e.read().decode("utf-8")
            except Exception:
                # The error body is only used to enrich the message; ignore failures reading it.
                detail = ""
            hint = ""
            if int(getattr(e, "code", 0) or 0) == 401:
                hint = (
                    " (Set `ABSTRACTGATEWAY_AUTH_TOKEN` / `ABSTRACTFLOW_GATEWAY_AUTH_TOKEN` "
                    "for the caller process, or pass a Bearer token to the gateway embeddings endpoint.)"
                )
            raise RuntimeError(f"Gateway embeddings HTTP {e.code}: {detail or e.reason}{hint}") from e
        except OSError as e:
            # URLError is an OSError subclass; catching OSError also covers socket
            # timeouts and connection resets raised while reading the body.
            raise RuntimeError(f"Gateway embeddings request failed: {e}") from e

        return self._parse_embeddings(raw, len(items))

    @staticmethod
    def _parse_embeddings(raw: bytes | str, expected_count: int) -> List[List[float]]:
        """Decode a gateway response body into embeddings ordered by their ``index``.

        Args:
            raw: Response body as UTF-8 bytes or an already-decoded string.
            expected_count: Number of texts that were sent.

        Raises:
            RuntimeError: If the body is not valid UTF-8 JSON, lacks a ``data`` list,
                contains a non-list or non-numeric embedding, or the number of
                embeddings differs from ``expected_count``.
        """
        try:
            text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw
            data = json.loads(text)
        except Exception as e:
            raise RuntimeError(f"Gateway embeddings returned invalid JSON: {e}") from e

        rows = data.get("data") if isinstance(data, dict) else None
        if not isinstance(rows, list):
            raise RuntimeError("Gateway embeddings response missing 'data' list")

        # Preserve order via `index` when present.
        parsed: list[tuple[int, List[float]]] = []
        for i, row_any in enumerate(rows):
            row = row_any if isinstance(row_any, dict) else {}
            idx = row.get("index")
            try:
                index = int(idx) if idx is not None else i
            except (TypeError, ValueError, OverflowError):
                # Unusable index: fall back to the row's position in the response.
                index = i
            emb = row.get("embedding")
            if not isinstance(emb, list):
                raise RuntimeError("Gateway embeddings response contains non-list embedding")
            try:
                vector = [float(x) for x in emb]
            except (TypeError, ValueError) as e:
                raise RuntimeError("Gateway embeddings response contains non-numeric embedding values") from e
            parsed.append((index, vector))

        if len(parsed) != expected_count:
            # A short or long response would silently pair vectors with the wrong texts.
            raise RuntimeError(
                f"Gateway embeddings response returned {len(parsed)} embeddings for {expected_count} inputs"
            )

        parsed.sort(key=lambda t: t[0])
        return [v for _, v in parsed]
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import uuid
|
|
5
|
+
from typing import Any, Iterable, List, Optional, Sequence
|
|
6
|
+
|
|
7
|
+
from .embeddings import TextEmbedder
|
|
8
|
+
from .models import TripleAssertion, normalize_term
|
|
9
|
+
from .store import TripleQuery
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _canonical_text(a: TripleAssertion) -> str:
    """Build the newline-joined text representation of assertion ``a``.

    The first line is ``"subject predicate object"``. Labelled lines follow for the
    string attributes ``subject_type``, ``object_type``, ``evidence_quote`` and
    ``original_context`` when they are non-blank; the context is cut to 400
    characters plus an ellipsis.
    """
    attrs = a.attributes if isinstance(a.attributes, dict) else {}

    def _clean(key: str) -> str:
        # Non-string attribute values are ignored, exactly like blank strings.
        raw = attrs.get(key)
        if not isinstance(raw, str):
            return ""
        return raw.strip()

    lines: list[str] = [f"{a.subject} {a.predicate} {a.object}".strip()]

    labelled = (
        ("subject_type", "subject_type"),
        ("object_type", "object_type"),
        ("evidence", "evidence_quote"),
    )
    for label, key in labelled:
        text = _clean(key)
        if text:
            lines.append(f"{label}: {text}")

    context = _clean("original_context")
    if context:
        if len(context) > 400:
            context = context[:400] + "…"
        lines.append(f"context: {context}")

    return "\n".join(lines)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _cosine(a: Sequence[float], b: Sequence[float]) -> float:
    """Return the cosine similarity of ``a`` and ``b``.

    Only the overlapping prefix is compared when the lengths differ. Returns 0.0
    when either input is empty or either compared prefix has zero norm.
    """
    if not a or not b:
        # Defensive: handle empty vectors.
        return 0.0

    dot = 0.0
    norm_a = 0.0
    norm_b = 0.0
    # zip stops at the shorter input, i.e. at min(len(a), len(b)).
    for raw_x, raw_y in zip(a, b):
        x = float(raw_x)
        y = float(raw_y)
        dot += x * y
        norm_a += x * x
        norm_b += y * y

    if norm_a <= 0.0 or norm_b <= 0.0:
        return 0.0
    return dot / (math.sqrt(norm_a) * math.sqrt(norm_b))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class InMemoryTripleStore:
    """A dependency-free triple store (best-effort).

    Notes:
    - Intended for tests/dev and hosts without LanceDB installed.
    - Append-only: updates are represented as new assertions.
    - Vector search is optional and stores vectors in-memory only.
    """

    def __init__(
        self,
        *,
        embedder: Optional[TextEmbedder] = None,
        vector_column: str = "vector",
    ) -> None:
        """Create an empty store.

        Args:
            embedder: Optional embedder used to vectorize assertions on ``add`` and
                ``query_text`` on ``query``.
            vector_column: Row key under which vectors are stored; falls back to
                ``"vector"`` when falsy.
        """
        self._embedder = embedder
        self._vector_column = str(vector_column or "vector")
        self._rows: list[dict[str, Any]] = []

    def close(self) -> None:
        """No-op; the store holds no external resources."""
        return None

    def add(self, assertions: Iterable[TripleAssertion]) -> List[str]:
        """Append ``assertions`` and return their newly generated ids, in input order.

        When an embedder is configured, each assertion's canonical text is embedded
        and the vector is stored alongside the assertion.
        """
        pending: list[TripleAssertion] = list(assertions)
        if not pending:
            return []

        vectors: Optional[List[List[float]]] = None
        if self._embedder is not None:
            vectors = self._embedder.embed_texts([_canonical_text(a) for a in pending])

        ids: list[str] = []
        for i, a in enumerate(pending):
            assertion_id = str(uuid.uuid4())
            ids.append(assertion_id)
            row: dict[str, Any] = {"assertion_id": assertion_id, "assertion": a}
            # Best-effort: a row is stored without a vector if the embedder returned too few.
            if vectors is not None and i < len(vectors):
                row[self._vector_column] = vectors[i]
            self._rows.append(row)
        return ids

    def _row_vector(self, row: dict[str, Any], requested_column: Optional[str]) -> Any:
        """Return the vector stored on ``row`` for a query asking for ``requested_column``.

        The query's column is tried first. Because ``TripleQuery.vector_column``
        defaults to ``"vector"``, a store configured with another column would never
        match a default query, so the store's own column is used as a fallback.
        """
        column = requested_column or self._vector_column
        vector = row.get(column)
        if vector is None and column != self._vector_column:
            vector = row.get(self._vector_column)
        return vector

    def query(self, q: TripleQuery) -> List[TripleAssertion]:
        """Return assertions matching ``q``.

        Structured filters (subject/predicate/object/scope/owner/time window) are
        applied first. If ``q.query_vector`` or ``q.query_text`` is set, the matches
        are ranked by cosine similarity (highest first), filtered by ``q.min_score``,
        and returned as copies whose ``attributes["_retrieval"]`` carries the score.
        Otherwise matches are sorted by ``observed_at`` (descending unless
        ``q.order`` is ``"asc"``). A non-positive limit means "no limit".

        Raises:
            ValueError: If ``q.query_text`` is given but no embedder is configured.
        """
        raw_limit = int(q.limit) if isinstance(q.limit, int) else 100
        limit: Optional[int] = raw_limit if raw_limit > 0 else None

        def _match(a: TripleAssertion) -> bool:
            if q.subject and normalize_term(a.subject) != normalize_term(q.subject):
                return False
            if q.predicate and normalize_term(a.predicate) != normalize_term(q.predicate):
                return False
            if q.object and normalize_term(a.object) != normalize_term(q.object):
                return False
            if q.scope and a.scope != q.scope:
                return False
            if q.owner_id and (a.owner_id or "") != q.owner_id:
                return False
            # Timestamps are compared as strings (lexicographic order).
            if q.since and (a.observed_at or "") < q.since:
                return False
            if q.until and (a.observed_at or "") > q.until:
                return False
            if q.active_at:
                at = q.active_at
                if a.valid_from and a.valid_from > at:
                    return False
                if a.valid_until and a.valid_until <= at:
                    return False
            return True

        filtered: list[dict[str, Any]] = [
            r
            for r in self._rows
            if isinstance(r, dict) and isinstance(r.get("assertion"), TripleAssertion) and _match(r["assertion"])
        ]

        query_vector: Optional[Sequence[float]] = None
        if q.query_vector:
            query_vector = q.query_vector
        elif q.query_text:
            if self._embedder is None:
                raise ValueError("query_text requires a configured embedder (vector search); keyword fallback is disabled")
            query_vector = self._embedder.embed_texts([q.query_text])[0]

        if query_vector is None:
            plain: list[TripleAssertion] = [r["assertion"] for r in filtered]
            plain.sort(key=lambda a: a.observed_at or "", reverse=(str(q.order).lower() != "asc"))
            return plain if limit is None else plain[:limit]

        ranked: list[tuple[float, TripleAssertion]] = []
        for r in filtered:
            v = self._row_vector(r, q.vector_column)
            if not isinstance(v, list):
                # Rows without a usable vector cannot be ranked.
                continue
            try:
                score = _cosine(query_vector, v)
            except Exception:
                # Malformed vector contents: treat as "no similarity" rather than failing the query.
                score = 0.0
            if q.min_score is not None and score < float(q.min_score):
                continue
            ranked.append((score, r["assertion"]))
        ranked.sort(key=lambda t: t[0], reverse=True)

        out: list[TripleAssertion] = []
        for score, a in (ranked if limit is None else ranked[:limit]):
            # Copy attributes so the stored assertion is not mutated by retrieval metadata.
            attrs = dict(a.attributes) if isinstance(a.attributes, dict) else {}
            retrieval = attrs.get("_retrieval") if isinstance(attrs.get("_retrieval"), dict) else {}
            retrieval2 = dict(retrieval)
            retrieval2["score"] = float(score)
            retrieval2.setdefault("metric", "cosine")
            attrs["_retrieval"] = retrieval2
            out.append(
                TripleAssertion(
                    subject=a.subject,
                    predicate=a.predicate,
                    object=a.object,
                    scope=a.scope,
                    owner_id=a.owner_id,
                    observed_at=a.observed_at,
                    valid_from=a.valid_from,
                    valid_until=a.valid_until,
                    confidence=a.confidence,
                    provenance=dict(a.provenance),
                    attributes=attrs,
                )
            )
        return out
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import uuid
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, Iterable, List, Optional, Sequence
|
|
7
|
+
|
|
8
|
+
from .embeddings import TextEmbedder
|
|
9
|
+
from .models import TripleAssertion, normalize_term
|
|
10
|
+
from .store import TripleQuery
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _import_lancedb():
    """Import the optional ``lancedb`` module on demand and return it.

    Raises:
        ImportError: If importing ``lancedb`` fails for any reason; the message
            contains an installation hint and the original error is chained.
    """
    try:
        import lancedb  # type: ignore
    except Exception as exc:  # pragma: no cover
        hint = (
            "LanceDB support requires `lancedb` (and its dependencies). "
            "Install it in your environment (offline/local install is fine), e.g. `pip install lancedb`."
        )
        raise ImportError(hint) from exc
    return lancedb
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _escape_sql_string(value: str) -> str:
    """Return ``value`` as text with every single quote doubled.

    LanceDB uses SQL-like filter strings, so the result can be placed inside a
    ``'...'`` literal.
    """
    text = str(value)
    return "''".join(text.split("'"))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _build_where_clause(q: TripleQuery) -> str:
    """Translate the structured filters of ``q`` into a SQL-like filter string.

    Only truthy filters contribute a condition; conditions are joined with
    ``AND``. Returns an empty string when no filter is set.
    """

    def _quoted(value: str) -> str:
        # Every literal goes through the quote-escaping helper before being embedded.
        return f"'{_escape_sql_string(value)}'"

    clauses: list[str] = []

    # Term filters: case-insensitive equality against the normalized query term.
    for column, term in (("subject", q.subject), ("predicate", q.predicate), ("object", q.object)):
        if term:
            clauses.append(f"lower({column}) = {_quoted(normalize_term(term))}")

    # Partition filters: exact equality.
    for column, value in (("scope", q.scope), ("owner_id", q.owner_id)):
        if value:
            clauses.append(f"{column} = {_quoted(value)}")

    # Inclusive observed_at window.
    for operator, bound in ((">=", q.since), ("<=", q.until)):
        if bound:
            clauses.append(f"observed_at {operator} {_quoted(bound)}")

    # Validity window: open-ended (NULL) bounds always match.
    if q.active_at:
        at = _quoted(q.active_at)
        clauses.append(f"(valid_from IS NULL OR valid_from <= {at})")
        clauses.append(f"(valid_until IS NULL OR valid_until > {at})")

    return " AND ".join(clauses)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _canonical_text(a: TripleAssertion) -> str:
    """Build the newline-joined text representation of assertion ``a``.

    Why include more than "s p o":
    - semantic queries often refer to details that aren't present in the triple surface form
    - extractor-provided evidence/context improves retrieval selectivity without requiring
      a separate episodic document store in v0
    """
    attrs = a.attributes if isinstance(a.attributes, dict) else {}

    def _stripped(key: str) -> str:
        # Only string attributes are used; anything else counts as absent.
        value = attrs.get(key)
        return value.strip() if isinstance(value, str) else ""

    subject_type = _stripped("subject_type")
    object_type = _stripped("object_type")
    evidence = _stripped("evidence_quote")
    context = _stripped("original_context")
    if len(context) > 400:
        # Cap the context so one long passage does not dominate the text.
        context = context[:400] + "…"

    candidates = [
        f"subject_type: {subject_type}" if subject_type else "",
        f"object_type: {object_type}" if object_type else "",
        f"evidence: {evidence}" if evidence else "",
        f"context: {context}" if context else "",
    ]
    headline = f"{a.subject} {a.predicate} {a.object}".strip()
    return "\n".join([headline] + [line for line in candidates if line])
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _loads_json(raw: object) -> dict:
    """Parse ``raw`` as a JSON object, returning ``{}`` for anything else.

    An empty dict is returned when ``raw`` is not a non-empty string, when parsing
    fails, or when the parsed value is not a dict.
    """
    if isinstance(raw, str) and raw:
        try:
            decoded = json.loads(raw)
        except Exception:
            return {}
        if isinstance(decoded, dict):
            return decoded
    return {}
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class LanceDBTripleStore:
    """LanceDB-backed append-only triple store with optional vector search.

    Notes:
    - Append-only: updates are represented as new assertions.
    - Vector search is optional and requires `embedder` (for query_text) or query_vector.
    """

    def __init__(
        self,
        uri: str | Path,
        *,
        table_name: str = "triple_assertions",
        embedder: Optional[TextEmbedder] = None,
        vector_column: str = "vector",
    ):
        """Connect to the database at ``uri`` and open ``table_name`` if it exists.

        Args:
            uri: Location passed (as a string) to ``lancedb.connect``.
            table_name: Name of the table holding the assertions.
            embedder: Optional embedder used on ``add`` and for ``query_text``.
            vector_column: Column under which vectors are stored; falls back to
                ``"vector"`` when falsy.

        Raises:
            ImportError: If ``lancedb`` cannot be imported.
        """
        self._lancedb = _import_lancedb()
        self._db = self._lancedb.connect(str(uri))
        self._table_name = str(table_name)
        self._vector_column = str(vector_column or "vector")
        self._embedder = embedder

        self._table = None
        try:
            # `table_names()` is deprecated upstream but is stable and sufficient for local stores.
            if self._table_name in set(self._db.table_names()):
                self._table = self._db.open_table(self._table_name)
        except Exception:
            # Best-effort: if the table cannot be listed/opened, behave as if it does not exist yet.
            self._table = None

    def close(self) -> None:
        """No-op; LanceDB tables/connections are managed by the library."""
        return None

    def add(self, assertions: Iterable[TripleAssertion]) -> List[str]:
        """Append ``assertions`` to the table and return their generated ids, in input order.

        The table is created on the first insert. Provenance and attributes are
        stored as compact JSON strings, and the canonical text is stored in a
        ``text`` column.
        """
        pending: List[TripleAssertion] = list(assertions)
        if not pending:
            return []

        # Always store a canonical text column (useful for debugging and future indexing).
        texts: List[str] = [_canonical_text(a) for a in pending]
        vectors: Optional[List[List[float]]] = None
        if self._embedder is not None:
            vectors = self._embedder.embed_texts(texts)

        rows: list[dict[str, Any]] = []
        ids: List[str] = []
        for idx, a in enumerate(pending):
            assertion_id = str(uuid.uuid4())
            ids.append(assertion_id)
            row: Dict[str, Any] = {
                "assertion_id": assertion_id,
                "subject": a.subject,
                "predicate": a.predicate,
                "object": a.object,
                "scope": a.scope,
                "owner_id": a.owner_id,
                "observed_at": a.observed_at,
                "valid_from": a.valid_from,
                "valid_until": a.valid_until,
                "confidence": a.confidence,
                "provenance_json": json.dumps(a.provenance, ensure_ascii=False, separators=(",", ":")),
                "attributes_json": json.dumps(a.attributes, ensure_ascii=False, separators=(",", ":")),
                "text": texts[idx],
            }

            if vectors is not None and idx < len(vectors):
                row[self._vector_column] = vectors[idx]

            # Keep JSON compact (omit nulls).
            # NOTE(review): dropping None-valued keys means rows/batches can carry different
            # column sets — confirm the table schema inferred from the first batch tolerates this.
            rows.append({k: v for k, v in row.items() if v is not None})

        if self._table is None:
            # Create on first insert so we can infer vector dimensionality from real data.
            self._table = self._db.create_table(self._table_name, data=rows, mode="create")
        else:
            self._table.add(rows)
        return ids

    def _resolve_vector_column(self, requested: Optional[str]) -> str:
        """Pick the vector column to search for a query that asked for ``requested``.

        ``TripleQuery.vector_column`` defaults to ``"vector"``, so a store configured
        with a different column would otherwise search a column that does not
        exist. The requested column is kept whenever the table has it (or the
        schema cannot be inspected); otherwise the store's own column is used.
        """
        column = requested or self._vector_column
        if column == self._vector_column:
            return column
        try:
            names = set(self._table.schema.names)
        except Exception:
            # Schema not inspectable: keep the caller's choice.
            return column
        if column not in names and self._vector_column in names:
            return self._vector_column
        return column

    def query(self, q: TripleQuery) -> List[TripleAssertion]:
        """Return assertions matching ``q``.

        Structured filters are pushed down as a where-clause. With ``q.query_vector``
        or ``q.query_text`` a cosine vector search is run, results below
        ``q.min_score`` are dropped, and ``attributes["_retrieval"]`` carries
        score/distance metadata. Otherwise all matching rows are fetched and sorted
        by ``observed_at`` (descending unless ``q.order`` is ``"asc"``). A
        non-positive limit means "no explicit limit". Returns ``[]`` if the table
        does not exist yet.

        Raises:
            ValueError: If ``q.query_text`` is given but no embedder is configured.
        """
        if self._table is None:
            return []

        raw_limit = int(q.limit) if isinstance(q.limit, int) else 100
        limit: Optional[int] = raw_limit if raw_limit > 0 else None

        where = _build_where_clause(q)

        query_vector: Optional[Sequence[float]] = None
        if q.query_vector:
            query_vector = q.query_vector
        elif q.query_text:
            if self._embedder is None:
                raise ValueError("query_text requires a configured embedder (vector search); keyword fallback is disabled")
            query_vector = self._embedder.embed_texts([q.query_text])[0]

        if query_vector is not None:
            # Use cosine metric so `min_score` can be expressed as cosine similarity.
            qb = self._table.search(
                query_vector,
                vector_column_name=self._resolve_vector_column(q.vector_column),
            ).metric("cosine")
        else:
            qb = self._table.search()

        if where:
            qb = qb.where(where)

        if query_vector is None or limit is None:
            # LanceDB does not currently expose an order_by API on query builders. For deterministic
            # observed_at ordering (and correct limit semantics), fetch all matching rows then sort
            # in Python and apply the limit after sorting.
            rows = qb.to_list()
        else:
            rows = qb.limit(limit).to_list()

        out: List[TripleAssertion] = []
        for r in rows:
            if not isinstance(r, dict):
                continue
            provenance = _loads_json(r.get("provenance_json"))
            attributes = _loads_json(r.get("attributes_json"))

            # Attach retrieval metadata for semantic queries.
            # LanceDB returns `_distance` for vector searches; with metric=cosine, similarity = 1 - distance.
            if query_vector is not None:
                dist_raw = r.get("_distance")
                dist: Optional[float] = None
                try:
                    dist = float(dist_raw) if dist_raw is not None else None
                except Exception:
                    dist = None
                score: Optional[float] = None
                if dist is not None:
                    score = 1.0 - dist

                # Rows without a usable distance are kept; they simply carry no score.
                if q.min_score is not None and score is not None and score < float(q.min_score):
                    continue

                retrieval = attributes.get("_retrieval") if isinstance(attributes.get("_retrieval"), dict) else {}
                retrieval2 = dict(retrieval)
                if score is not None:
                    retrieval2["score"] = score
                if dist is not None:
                    retrieval2["distance"] = dist
                retrieval2.setdefault("metric", "cosine")
                attributes = dict(attributes)
                attributes["_retrieval"] = retrieval2

            out.append(
                TripleAssertion(
                    subject=str(r.get("subject") or ""),
                    predicate=str(r.get("predicate") or ""),
                    object=str(r.get("object") or ""),
                    scope=str(r.get("scope") or "run"),
                    owner_id=str(r.get("owner_id")) if r.get("owner_id") is not None else None,
                    observed_at=str(r.get("observed_at") or ""),
                    valid_from=str(r.get("valid_from")) if r.get("valid_from") is not None else None,
                    valid_until=str(r.get("valid_until")) if r.get("valid_until") is not None else None,
                    confidence=r.get("confidence") if isinstance(r.get("confidence"), (int, float)) else None,
                    provenance=provenance,
                    attributes=attributes,
                )
            )

        # For non-semantic queries, keep compatibility with SQLite semantics: order by observed_at.
        # For semantic queries, LanceDB already returns similarity-ranked results.
        if query_vector is None:
            out.sort(key=lambda a: a.observed_at or "", reverse=(str(q.order).lower() != "asc"))
        return out if limit is None else out[:limit]
|
abstractmemory/models.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def utc_now_iso_seconds() -> str:
    """Return the current UTC time as an ISO-8601 string with whole-second precision."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat(timespec="seconds")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def canonicalize_term(value: str) -> str:
    """Return the canonical form of a KG term used for stable matching.

    Policy (v0):
    - falsy input becomes the empty string
    - surrounding whitespace is trimmed
    - the result is lowercased (avoids missed matches due to casing drift)
    """
    text = str(value or "")
    trimmed = text.strip()
    return trimmed.lower()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def normalize_term(value: str) -> str:
    """Return ``value`` in the form used for case-insensitive matching at query time."""
    canonical = canonicalize_term(value)
    # The canonical form is already lowercase; the extra call is kept for clarity/compatibility.
    return canonical.lower()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class TripleAssertion:
    """An append-only semantic assertion with temporal and provenance metadata.

    Instances are frozen. Fields are normalized once in ``__post_init__``: the
    triple terms are canonicalized, ``scope`` is lowercased, and optional string
    fields are trimmed (blank becomes ``None``).
    """

    subject: str
    predicate: str
    object: str
    scope: str = "run"  # run|session|global
    owner_id: Optional[str] = None  # scope owner identifier (e.g. run_id, session_memory_*, global_memory)
    observed_at: str = field(default_factory=utc_now_iso_seconds)

    valid_from: Optional[str] = None
    valid_until: Optional[str] = None
    confidence: Optional[float] = None

    provenance: Dict[str, Any] = field(default_factory=dict)
    attributes: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Normalize field values in place (bypassing the frozen-dataclass guard)."""
        # Frozen dataclasses block normal assignment, so write through object.__setattr__.
        assign = object.__setattr__

        # Canonicalize KG terms (trim + lower) for stable matching.
        for name in ("subject", "predicate", "object"):
            assign(self, name, canonicalize_term(getattr(self, name)))

        # Keep scope canonical (it is part of the partitioning key).
        assign(self, "scope", str(self.scope or "").strip().lower() or "run")

        # Defensive trimming for timestamps/ids without altering semantics:
        # blank strings collapse to None, non-string values are left untouched.
        for name in ("owner_id", "valid_from", "valid_until"):
            current = getattr(self, name)
            if isinstance(current, str):
                assign(self, name, current.strip() or None)

        # A missing/blank observation time is replaced by "now".
        assign(self, "observed_at", str(self.observed_at or "").strip() or utc_now_iso_seconds())

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of the assertion.

        Scalar fields whose value is ``None`` are omitted to keep JSON compact;
        ``provenance`` and ``attributes`` are always present as shallow copies.
        """
        scalar_fields = (
            "subject",
            "predicate",
            "object",
            "scope",
            "owner_id",
            "observed_at",
            "valid_from",
            "valid_until",
            "confidence",
        )
        out: Dict[str, Any] = {}
        for name in scalar_fields:
            value = getattr(self, name)
            if value is not None:
                out[name] = value
        out["provenance"] = dict(self.provenance)
        out["attributes"] = dict(self.attributes)
        return out

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "TripleAssertion":
        """Build an assertion from a plain dict, tolerating missing or mistyped optional keys.

        Raises:
            TypeError: If ``data`` is not a dict.
            ValueError: If ``subject``, ``predicate`` or ``object`` is not a non-empty string.
        """
        if not isinstance(data, dict):
            raise TypeError("TripleAssertion.from_dict expects a dict")

        # The three triple terms are mandatory and must be non-blank strings.
        terms: Dict[str, str] = {}
        for name in ("subject", "predicate", "object"):
            value = data.get(name)
            if not isinstance(value, str) or not value.strip():
                raise ValueError(f"TripleAssertion.{name} must be a non-empty string")
            terms[name] = value

        def _str_or(key: str, default: Any) -> Any:
            # Optional string fields: any non-string value is treated as absent.
            value = data.get(key)
            return value if isinstance(value, str) else default

        def _dict_copy(key: str) -> Dict[str, Any]:
            value = data.get(key)
            return dict(value) if isinstance(value, dict) else {}

        owner_id = _str_or("owner_id", None)
        if owner_id is not None:
            owner_id = owner_id.strip() or None

        confidence: Optional[float] = None
        confidence_raw = data.get("confidence")
        if confidence_raw is not None:
            try:
                confidence = float(confidence_raw)
            except Exception:
                # Unparseable confidence is dropped rather than rejected.
                confidence = None

        return cls(
            subject=terms["subject"],
            predicate=terms["predicate"],
            object=terms["object"],
            scope=_str_or("scope", "run"),
            owner_id=owner_id,
            observed_at=_str_or("observed_at", "").strip() or utc_now_iso_seconds(),
            valid_from=_str_or("valid_from", None),
            valid_until=_str_or("valid_until", None),
            confidence=confidence,
            provenance=_dict_copy("provenance"),
            attributes=_dict_copy("attributes"),
        )
|
abstractmemory/store.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Iterable, List, Optional, Protocol
|
|
5
|
+
|
|
6
|
+
from .models import TripleAssertion, canonicalize_term
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class TripleQuery:
    """Immutable description of a triple-store query.

    Every filter defaults to ``None``. String inputs are cleaned up once, in
    ``__post_init__``, so a constructed instance only ever holds normalized
    values; a string that is empty after cleanup is stored as ``None`` (or as
    the field's default for ``vector_column`` and ``order``).
    """

    subject: Optional[str] = None
    predicate: Optional[str] = None
    object: Optional[str] = None
    scope: Optional[str] = None  # run|session|global
    owner_id: Optional[str] = None  # owner identifier within the selected scope

    since: Optional[str] = None  # observed_at >= since
    until: Optional[str] = None  # observed_at <= until
    active_at: Optional[str] = None  # valid_from/valid_until window intersection

    # Optional semantic search:
    # - query_text requires a store-configured embedder
    # - query_vector bypasses embedding generation
    query_text: Optional[str] = None
    query_vector: Optional[List[float]] = None
    vector_column: str = "vector"
    min_score: Optional[float] = None  # cosine similarity threshold (semantic queries)

    limit: int = 100
    order: str = "desc"  # asc|desc by observed_at

    def __post_init__(self) -> None:
        """Normalize the string-valued fields and ``min_score`` after construction.

        Non-string values are left untouched, except ``min_score``, which is
        coerced with ``float()`` and reset to ``None`` when the coercion fails
        or yields NaN.
        """
        # The dataclass is frozen, so regular attribute assignment raises;
        # writes have to go through the base-class setter instead.
        assign = object.__setattr__

        # Knowledge-graph terms: canonicalized (trim + lower) for stable exact match.
        for term_field in ("subject", "predicate", "object"):
            raw_term = getattr(self, term_field)
            if isinstance(raw_term, str):
                assign(self, term_field, canonicalize_term(raw_term) or None)

        # Scope: trimmed and lower-cased.
        raw_scope = self.scope
        if isinstance(raw_scope, str):
            assign(self, "scope", str(raw_scope or "").strip().lower() or None)

        # Owner id and timestamps: trimmed only, case is preserved.
        for meta_field in ("owner_id", "since", "until", "active_at"):
            raw_meta = getattr(self, meta_field)
            if isinstance(raw_meta, str):
                assign(self, meta_field, raw_meta.strip() or None)

        # Semantic query text: trimmed only.
        raw_text = self.query_text
        if isinstance(raw_text, str):
            assign(self, "query_text", str(raw_text or "").strip() or None)

        # A blank column name falls back to the default column.
        raw_column = self.vector_column
        if isinstance(raw_column, str):
            assign(self, "vector_column", raw_column.strip() or "vector")

        # Similarity threshold: must be float-convertible and not NaN.
        raw_threshold = self.min_score
        if raw_threshold is not None:
            try:
                threshold = float(raw_threshold)
            except Exception:
                threshold = None
            # NaN is the only float that is not equal to itself.
            usable = threshold is not None and threshold == threshold
            assign(self, "min_score", threshold if usable else None)

        # Sort direction: trimmed and lower-cased; blank falls back to "desc".
        raw_order = self.order
        if isinstance(raw_order, str):
            assign(self, "order", raw_order.strip().lower() or "desc")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class TripleStore(Protocol):
    """Structural interface that a triple-store backend is expected to satisfy.

    Being a ``Protocol``, a class conforms by providing these three methods
    with compatible signatures; it does not need to inherit from this class.
    """

    def add(self, assertions: Iterable[TripleAssertion]) -> List[str]:
        """Store the given assertions and return a list of strings.

        NOTE(review): the returned strings are presumably identifiers of the
        stored assertions -- confirm against the concrete stores.
        """

    def query(self, q: TripleQuery) -> List[TripleAssertion]:
        """Return the assertions selected by the query ``q``."""

    def close(self) -> None:
        """Close the store.

        NOTE(review): presumably releases backend resources -- confirm
        against the concrete stores.
        """
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: AbstractMemory
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: AbstractMemory: temporal, provenance-aware semantic memory primitives for AbstractFramework.
|
|
5
|
+
Project-URL: AbstractFramework (monorepo), https://github.com/lpalbou/abstractframework
|
|
6
|
+
Author: Laurent-Philippe Albou
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: agents,knowledge-graph,memory,temporal,triples
|
|
10
|
+
Classifier: Development Status :: 1 - Planning
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Provides-Extra: all
|
|
21
|
+
Requires-Dist: lancedb; extra == 'all'
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
24
|
+
Provides-Extra: lancedb
|
|
25
|
+
Requires-Dist: lancedb; extra == 'lancedb'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# AbstractMemory (early / WIP)
|
|
29
|
+
|
|
30
|
+
AbstractMemory is a small Python library for **append-only, temporal, provenance-aware triple assertions** with a **deterministic query API** and optional **vector/semantic retrieval**.
|
|
31
|
+
|
|
32
|
+
## Status
|
|
33
|
+
- This package is still early: API and storage details may change.
|
|
34
|
+
- Implemented today: `TripleAssertion`, `TripleQuery`, `InMemoryTripleStore`, `LanceDBTripleStore`, `AbstractGatewayTextEmbedder`.
|
|
35
|
+
- Source of truth for exports: [`src/abstractmemory/__init__.py`](src/abstractmemory/__init__.py)
|
|
36
|
+
- Requires Python 3.10+ (see [`pyproject.toml`](pyproject.toml))
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
From PyPI (when published):
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
python -m pip install AbstractMemory
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Optional persistent backend + vector search:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
python -m pip install "AbstractMemory[lancedb]"
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Note: the distribution name is `AbstractMemory` (pip is case-insensitive). The import name is `abstractmemory`.
|
|
53
|
+
|
|
54
|
+
From source (recommended for this monorepo package):
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
python -m pip install -e .
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Optional persistent backend + vector search:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
python -m pip install -e ".[lancedb]"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Quick example
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from abstractmemory import InMemoryTripleStore, TripleAssertion, TripleQuery
|
|
70
|
+
|
|
71
|
+
store = InMemoryTripleStore()
|
|
72
|
+
store.add(
|
|
73
|
+
[
|
|
74
|
+
TripleAssertion(
|
|
75
|
+
subject="Scrooge",
|
|
76
|
+
predicate="related_to",
|
|
77
|
+
object="Christmas",
|
|
78
|
+
scope="session",
|
|
79
|
+
owner_id="sess-1",
|
|
80
|
+
provenance={"span_id": "span_123"},
|
|
81
|
+
)
|
|
82
|
+
]
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
hits = store.query(TripleQuery(subject="scrooge", scope="session", owner_id="sess-1"))
|
|
86
|
+
assert hits[0].object == "christmas" # terms are canonicalized (trim + lowercase)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Documentation
|
|
90
|
+
|
|
91
|
+
- Getting started: [`docs/getting-started.md`](docs/getting-started.md)
|
|
92
|
+
- FAQ: [`docs/faq.md`](docs/faq.md)
|
|
93
|
+
- Architecture (with diagrams): [`docs/architecture.md`](docs/architecture.md)
|
|
94
|
+
- Stores/backends: [`docs/stores.md`](docs/stores.md)
|
|
95
|
+
- API reference: [`docs/api.md`](docs/api.md)
|
|
96
|
+
- Development: [`docs/development.md`](docs/development.md)
|
|
97
|
+
|
|
98
|
+
## Project
|
|
99
|
+
|
|
100
|
+
- Changelog: [`CHANGELOG.md`](CHANGELOG.md)
|
|
101
|
+
- Contributing: [`CONTRIBUTING.md`](CONTRIBUTING.md)
|
|
102
|
+
- Security: [`SECURITY.md`](SECURITY.md)
|
|
103
|
+
- License: [`LICENSE`](LICENSE)
|
|
104
|
+
- Acknowledgments: [`ACKNOWLEDGMENTS.md`](ACKNOWLEDGMENTS.md)
|
|
105
|
+
|
|
106
|
+
## Design principles (v0)
|
|
107
|
+
|
|
108
|
+
- **Triples-first** representation with temporal fields (`observed_at`, `valid_from`, `valid_until`).
|
|
109
|
+
- Implemented in `TripleAssertion`: [`src/abstractmemory/models.py`](src/abstractmemory/models.py)
|
|
110
|
+
- **Append-only**: represent updates by adding a new assertion with fresh provenance.
|
|
111
|
+
- Implemented by both stores: [`src/abstractmemory/in_memory_store.py`](src/abstractmemory/in_memory_store.py), [`src/abstractmemory/lancedb_store.py`](src/abstractmemory/lancedb_store.py)
|
|
112
|
+
- **No direct AbstractCore dependency**: embeddings can be obtained via an AbstractGateway HTTP API.
|
|
113
|
+
- Implemented by `AbstractGatewayTextEmbedder`: [`src/abstractmemory/embeddings.py`](src/abstractmemory/embeddings.py)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
abstractmemory/__init__.py,sha256=diBIm5YRxFmn-rKSu6CVW6rZGEuiDFWi1mwK8Jdwj84,425
|
|
2
|
+
abstractmemory/embeddings.py,sha256=QpBDNNTr1TVzhZoSPoLNa5XASAcQfUvAFKrHlEdo9Zw,3741
|
|
3
|
+
abstractmemory/in_memory_store.py,sha256=ZkQ7KNznVkJqHgzZDAg_OCDtZi1DUJnCFkqxWGtRCEo,6861
|
|
4
|
+
abstractmemory/lancedb_store.py,sha256=4D6hiabFjbB65VLyIjX9CA57NyvMU9p3GMGdXr3s4vY,10650
|
|
5
|
+
abstractmemory/models.py,sha256=35dAm8ofIZVzn6mtd28SfRYesHL40AJ3Z3UfZHWUsvs,5335
|
|
6
|
+
abstractmemory/store.py,sha256=rG_DtBL_InnTgxybfzmLH6R6GdvVeEE7dwnI1g-ksu0,3534
|
|
7
|
+
abstractmemory-0.0.2.dist-info/METADATA,sha256=sWmn-qhXOAkcNqZChs7g7xftb-gAhdIEXUCJf0wvNSw,4091
|
|
8
|
+
abstractmemory-0.0.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
9
|
+
abstractmemory-0.0.2.dist-info/licenses/LICENSE,sha256=7t9ARuV3s_QVhJahnkFBnX_E5GPLW8BE9geplO1CGko,1079
|
|
10
|
+
abstractmemory-0.0.2.dist-info/RECORD,,
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: AbstractMemory
|
|
3
|
-
Version: 0.0.1
|
|
4
|
-
Summary: PLACEHOLDER: Memory system for transforming stateless LLMs into stateful LLMs - primarily designed for AbstractLLM integration
|
|
5
|
-
Author-email: AbstractMemory Team <contact@example.com>
|
|
6
|
-
Maintainer-email: AbstractMemory Team <contact@example.com>
|
|
7
|
-
License-Expression: MIT
|
|
8
|
-
Project-URL: Homepage, https://github.com/abstractmemory/abstractmemory
|
|
9
|
-
Project-URL: Documentation, https://github.com/abstractmemory/abstractmemory#readme
|
|
10
|
-
Project-URL: Repository, https://github.com/abstractmemory/abstractmemory
|
|
11
|
-
Project-URL: Bug Reports, https://github.com/abstractmemory/abstractmemory/issues
|
|
12
|
-
Keywords: llm,memory,stateful,ai,placeholder
|
|
13
|
-
Classifier: Development Status :: 1 - Planning
|
|
14
|
-
Classifier: Intended Audience :: Developers
|
|
15
|
-
Classifier: Programming Language :: Python :: 3
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
-
Requires-Python: >=3.8
|
|
24
|
-
Description-Content-Type: text/markdown
|
|
25
|
-
License-File: LICENSE
|
|
26
|
-
Dynamic: license-file
|
|
27
|
-
|
|
28
|
-
# AbstractMemory - PLACEHOLDER PROJECT
|
|
29
|
-
|
|
30
|
-
⚠️ **WARNING: This is a placeholder package** ⚠️
|
|
31
|
-
|
|
32
|
-
## Overview
|
|
33
|
-
|
|
34
|
-
AbstractMemory is a placeholder package that reserves the name on PyPI for a future memory system designed to transform stateless LLMs into stateful LLMs.
|
|
35
|
-
|
|
36
|
-
## Current Status
|
|
37
|
-
|
|
38
|
-
**This package is currently a PLACEHOLDER and should NOT be used in production.**
|
|
39
|
-
|
|
40
|
-
The actual memory system implementation is currently integrated within the AbstractLLM project. This separate package exists to:
|
|
41
|
-
|
|
42
|
-
1. **Reserve the PyPI name** for future modularization
|
|
43
|
-
2. **Enable clean separation of concerns** when the code is extracted from AbstractLLM
|
|
44
|
-
3. **Facilitate better evolution and maintenance** of the memory system as a standalone component
|
|
45
|
-
4. **Allow reusability** across different LLM frameworks in the future
|
|
46
|
-
|
|
47
|
-
## Future Vision
|
|
48
|
-
|
|
49
|
-
AbstractMemory will provide:
|
|
50
|
-
|
|
51
|
-
- **Stateful Memory Management**: Transform stateless LLMs into stateful systems
|
|
52
|
-
- **Primary AbstractLLM Integration**: Seamless integration with AbstractLLM
|
|
53
|
-
- **Modular Architecture**: Clean separation from core LLM functionality
|
|
54
|
-
- **Extensible Framework**: Support for various memory strategies and backends
|
|
55
|
-
|
|
56
|
-
## Installation
|
|
57
|
-
|
|
58
|
-
```bash
|
|
59
|
-
pip install AbstractMemory
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
## Usage
|
|
63
|
-
|
|
64
|
-
Currently, attempting to use any functionality will raise a `PlaceholderError`:
|
|
65
|
-
|
|
66
|
-
```python
|
|
67
|
-
import abstractmemory
|
|
68
|
-
|
|
69
|
-
# This will raise PlaceholderError
|
|
70
|
-
abstractmemory.placeholder_warning()
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
## Development Timeline
|
|
74
|
-
|
|
75
|
-
The actual implementation will be extracted and modularized from AbstractLLM when:
|
|
76
|
-
- The AbstractLLM memory system reaches sufficient maturity
|
|
77
|
-
- Clean interfaces are established
|
|
78
|
-
- Comprehensive testing framework is in place
|
|
79
|
-
|
|
80
|
-
## Contributing
|
|
81
|
-
|
|
82
|
-
This is a placeholder project. For memory-related contributions, please refer to the AbstractLLM project until the code is modularized.
|
|
83
|
-
|
|
84
|
-
## License
|
|
85
|
-
|
|
86
|
-
MIT License - See LICENSE file for details.
|
|
87
|
-
|
|
88
|
-
## Contact
|
|
89
|
-
|
|
90
|
-
For questions about future development plans, please refer to the AbstractLLM project documentation.
|
|
91
|
-
|
|
92
|
-
---
|
|
93
|
-
|
|
94
|
-
**Remember: This is a placeholder. The real implementation is coming soon!**
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
abstractmemory/__init__.py,sha256=56ZUm3WI5FFZRBoPwwAC4Zj0lK1vglE7RTj3a6yAFFY,1362
|
|
2
|
-
abstractmemory-0.0.1.dist-info/licenses/LICENSE,sha256=lPn7wpbMAgGyv2OAhgHtW_bZHq4Hnm7Gx28-8m82Nt4,1076
|
|
3
|
-
abstractmemory-0.0.1.dist-info/METADATA,sha256=2kDPyAPoE1e7XGR0_zvRWYp2FmbkHDJRN3vqv17m2XA,3420
|
|
4
|
-
abstractmemory-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
5
|
-
abstractmemory-0.0.1.dist-info/top_level.txt,sha256=ALpTfrLlzRBy55aam5M8YuOi-4i_shfsq1DcgTR6_NQ,15
|
|
6
|
-
abstractmemory-0.0.1.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
abstractmemory
|