engram-lite 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
engram/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ """engram — a small, local-first agentic memory engine.
2
+
3
+ One SQLite file holds the facts, a keyword index (FTS5), and a vector index
4
+ (sqlite-vec). Meant to be shared across your AI tools (Claude, Cursor, …) and
5
+ across sessions. See docs/memory-docs/ for the design and docs/code-docs/ for the
6
+ code guide.
7
+
8
+ Quick start:
9
+
10
+ from engram import Memory
11
+ mem = Memory("my_memory.db")
12
+ mem.save("Bob owns the payments service", subject="Bob")
13
+ mem.search("who owns payments?", subject="Bob")
14
+
15
+ Layout:
16
+ engram.core the engine logic (memory, consolidation, retrieval, …)
17
+ engram.storage SQLite connection, schema, row/index operations
18
+ engram.embeddings text → vector (local model, with a stub fallback)
19
+ engram.cli the `engram` command (demo / status / selftest / rebuild)
20
+ """
21
+
22
+ from .core.memory import Memory
23
+ from .embeddings import get_embedder
24
+
25
+ __all__ = ["Memory", "get_embedder"]
26
+ try:
27
+ from importlib.metadata import version as _pkg_version
28
+ __version__ = _pkg_version("engram-lite")
29
+ except Exception: # not installed (running from a checkout)
30
+ __version__ = "0.1.0"
engram/cli/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """The `engram` command-line interface."""
2
+
3
+ from .main import main
4
+
5
+ __all__ = ["main"]
engram/cli/demo.py ADDED
@@ -0,0 +1,60 @@
1
+ """`engram demo` — an interactive REPL to see the engine work (no AI tool needed).
2
+
3
+ For each line you type: FIND relevant memories, then SAVE the salient ones.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import argparse
8
+
9
+ from ..core.memory import Memory
10
+
11
+
12
def run(args: argparse.Namespace) -> int:
    """Run the interactive demo REPL against ``engram_demo.db``.

    Every non-command line is first used as a search query (top 3 hits are
    printed) and then offered to the salience-gated ``remember`` call.
    Colon-prefixed commands (``:list``, ``:forget <id>``, ``:subject <name>``,
    ``:help``, ``:quit``) manage the session instead.

    Args:
        args: parsed CLI namespace; unused, present because the dispatcher
            calls every subcommand as ``func(args)``.

    Returns:
        Process exit code, always 0.
    """
    mem = Memory(path="engram_demo.db")
    # try/finally: the store must be closed even when a search/save call or an
    # unexpected interrupt escapes the loop, not only on a clean :quit.
    try:
        print("engram demo — type a message, or :quit. (:help for commands)\n")
        pinned_subject = None
        while True:
            try:
                line = input("you> ").strip()
            except (EOFError, KeyboardInterrupt):
                # Ctrl-D / Ctrl-C at the prompt ends the session quietly
                print()
                break
            if not line:
                continue
            if line in (":quit", ":q"):
                break
            if line in (":help", ":h"):
                print(" :list :forget <id> :subject <name> :quit")
                continue
            if line == ":list":
                for f in mem.all_current():
                    print(f" [{f['id'][:8]}] ({f['block_id']}) {f['value']}")
                continue
            if line.startswith(":forget "):
                mem.forget(line.split(" ", 1)[1].strip())
                print(" forgotten.")
                continue
            if line.startswith(":subject "):
                pinned_subject = line.split(" ", 1)[1].strip()
                print(f" subject pinned: {pinned_subject}")
                continue

            # FIND: show what the engine already knows about this message
            hits = mem.search(line, k=3)
            if hits:
                print(" 🧠 recalled:")
                for h in hits:
                    print(f" - {h['value']}")
            else:
                print(" 🧠 (nothing relevant remembered yet)")
            # SAVE: gated save (salience) under the pinned subject, if any
            res = mem.remember(line, subject=pinned_subject)
            if res["decision"] == "SKIP":
                print(f" · skipped — {res['reason']}")
            else:
                print(f" 💾 {res['decision']} → block '{res['block_id']}'")
            print()
    finally:
        mem.close()
    return 0
57
+
58
+
59
def add_parser(sub) -> None:
    """Register the ``demo`` subcommand on *sub* and route it to :func:`run`.

    Args:
        sub: the subparsers action returned by ``ArgumentParser.add_subparsers``.
    """
    demo_parser = sub.add_parser("demo", help="interactive demo REPL")
    demo_parser.set_defaults(func=run)
engram/cli/main.py ADDED
@@ -0,0 +1,99 @@
1
+ """The `engram` command — dispatches to subcommands.
2
+
3
+ engram demo interactive REPL
4
+ engram status print current env settings and store usage
5
+ engram rebuild re-embed a store after changing the embedding model
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+
11
+ from . import demo
12
+
13
+
14
def _run_selftest(args: argparse.Namespace) -> int:
    """Verify the engine works end-to-end (offline, no model download).

    Creates a throwaway store in a temp file using the hash-stub embedder,
    saves one fact, searches for it, and reports the outcome on stdout.

    Args:
        args: parsed CLI namespace; unused.

    Returns:
        0 when the saved fact is recalled, 1 when recall comes back empty or
        any step raises.
    """
    import os
    import tempfile

    from ..core.memory import Memory
    from ..embeddings import HashEmbedder

    fd, path = tempfile.mkstemp(suffix=".db")
    os.close(fd)  # only the path is needed; the engine opens the file itself
    m = None
    try:
        m = Memory(path, embedder=HashEmbedder())
        m.remember("selftest: the payments service is owned by Bob", subject="selftest")
        hits = m.search("who owns payments?", subject="selftest")
        m.close()
        m = None  # closed cleanly — nothing left for the cleanup below
        if hits:
            print("✓ engine OK — sqlite-vec + FTS5 + vector search working")
            return 0
        print("✗ engine ran but recall failed")
        return 1
    except Exception as exc:  # noqa: BLE001
        print(f"✗ engine FAILED: {exc}")
        return 1
    finally:
        if m is not None:
            # A step above raised with the store still open. Release the
            # handle before deleting the file; the failure has already been
            # reported, so a second error from close() adds nothing.
            try:
                m.close()
            except Exception:  # noqa: BLE001
                pass
        os.unlink(path)
39
+
40
+
41
def _run_rebuild(args) -> int:
    """Re-embed every fact in the store at ``args.db`` and print a summary.

    Delegates to ``Memory.reembed`` and reads the ``reembedded``, ``dim`` and
    ``model`` entries of its result. Returns exit code 0.
    """
    from ..core.memory import Memory

    outcome = Memory.reembed(args.db)
    summary = (
        f"re-embedded {outcome['reembedded']} facts at {outcome['dim']}-dim "
        f"({outcome['model']}) — store ready"
    )
    print(summary)
    return 0
47
+
48
+
49
def _run_status(args: argparse.Namespace) -> int:
    """Print the environment-derived settings followed by a store usage line.

    The usage line is computed with plain ``sqlite3`` reads so the embedding
    model is never loaded. Always returns exit code 0.

    Args:
        args: parsed CLI namespace; unused.
    """
    import os
    import sqlite3

    from .. import config
    from ..settings import Settings

    settings = Settings.from_env()
    print(settings.summary())

    db_path = settings.db_path
    if not os.path.exists(db_path):
        print("usage: (no DB at this path yet)")
        return 0

    con = sqlite3.connect(db_path)

    def scalar(sql: str):
        """First column of the first row returned by *sql*."""
        return con.execute(sql).fetchone()[0]

    try:
        current = scalar(
            "SELECT COUNT(*) FROM facts WHERE superseded_by IS NULL "
            "AND validation_status = 'fresh'"
        )
        total = scalar("SELECT COUNT(*) FROM facts")
        pages = scalar("PRAGMA page_count")
        page_size = scalar("PRAGMA page_size")
        print(f"usage: {current} current / {total} total facts · cap {config.MAX_FACTS} "
              f"· db {pages * page_size / 1024:.0f} KB")
    except sqlite3.Error:
        # e.g. the file exists but has no `facts` table yet
        print("usage: (no memory yet)")
    finally:
        con.close()
    return 0
78
+
79
+
80
def main(argv: list[str] | None = None) -> int:
    """Parse *argv* and dispatch to the selected ``engram`` subcommand.

    Args:
        argv: argument list without the program name; ``None`` lets argparse
            read ``sys.argv``.

    Returns:
        The subcommand's exit code, or 0 after printing help when no
        subcommand was given.
    """
    parser = argparse.ArgumentParser(prog="engram", description="engram — local agentic memory")
    commands = parser.add_subparsers(dest="command")

    demo.add_parser(commands)

    status_cmd = commands.add_parser("status", help="print the current env settings")
    status_cmd.set_defaults(func=_run_status)

    selftest_cmd = commands.add_parser("selftest", help="verify the engine works (offline)")
    selftest_cmd.set_defaults(func=_run_selftest)

    rebuild_cmd = commands.add_parser(
        "rebuild", help="re-embed a store after changing ENGRAM_EMBEDDER_MODEL")
    rebuild_cmd.add_argument("db", help="path to the memory .db file")
    rebuild_cmd.set_defaults(func=_run_rebuild)

    parsed = parser.parse_args(argv)
    handler = getattr(parsed, "func", None)
    if handler:
        return handler(parsed)
    # bare `engram` with no subcommand: show usage instead of failing
    parser.print_help()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
engram/config.py ADDED
@@ -0,0 +1,79 @@
1
+ """Build-time knobs — constants the engine is tuned with.
2
+
3
+ Distinct from `settings.py`, which reads *runtime* flags from environment variables.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ # ── storage ──────────────────────────────────────────────────────────────────
8
+ DEFAULT_DB_PATH = "engram.db" # one SQLite file holds everything (STORAGE_AND_RETRIEVAL §3)
9
+
10
+ # ── embeddings ───────────────────────────────────────────────────────────────
11
+ EMBED_MODEL = "BAAI/bge-small-en-v1.5" # small, local, offline model (fastembed)
12
+ FALLBACK_DIM = 384 # dim of the hash-stub embedder + bge-small
13
+ # Pin the model cache to a STABLE dir so it downloads once and stays offline —
14
+ # fastembed's default is $TMPDIR, which the OS wipes (silent re-downloads).
15
+ # Override with ENGRAM_MODEL_CACHE.
16
+ MODEL_CACHE_DIR = "~/.cache/engram/models"
17
+
18
+ # ── consolidation thresholds (cosine similarity, 0..1) ───────────────────────
19
+ # Four-way operation set: ADD / UPDATE / DELETE / NOOP — here NOOP is upgraded
20
+ # to REINFORCE (bump confidence instead of silently dropping the duplicate).
21
+ REINFORCE_SIM = 0.97 # ~identical to an existing fact → just bump it, no new row
22
+ UPDATE_SIM = 0.86 # close enough to be the same fact → refine, contradict, or supersede
23
+
24
+ # ── extraction (one interaction → several atomic facts) ──────────────────────
25
+ # The write path's biggest quality lever: split a message into discrete, reusable facts
26
+ # instead of storing it as one blob (MEMORY_LIFECYCLE §2).
27
+ MAX_FACTS_PER_INTERACTION = 8 # cap on candidates pulled from a single remember() call
28
+
29
+ # ── retrieval ────────────────────────────────────────────────────────────────
30
+ CANDIDATES_PER_CHANNEL = 30 # how many to pull from keyword, vector, and entity search
31
+ RRF_K = 60 # Reciprocal Rank Fusion constant (INDEXING_DEEP_DIVE §7)
32
+ DEFAULT_TOP_K = 5 # how many memories to return by default
33
+ SEARCH_K_CAP = 200 # k is clamped to [1, this]: a mistyped/hostile k must
34
+ # neither blow the knn query limit nor blackout serving
35
+ LEDGER_CAP = 2000 # decision-ledger rows kept (capped rotation)
36
+ ENTITY_RERANK_BONUS = 0.15 # multiplicative boost per query entity a fact names
37
+ # (max 3); tuned on LoCoMo dev convs 26+30 ONLY
38
+
39
+ # framework-stamped pseudo-tags that carry no task information — treating them
40
+ # as a lane caused a total silent serving blackout (loss census P1)
41
+ GENERIC_TASK_TAGS = frozenset({
42
+ "conversation", "chat", "general", "message", "misc", "context", "default",
43
+ "session", "dialogue", "turn",
44
+ })
45
+ MIN_SIMILARITY = 0.30 # vector candidates below this cosine are dropped (noise floor)
46
+
47
+ # recency: at equal relevance, fresher memories rank higher (INDEXING_DEEP_DIVE §7).
48
+ # final_score = rrf_score × ((1 - RECENCY_WEIGHT) + RECENCY_WEIGHT × freshness),
49
+ # where freshness = 0.5 ** (age_days / RECENCY_HALFLIFE_DAYS) ∈ (0, 1].
50
+ RECENCY_WEIGHT = 0.30 # how much recency may tilt the ranking (0 = off)
51
+ RECENCY_HALFLIFE_DAYS = 30.0 # a memory this old contributes half its freshness
52
+
53
+ # ── entities (the third retrieval signal) ────────────────────────────────────
54
+ ENTITY_MIN_LEN = 2 # ignore entity tokens shorter than this
55
+
56
+ # common words ignored in the keyword (FTS) query so it doesn't match everything
57
# Immutable, like GENERIC_TASK_TAGS: a shared module-level lookup table must
# not be mutable by whichever caller happens to import it.
STOPWORDS = frozenset({
    "a", "an", "the", "is", "are", "was", "were", "be", "to", "of", "in", "on",
    "for", "and", "or", "i", "you", "we", "it", "this", "that", "what", "who",
    "whom", "how", "why", "when", "where", "should", "would", "could", "my",
    "your", "our", "here", "there", "do", "does", "did", "use", "used", "can",
    "will", "with", "about", "from", "at", "as", "by", "me", "s",
})
64
+
65
+ # ── conditioned promotion (the lane model — see core/promotion.py) ───────────
66
+ PROMOTION_FLOOR_FRAC = 0.34 # keep a fact only if score >= this fraction of the top score
67
+ PROMOTION_EPSILON = 0.05 # weight of the word-overlap tie-break signal
68
+ PROMOTION_OVERFETCH = 4 # promotion re-ranks OVERFETCH×k retrieval candidates
69
+ PROMOTION_MIN_CANDIDATES = 30 # ...but never fewer than this many candidates
70
+ LANE_FETCH_LIMIT = 100 # lane channel: max lane-tagged facts added as candidates
71
+ TAGS_PER_FACT_CAP = 8 # max tags stored per fact
72
+
73
+ # ── salience (what to save) + compaction (how compactly) ─────────────────────
74
+ KEY_CHAR_CAP = 160 # the short embedded label is trimmed to this
75
+ VALUE_CHAR_CAP = 1000 # the stored value is capped (guardrail against blobs)
76
+ MAX_TEXT_CHARS = 2000 # longer than this → assumed file/output dump, skip saving
77
+
78
+ # ── size limit / eviction ────────────────────────────────────────────────────
79
+ MAX_FACTS = 5000 # per DB; least-used facts are evicted beyond this
@@ -0,0 +1,5 @@
1
+ """The engine logic: orchestration, extraction, consolidation, entities, retrieval, fusion."""
2
+
3
+ from .memory import Memory
4
+
5
+ __all__ = ["Memory"]
engram/core/anchors.py ADDED
@@ -0,0 +1,101 @@
1
+ """Anchor tokens — the parts of a fact that make it THAT fact.
2
+
3
+ "orders p99 is 450ms" and "refunds p99 is 450ms" share almost every word, but
4
+ each carries a content token the other lacks. Any merge / collapse / invalidate
5
+ decision between two such texts is a guess, and losing a true fact is worse
6
+ than keeping a near-sibling.
7
+
8
+ The guard is only consulted AFTER a caller has established near-identity
9
+ (consolidation: cosine ≥ 0.86 on the same block; serve-time collapse:
10
+ Jaccard ≥ 0.6) — inside a frame that similar, a differing content stem is not
11
+ paraphrase variance, it is the identity of a different fact. One-sided
12
+ differences (only one text has a negator, or extra detail) do NOT trigger:
13
+ that's the genuine update/negation case the write path exists to handle.
14
+
15
+ Loss census (2026-07-05) receipts: the UPDATE band silently superseded sibling
16
+ facts at cos 0.86-0.95, DELETE false-fired on motion verbs, and serve-time
17
+ near-dup collapse hid 8/12 distinct sibling pairs.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import re
22
+ from typing import Set
23
+
24
+ # words that flip a statement's polarity — genuine negators only. Motion /
25
+ # completion verbs ("dropped the kids at school", "the meeting ended") are NOT
26
+ # negation; they false-fired the DELETE path (census) and live nowhere here.
27
+ NEGATORS = {
28
+ "not", "no", "never", "isn't", "aren't", "wasn't", "weren't", "don't",
29
+ "doesn't", "didn't", "won't", "can't", "cannot", "longer", # "no longer"
30
+ "cancelled", "canceled", "discontinued", "deprecated",
31
+ }
32
+
33
+ _STOP = {
34
+ "the", "a", "an", "of", "to", "and", "or", "for", "with", "on", "in", "at",
35
+ "is", "are", "was", "were", "has", "have", "had", "be", "been", "its",
36
+ "it's", "this", "that", "these", "those", "every", "each", "all", "some",
37
+ "we", "our", "they", "their", "he", "she", "his", "her", "you", "your",
38
+ "i", "my", "me", "now", "then", "also", "very", "just", "will", "would",
39
+ }
40
+
41
+ _TOKEN = re.compile(r"[A-Za-z0-9'’\-]+")
42
+
43
+
44
+ def _stem(tok: str) -> str:
45
+ """Cheap derivational stem so 'deploys'/'deploying'/'deployed' agree.
46
+ Only used for anchor comparison — never stored."""
47
+ for suf in ("ing", "ed", "es", "s"):
48
+ if tok.endswith(suf) and len(tok) - len(suf) >= 3:
49
+ tok = tok[: -len(suf)]
50
+ if len(tok) >= 2 and tok[-1] == tok[-2]:
51
+ tok = tok[:-1]
52
+ break
53
+ return tok[:-1] if tok.endswith("e") else tok
54
+
55
+
56
+ def _strong_weak(text: str) -> tuple[Set[str], Set[str]]:
57
+ """STRONG anchors are definitive identity tokens: digits ("450ms",
58
+ "Tuesday at 10am"'s 10am), negators, and mid-sentence capitalized words
59
+ (names, days, products). WEAK anchors are the remaining content stems."""
60
+ strong: Set[str] = set()
61
+ weak: Set[str] = set()
62
+ for i, tok in enumerate(_TOKEN.findall(text or "")):
63
+ low = tok.lower()
64
+ if any(c.isdigit() for c in tok):
65
+ strong.add(low)
66
+ elif low in NEGATORS:
67
+ strong.add(low)
68
+ elif i > 0 and tok[0].isupper():
69
+ strong.add(_stem(low))
70
+ elif len(low) >= 3 and low not in _STOP:
71
+ # drop apostrophes BEFORE stemming: "lot's" → lots → lot (a
72
+ # trailing-strip left "lot'" and the filler floor never matched)
73
+ weak.add(_stem(low.replace("'", "").replace("’", "")))
74
+ return strong, weak
75
+
76
+
77
def anchor_tokens(text: str) -> Set[str]:
    """Return every identity-bearing token of *text*: strong and weak anchors combined."""
    return set().union(*_strong_weak(text))
81
+
82
+
83
def exclusive_anchors(a: str, b: str) -> bool:
    """Decide whether two near-identical texts are nevertheless DIFFERENT facts.

    Two deterministic signals, checked in order:

    1. Each side holds a strong anchor the other lacks — "payments deploys
       Tuesday" vs "... Thursday", or the same number attached to a different
       owner. A difference on ONE side only (an added negator, extra detail)
       does not count here.
    2. A clean 1-for-1 swap of a content stem inside an otherwise shared
       frame — "Bob owns payments" vs "Bob owns refunds". Paraphrases differ
       by ADDITIVE wording (several extra stems on one or both sides), so
       they do not look like a single substitution and still merge/collapse.

    Args:
        a: first fact text.
        b: second fact text.

    Returns:
        True when either signal fires, False otherwise.
    """
    strong_a, weak_a = _strong_weak(a)
    strong_b, weak_b = _strong_weak(b)

    only_a_strong = strong_a - strong_b
    only_b_strong = strong_b - strong_a
    if only_a_strong and only_b_strong:
        return True

    # weak stems unique to one side, ignoring any the other side holds as strong
    swapped_out = weak_a - weak_b - strong_b
    swapped_in = weak_b - weak_a - strong_a
    return len(swapped_out) == 1 and len(swapped_in) == 1
@@ -0,0 +1,51 @@
1
+ """Keep each memory small, so it costs little to store and few tokens to return.
2
+
3
+ - `compact_key` → a short one-line label (the thing we embed + the handle).
4
+ - `cap_value` → the full text, trimmed to a guardrail length.
5
+
6
+ This is the cheap, offline version of "summarize the memory." A real LLM summary
7
+ could fill the same role (MEMORY_LIFECYCLE §2/§6); here we just normalize + trim.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import re
12
+
13
+ from .. import config
14
+ from . import anchors
15
+
16
+ # first-sentence boundary with the SAME digit + abbreviation guards extraction
17
+ # uses: without them "Priorities: 1." and "Dr." became the embedded key and the
18
+ # consolidation handle — matching nothing at recall and colliding across every
19
+ # "Dr. X" fact (loss census P1)
20
+ _FIRST_SENTENCE = re.compile(
21
+ r"(?<=[^\d][.!?])"
22
+ r"(?<!\b[A-Z]\.)(?<!Dr\.)(?<!Mr\.)(?<!Mrs\.)(?<!Ms\.)(?<!St\.)(?<!Ave\.)"
23
+ r"(?<!Blvd\.)(?<!Prof\.)(?<!Jr\.)(?<!Sr\.)(?<!vs\.)(?<!e\.g\.)(?<!i\.e\.)(?<!etc\.)"
24
+ r"\s"
25
+ )
26
+
27
+
28
def compact_key(text: str, cap: int = config.KEY_CHAR_CAP) -> str:
    """Build the short one-line label for a memory.

    Whitespace is collapsed and the first sentence is taken. If that sentence
    fits within *cap* it is returned as is. Otherwise the collapsed text is cut
    at *cap* characters, trimmed back to the last space, and up to four anchor
    tokens that the cut removed (alphabetical order) are appended so the key
    stays findable. The appended form is limited to ``cap + 48`` characters.

    Args:
        text: the raw memory text.
        cap: soft character limit for the key.

    Returns:
        The compacted key.
    """
    flat = " ".join(text.split())
    sentence, *_ = _FIRST_SENTENCE.split(flat, maxsplit=1)
    if len(sentence) <= cap:
        return sentence

    clipped = flat[:cap].rsplit(" ", 1)[0]
    dropped = anchors.anchor_tokens(flat) - anchors.anchor_tokens(clipped)
    if not dropped:
        return clipped
    rescued = " ".join(sorted(dropped)[:4])
    return f"{clipped} {rescued}"[: cap + 48]
44
+
45
+
46
def cap_value(text: str, cap: int = config.VALUE_CHAR_CAP) -> str:
    """Return *text* stripped, and truncated when longer than *cap*.

    A truncated value is cut at *cap* characters, trimmed back to the last
    space inside that window, and marked with a trailing " …".
    """
    stripped = text.strip()
    if len(stripped) > cap:
        kept, *_ = stripped[:cap].rsplit(" ", 1)
        return kept + " …"
    return stripped
@@ -0,0 +1,91 @@
1
+ """The "is this new or already known?" decision (MEMORY_LIFECYCLE §5).
2
+
3
+ Given a new fact's vector + key and its block, look at the nearest existing fact
4
+ *in that block* and classify the write into a four-way operation set:
5
+
6
+ REINFORCE near-identical (≥ REINFORCE_SIM) → bump confidence, no new row
7
+ DELETE same topic but the new fact NEGATES the old → retire the stale belief, add new
8
+ UPDATE same topic, refined/changed value → supersede the old version
9
+ ADD nothing close enough → brand-new fact
10
+
11
+ (The "already known, do nothing" case is often called NOOP; here it becomes REINFORCE
12
+ so repetition strengthens a memory instead of being silently dropped.)
13
+
14
+ Pure decision logic — it does not write; `memory.py` applies the result via the
15
+ repository. Contradiction is detected deterministically (negation polarity); a
16
+ semantic LLM resolver could be slotted in behind this same contract.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from typing import Optional, Tuple
22
+
23
+ from .. import config
24
+ from ..storage import repository
25
+ from . import anchors
26
+
27
+ # words/markers that flip a statement's polarity ("Bob no longer owns payments").
28
+ # Motion/completion verbs (dropped/stopped/ended/removed) are NOT here: "Bob
29
+ # dropped the kids at school" is an event, not a negation — with them in the
30
+ # list it invalidated Bob's standing routine (loss census 2026-07-05).
31
_NEGATIONS = (
    "not", "no longer", "never", "isn't", "aren't", "wasn't", "weren't", "don't",
    "doesn't", "didn't", "won't", "can't", "cannot",
    "deprecated", "cancelled", "canceled", "discontinued", "no more",
)
# one case-insensitive alternation, matched on whole words/phrases only
_NEG_RE = re.compile(r"\b(" + "|".join(re.escape(n) for n in _NEGATIONS) + r")\b", re.IGNORECASE)


def _cosine_from_l2(distance: float) -> float:
    """For unit vectors, euclidean^2 = 2 - 2cos ⇒ cos = 1 - d^2/2."""
    return 1.0 - (distance * distance) / 2.0


def _has_negation(text: str) -> bool:
    """True when *text* contains any negation marker from ``_NEGATIONS``.

    Typographic apostrophes are folded to straight ones before matching, so
    "isn’t" / "doesn’t" count exactly like "isn't" / "doesn't". ``None`` and
    empty text are treated as not negated.
    """
    normalised = (text or "").replace("’", "'")
    return bool(_NEG_RE.search(normalised))


def _contradicts(old_key: str, new_key: str) -> bool:
    """True when one statement negates the other (opposite polarity)."""
    return _has_negation(old_key) != _has_negation(new_key)
51
+
52
+
53
def _nearest_in_block(conn, block_id: str, vec) -> Optional[Tuple[str, float, str]]:
    """Find the closest live fact to *vec* that belongs to *block_id*.

    Walks the ten nearest neighbours reported by the repository, nearest
    first, and returns the first one that still exists, has not been
    superseded, has validation status 'fresh', and sits in the requested block.

    Returns:
        ``(fact_id, cosine_similarity, key)`` for that fact, or ``None`` when
        none of the ten neighbours qualifies.
    """
    for fact_id, l2_distance in repository.nearest(conn, vec, k=10):
        row = repository.get(conn, fact_id)
        is_live_sibling = (
            row is not None
            and row["superseded_by"] is None
            and row["validation_status"] == "fresh"
            and row["block_id"] == block_id
        )
        if is_live_sibling:
            return fact_id, _cosine_from_l2(l2_distance), row["key"]
    return None
64
+
65
+
66
def decide(conn, block_id: str, vec, key: str = "") -> Tuple[str, Optional[str], str]:
    """Classify a write as ADD / DELETE / REINFORCE / UPDATE.

    Args:
        conn: open store connection, passed through to the repository.
        block_id: block the new fact belongs to.
        vec: embedding of the new fact.
        key: the new fact's key text, used by the anchor and negation checks.

    Returns:
        ``(operation, target_fact_id, note)``. The target is ``None`` for ADD.
        ``note`` is non-empty only when a merge was DEMOTED to ADD by the
        anchor guard, so the caller can record it in the decision ledger.
    """
    match = _nearest_in_block(conn, block_id, vec)
    if match is None:
        return "ADD", None, ""

    target_id, similarity, existing_key = match
    if not similarity >= config.UPDATE_SIM:
        # nothing in this block is close enough to be "the same fact"
        return "ADD", None, ""

    # Anchor guard: each statement carries an identity token the other lacks
    # (orders vs refunds, Tuesday vs Thursday) — two TRUE sibling facts however
    # similar the wording. Merging or retiring either destroys real memory, so
    # keep both. (Loss census 2026-07-05.)
    if anchors.exclusive_anchors(existing_key, key):
        return "ADD", None, f"merge demoted: exclusive anchors vs {target_id} (cos {similarity:.2f})"

    # Same topic: a polarity flip retires the old belief.
    if _contradicts(existing_key, key):
        return "DELETE", target_id, ""

    operation = "REINFORCE" if similarity >= config.REINFORCE_SIM else "UPDATE"
    return operation, target_id, ""
engram/core/dates.py ADDED
@@ -0,0 +1,129 @@
1
+ """Deterministic relative-date resolution at capture time.
2
+
3
+ People state facts in relative time — "I went last Friday", "we're moving next
4
+ month" — and the absolute date exists only in the conversation's own timestamp.
5
+ LLM-extraction systems resolve this during ingestion with a model call;
6
+ we resolve it with calendar arithmetic: zero LLM, same information.
7
+
8
+ `resolve_relatives(text, anchor)` annotates each relative expression inline
9
+ with its computed absolute form: "I went last Friday [= Friday, 14 July 2023]".
10
+ Inline (not appended) so keyword/vector retrieval keeps the date next to the
11
+ event it dates. Text inside existing [= ...] annotations is never re-annotated.
12
+
13
+ Measured motivation (LoCoMo track B): 58% of temporal failures had the
14
+ evidence SERVED but relative — the answering model can't do the arithmetic
15
+ reliably; the store should carry it resolved.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import datetime as _dt
20
+ import re
21
+ from typing import Optional
22
+
23
_MONTHS = "january february march april may june july august september october november december".split()
_WEEKDAYS = "monday tuesday wednesday thursday friday saturday sunday".split()
_WORD_NUMS = {"a": 1, "an": 1, "one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
              "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10, "couple of": 2,
              "few": 3}

# strptime formats tried in order before the regex fallback
_ANCHOR_PATTERNS = [
    "%d %B, %Y", "%d %B %Y", "%B %d, %Y", "%B %d %Y", "%Y-%m-%d", "%d/%m/%Y",
]


def parse_anchor(s: Optional[str]) -> Optional[_dt.date]:
    """Parse a conversation timestamp like '8 May, 2023' into a date.

    A leading time joined with " on " ("1:56 pm on 8 May, 2023") is tolerated:
    only the part after the last " on " is parsed.

    Args:
        s: the timestamp text, or ``None``.

    Returns:
        The parsed date, or ``None`` when *s* is empty, matches no known
        layout, or names a day that does not exist (e.g. "31 February, 2023").
    """
    if not s:
        return None
    s = s.strip()
    # tolerate a leading time ("1:56 pm on 8 May, 2023")
    if " on " in s:
        s = s.split(" on ")[-1].strip()
    for fmt in _ANCHOR_PATTERNS:
        try:
            return _dt.datetime.strptime(s, fmt).date()
        except ValueError:
            continue
    # fallback: "<day> <month name>[,] <year>" anywhere inside the string
    m = re.search(r"(\d{1,2})\s+([A-Za-z]+),?\s+(\d{4})", s)
    if m and m.group(2).lower() in _MONTHS:
        try:
            return _dt.date(int(m.group(3)), _MONTHS.index(m.group(2).lower()) + 1,
                            int(m.group(1)))
        except ValueError:
            # well-formed text but an impossible calendar date — unparseable,
            # not a crash
            return None
    return None
52
+
53
+
54
def _fmt_date(d: _dt.date) -> str:
    """Render *d* as 'Weekday, D Month YYYY' (e.g. 'Friday, 14 July 2023')."""
    weekday_name = _WEEKDAYS[d.weekday()].capitalize()
    month_name = _MONTHS[d.month - 1].capitalize()
    return f"{weekday_name}, {d.day} {month_name} {d.year}"
56
+
57
+
58
def _fmt_month(d: _dt.date) -> str:
    """Render *d* as 'Month YYYY' (e.g. 'July 2023'); the day is ignored."""
    month_name = _MONTHS[d.month - 1].capitalize()
    return f"{month_name} {d.year}"
60
+
61
+
62
def _month_shift(d: _dt.date, months: int) -> _dt.date:
    """Return the first day of the month *months* away from *d*'s month.

    *months* may be negative; year boundaries are crossed in either direction.
    """
    year_delta, month_index = divmod(d.month - 1 + months, 12)
    return _dt.date(d.year + year_delta, month_index + 1, 1)
65
+
66
+
67
def resolve_relatives(text: str, anchor: Optional[_dt.date],
                      max_annotations: int = 3) -> str:
    """Annotate relative time expressions with computed absolutes, inline.

    Each recognised expression gets " [= <absolute>]" appended right after it,
    e.g. "I went last Friday [= Friday, 5 May 2023]". Patterns are applied in
    a fixed order, sharing one annotation budget.

    Args:
        text: the text to annotate.
        anchor: the date the text was written on; ``None`` disables resolution.
        max_annotations: upper bound on annotations added per call.

    Returns:
        The annotated text. *text* is returned unchanged when *anchor* is
        ``None`` or the text already contains a "[=" annotation. An expression
        whose resolved date falls outside the representable calendar range
        (e.g. "99999999 days ago") is left unannotated.
    """
    if anchor is None or "[=" in text:
        return text
    count = 0

    def ann(match_text: str, resolved: str) -> str:
        nonlocal count
        count += 1
        return f"{match_text} [= {resolved}]"

    def sub(pattern: str, resolver) -> None:
        nonlocal text

        def repl(m: re.Match) -> str:
            if count >= max_annotations:
                return m.group(0)
            try:
                r = resolver(m)
            except (OverflowError, ValueError):
                # date arithmetic left the supported range — an absurd offset
                # in free text must not crash the write path
                return m.group(0)
            return ann(m.group(0), r) if r else m.group(0)

        text = re.sub(pattern, repl, text, flags=re.IGNORECASE)

    # exact-day expressions
    sub(r"\byesterday\b", lambda m: _fmt_date(anchor - _dt.timedelta(days=1)))
    sub(r"\blast night\b", lambda m: _fmt_date(anchor - _dt.timedelta(days=1)))
    sub(r"\b(today|tonight|this morning|this evening|this afternoon)\b",
        lambda m: _fmt_date(anchor))
    sub(r"\btomorrow\b", lambda m: _fmt_date(anchor + _dt.timedelta(days=1)))
    # last/next <weekday>: the most recent such day strictly before (after) anchor
    sub(r"\blast\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b",
        lambda m: _fmt_date(anchor - _dt.timedelta(
            days=((anchor.weekday() - _WEEKDAYS.index(m.group(1).lower())) % 7) or 7)))
    sub(r"\bnext\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b",
        lambda m: _fmt_date(anchor + _dt.timedelta(
            days=((_WEEKDAYS.index(m.group(1).lower()) - anchor.weekday()) % 7) or 7)))
    # periods — phrased the way LoCoMo golds phrase them
    sub(r"\blast\s+week(end)?\b",
        lambda m: f"the week{'end' if m.group(1) else ''} before {anchor.day} "
                  f"{_MONTHS[anchor.month - 1].capitalize()} {anchor.year}")
    sub(r"\bthis\s+week(end)?\b",
        lambda m: f"the week{'end' if m.group(1) else ''} of {anchor.day} "
                  f"{_MONTHS[anchor.month - 1].capitalize()} {anchor.year}")
    sub(r"\blast\s+month\b", lambda m: _fmt_month(_month_shift(anchor, -1)))
    sub(r"\bnext\s+month\b", lambda m: _fmt_month(_month_shift(anchor, 1)))
    sub(r"\bthis\s+month\b", lambda m: _fmt_month(anchor))
    sub(r"\blast\s+year\b", lambda m: str(anchor.year - 1))
    sub(r"\bnext\s+year\b", lambda m: str(anchor.year + 1))

    # "N days/weeks/months/years ago"
    def ago(m: re.Match) -> Optional[str]:
        raw = m.group(1).lower()
        n = _WORD_NUMS.get(raw) or (int(raw) if raw.isdigit() else None)
        if n is None:
            return None
        unit = m.group(2).lower()
        if unit == "day":
            return _fmt_date(anchor - _dt.timedelta(days=n))
        if unit == "week":
            return _fmt_date(anchor - _dt.timedelta(weeks=n))
        if unit == "month":
            return _fmt_month(_month_shift(anchor, -n))
        year = anchor.year - n
        # a year before 1 is not a date the store can carry — skip it
        return str(year) if year >= 1 else None

    sub(r"\b(\d+|a|an|one|two|three|four|five|six|seven|eight|nine|ten|few|couple of)"
        r"\s+(day|week|month|year)s?\s+ago\b", ago)
    return text