memex-python 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
memex/replay.py ADDED
@@ -0,0 +1,144 @@
1
+ """Event-log replay. Integrity-tolerant: per-item failures are collected in
2
+ ``skipped`` rather than thrown. Includes a strict ISO-8601 parser ported
3
+ verbatim from the TS library (rejects sub-ms precision, validates calendar
4
+ fields, requires ``Z`` or an explicit offset) so replay ordering is deterministic.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from dataclasses import dataclass
11
+ from datetime import datetime, timezone
12
+ from typing import Any, NamedTuple, cast
13
+
14
+ from .errors import InvalidTimestampError
15
+ from .graph import GraphState, create_graph_state
16
+ from .models import MemoryLifecycleEvent
17
+ from .reducer import apply_command
18
+
19
+ __all__ = ["ReplayFailure", "ReplayResult", "replay_commands", "replay_from_envelopes"]
20
+
21
+
22
@dataclass
class ReplayFailure:
    """Record of one replay input that could not be processed.

    Declared as a dataclass rather than a NamedTuple so that the ``index``
    field does not collide with ``tuple.index`` under strict typing.
    """

    # Position of the failing entry in the list handed to the replay function.
    index: int
    # The exception that was caught while handling that entry.
    error: Exception
    # The offending command; filled in by ``replay_commands``.
    command: Any = None
    # The offending envelope; filled in by ``replay_from_envelopes``.
    envelope: Any = None
30
+
31
+
32
class ReplayResult(NamedTuple):
    """Outcome of a replay run: final state, emitted events, skipped inputs."""

    # Graph state after every input that applied cleanly has been folded in.
    state: GraphState
    # Lifecycle events collected from each successful application, in order.
    events: list[MemoryLifecycleEvent]
    # One entry per input that raised instead of applying.
    skipped: list[ReplayFailure]
36
+
37
+
38
def replay_commands(commands: list[Any]) -> ReplayResult:
    """Fold ``commands`` into a fresh graph state, tolerating per-item failures.

    Each command is passed to ``apply_command`` in list order. A command that
    raises is recorded as a ``ReplayFailure`` (with its list index and the
    exception) and the replay carries on from the last good state.

    Returns:
        ``ReplayResult(state, events, skipped)``.
    """
    current = create_graph_state()
    emitted: list[MemoryLifecycleEvent] = []
    failures: list[ReplayFailure] = []

    for position, command in enumerate(commands):
        try:
            # Reading the outcome stays inside the try so that a malformed
            # result is treated like any other per-item failure.
            outcome = apply_command(current, command)
            current = outcome.state
            emitted.extend(outcome.events)
        except Exception as exc:  # noqa: BLE001 - integrity-tolerant by design
            failures.append(ReplayFailure(index=position, command=command, error=exc))

    return ReplayResult(current, emitted, failures)
52
+
53
+
54
# Strict ISO 8601, milliseconds-only precision, explicit offset or Z.
#
# Two details keep the pattern as strict as advertised:
#   * ``\Z`` anchors at the true end of the string. ``$`` would also match just
#     before a trailing newline, letting ``"...Z\n"`` through.
#   * ``re.ASCII`` restricts ``\d`` to ``0-9``. Without it ``\d`` matches any
#     Unicode decimal digit, which ``int()`` would then happily convert.
# Group numbering: 1-6 date/time fields, 7 fractional seconds, 8 offset sign,
# 9-10 offset hours/minutes.
_ISO_8601_RE = re.compile(
    r"^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.(\d{1,3}))?(?:Z|([+-])(\d{2}):(\d{2}))\Z",
    re.ASCII,
)

# Reference instant used to turn an aware datetime into epoch milliseconds.
_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
60
+
61
+
62
def _is_leap_year(year: int) -> bool:
    """Return True when ``year`` is a leap year under the Gregorian rule."""
    if year % 400 == 0:
        return True
    if year % 100 == 0:
        return False
    return year % 4 == 0
64
+
65
+
66
def _days_in_month(year: int, month: int) -> int:
    """Return the number of days in ``month`` of ``year``.

    Only February depends on the year. Any month that is neither February nor
    one of the 30-day months yields 31, so callers must range-check ``month``
    themselves.
    """
    if month in (4, 6, 9, 11):
        return 30
    if month != 2:
        return 31
    return 29 if _is_leap_year(year) else 28
72
+
73
+
74
def parse_iso_ts(ts: str) -> int:
    """Convert a strict ISO 8601 timestamp into epoch milliseconds (UTC).

    The string must match ``_ISO_8601_RE``; calendar fields and any numeric
    offset are then range-checked before conversion.

    Raises:
        InvalidTimestampError: if the string does not match the pattern, a
            calendar field or the offset is out of range, or ``datetime``
            rejects the values.
    """
    parsed = _ISO_8601_RE.match(ts)
    if parsed is None:
        raise InvalidTimestampError(f'Invalid envelope timestamp: "{ts}" (expected ISO 8601)')

    year_s, month_s, day_s, hour_s, minute_s, second_s, frac, off_sign, off_hh, off_mm = parsed.groups()
    year, month, day = int(year_s), int(month_s), int(day_s)
    hour, minute, second = int(hour_s), int(minute_s), int(second_s)
    # Right-pad the fraction so ".5" means 500 ms, not 5 ms.
    millis = int(frac.ljust(3, "0")) if frac else 0

    calendar_ok = (
        1 <= month <= 12
        and 1 <= day <= _days_in_month(year, month)
        and hour <= 23
        and minute <= 59
        and second <= 59
    )
    if not calendar_ok:
        raise InvalidTimestampError(f'Invalid envelope timestamp: "{ts}" (calendar fields out of range)')

    try:
        moment = datetime(year, month, day, hour, minute, second, millis * 1000, tzinfo=timezone.utc)
    except ValueError as exc:
        # e.g. year 0000 passes the regex but is outside datetime's range.
        raise InvalidTimestampError(f'Invalid envelope timestamp: "{ts}" ({exc})') from exc

    since_epoch = moment - _EPOCH
    epoch = since_epoch.days * 86_400_000 + since_epoch.seconds * 1000 + since_epoch.microseconds // 1000

    if off_sign:
        off_h, off_m = int(off_hh), int(off_mm)
        if off_h > 23 or off_m > 59:
            raise InvalidTimestampError(f'Invalid envelope timestamp: "{ts}" (bad offset)')
        shift_ms = (off_h * 60 + off_m) * 60 * 1000
        # The fields were read as if they were UTC; a "+hh:mm" local time is
        # ahead of UTC, so subtract the offset (and add it for "-hh:mm").
        if off_sign == "-":
            epoch += shift_ms
        else:
            epoch -= shift_ms

    return epoch
106
+
107
+
108
def _env_ts(env: Any) -> str:
    """Read the ``ts`` field from an envelope given as a dict or an object."""
    # Dict envelopes (e.g. decoded JSON) are indexed by key; anything else is
    # treated as a model exposing a ``ts`` attribute. The value is taken to be
    # an ISO string, so the cast is for the type checker only.
    if isinstance(env, dict):
        raw = env["ts"]
    else:
        raw = env.ts
    return cast(str, raw)
112
+
113
+
114
def _env_payload(env: Any) -> Any:
    """Read the ``payload`` field from an envelope given as a dict or an object."""
    # The payload may be a command model or a raw dict, hence ``Any``; the
    # caller hands it to apply_command, which is expected to validate it.
    if isinstance(env, dict):
        return env["payload"]
    return env.payload
118
+
119
+
120
def replay_from_envelopes(envelopes: list[Any]) -> ReplayResult:
    """Replay envelope payloads in timestamp order, tolerating bad entries.

    Pass 1 parses every envelope's ``ts``; envelopes whose timestamp cannot be
    read or parsed are recorded in ``skipped``. The survivors are ordered by
    parsed timestamp (the sort is stable, so equal timestamps keep their input
    order) and, in pass 2, each payload is handed to ``apply_command``.
    Payloads that raise are also recorded in ``skipped``. Every failure
    carries the envelope's index in the original ``envelopes`` list.
    """
    failures: list[ReplayFailure] = []
    dated: list[tuple[Any, int, int]] = []  # (envelope, epoch ms, original index)

    for position, envelope in enumerate(envelopes):
        try:
            stamp = parse_iso_ts(_env_ts(envelope))
            dated.append((envelope, stamp, position))
        except Exception as exc:  # noqa: BLE001 - integrity-tolerant by design
            failures.append(ReplayFailure(index=position, envelope=envelope, error=exc))

    dated.sort(key=lambda entry: entry[1])

    current = create_graph_state()
    emitted: list[MemoryLifecycleEvent] = []

    for envelope, _stamp, position in dated:
        try:
            outcome = apply_command(current, _env_payload(envelope))
            current = outcome.state
            emitted.extend(outcome.events)
        except Exception as exc:  # noqa: BLE001 - integrity-tolerant by design
            failures.append(ReplayFailure(index=position, envelope=envelope, error=exc))

    return ReplayResult(current, emitted, failures)
memex/retrieval.py ADDED
@@ -0,0 +1,266 @@
1
+ """Provenance walks, contradiction-aware packing, diversity, and smart retrieval."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from collections.abc import Callable
7
+ from dataclasses import dataclass
8
+ from functools import cmp_to_key
9
+ from typing import Any
10
+
11
+ from pydantic import BaseModel
12
+
13
+ from .graph import GraphState
14
+ from .models import Edge, MemoryFilter, MemoryItem, ScoredItem, ScoreWeights
15
+ from .query import get_edges, get_scored_items
16
+
17
+ __all__ = [
18
+ "SupportNode",
19
+ "DiversityOptions",
20
+ "get_support_tree",
21
+ "get_support_set",
22
+ "filter_contradictions",
23
+ "surface_contradictions",
24
+ "apply_diversity",
25
+ "smart_retrieve",
26
+ ]
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # 1. Support tree — provenance walk
31
+ # ---------------------------------------------------------------------------
32
+
33
+
34
@dataclass
class SupportNode:
    """One node of a provenance tree: an item plus the nodes for its parents."""

    item: MemoryItem
    parents: list[SupportNode]


def get_support_tree(state: GraphState, item_id: str) -> SupportNode | None:
    """Build the full provenance tree for an item, deduplicating on cycles.

    Parents are followed depth-first in list order. The first time an item is
    reached its parents are expanded; every later occurrence (via a cycle or a
    shared ancestor) appears as a leaf node with ``parents=[]``. Parent ids
    that are not present in ``state.items`` are omitted.

    The walk uses an explicit stack instead of recursion so that provenance
    chains deeper than the interpreter's recursion limit do not raise
    ``RecursionError``.

    Returns:
        The root ``SupportNode``, or ``None`` when ``item_id`` is unknown.
    """
    root_item = state.items.get(item_id)
    if root_item is None:
        return None

    root = SupportNode(item=root_item, parents=[])
    visited: set[str] = {item_id}
    # Each frame pairs a node with an iterator over its not-yet-handled parent
    # ids, so a node can be resumed after one of its parents is fully expanded.
    stack: list[tuple[SupportNode, Any]] = [(root, iter(root_item.parents or ()))]

    while stack:
        node, remaining = stack[-1]
        for pid in remaining:
            parent_item = state.items.get(pid)
            if parent_item is None:
                continue  # dangling reference: leave it out of the tree
            child = SupportNode(item=parent_item, parents=[])
            node.parents.append(child)
            if pid not in visited:
                visited.add(pid)
                stack.append((child, iter(parent_item.parents or ())))
                break  # descend into this parent before its siblings
            # Already expanded elsewhere: keep the child as a bare leaf.
        else:
            stack.pop()

    return root
63
+
64
+
65
def get_support_set(state: GraphState, item_id: str) -> list[MemoryItem]:
    """Flatten the provenance chain into the set of items that justify a claim.

    Performs a depth-first, pre-order walk over ``parents`` links starting at
    ``item_id``. Each item appears at most once, in first-visit order, with the
    starting item first. Parent ids missing from ``state.items`` are skipped.

    The walk uses an explicit stack instead of recursion so that provenance
    chains deeper than the interpreter's recursion limit do not raise
    ``RecursionError``.

    Returns:
        The visited items, or ``[]`` when ``item_id`` is unknown.
    """
    if item_id not in state.items:
        return []

    visited: set[str] = set()
    result: list[MemoryItem] = []
    pending: list[str] = [item_id]

    while pending:
        node_id = pending.pop()
        if node_id in visited:
            continue
        visited.add(node_id)
        current = state.items.get(node_id)
        if current is None:
            continue
        result.append(current)
        if current.parents:
            # Push in reverse so the first parent is popped (visited) first,
            # matching the order of a recursive pre-order walk.
            pending.extend(reversed(current.parents))

    return result
87
+
88
+
89
+ # ---------------------------------------------------------------------------
90
+ # 2. Contradiction-aware packing
91
+ # ---------------------------------------------------------------------------
92
+
93
+
94
def _superseded_ids(state: GraphState) -> set[str]:
    """Collect the target ids (``edge.to``) of every active SUPERSEDES edge."""
    return {
        link.to
        for link in state.edges.values()
        if link.kind == "SUPERSEDES" and link.active
    }
100
+
101
+
102
def filter_contradictions(state: GraphState, scored: list[ScoredItem]) -> list[ScoredItem]:
    """Collapse contradictions: drop superseded items and the lower-scoring side
    of each unresolved CONTRADICTS pair (deterministic tie-breaks).

    Steps:
      1. Remove every item targeted by an active SUPERSEDES edge.
      2. Order the active CONTRADICTS edges so the pair containing the
         highest-scoring item is resolved first (then by the pair's lower
         score, then by ``edge_id``).
      3. For each edge whose two endpoints are both still present and not yet
         excluded, exclude the lower-scoring endpoint; on an exact score tie
         the lexicographically larger id is excluded.

    A CONTRADICTS edge whose endpoints are the same item is ignored: it is not
    a pair, and resolving it would remove the item on the strength of its own
    edge. ``surface_contradictions`` ignores such self-edges as well.

    Returns:
        A new list containing the surviving entries in their input order.
    """
    superseded = _superseded_ids(state)
    filtered = [s for s in scored if s.item.id not in superseded]

    contradict_edges = get_edges(state, {"kind": "CONTRADICTS", "active_only": True})
    if not contradict_edges:
        return filtered

    # -1 stands in for "endpoint not among the filtered items".
    score_map = {entry.item.id: entry.score for entry in filtered}

    def _cmp(a: Edge, b: Edge) -> int:
        # Highest max-score pair first, then highest min-score, then edge_id.
        max_a = max(score_map.get(a.from_, -1), score_map.get(a.to, -1))
        max_b = max(score_map.get(b.from_, -1), score_map.get(b.to, -1))
        if max_a != max_b:
            return -1 if max_a > max_b else 1
        min_a = min(score_map.get(a.from_, -1), score_map.get(a.to, -1))
        min_b = min(score_map.get(b.from_, -1), score_map.get(b.to, -1))
        if min_a != min_b:
            return -1 if min_a > min_b else 1
        return -1 if a.edge_id < b.edge_id else 1

    excluded: set[str] = set()
    for edge in sorted(contradict_edges, key=cmp_to_key(_cmp)):
        if edge.from_ == edge.to:
            continue  # self-contradiction: nothing to choose between
        if edge.from_ in excluded or edge.to in excluded:
            continue  # already resolved by a higher-priority edge
        score_a = score_map.get(edge.from_, -1)
        score_b = score_map.get(edge.to, -1)
        if score_a >= 0 and score_b >= 0:
            if score_a != score_b:
                excluded.add(edge.to if score_a > score_b else edge.from_)
            else:
                # Exact tie: keep the lexicographically smaller id.
                excluded.add(edge.to if edge.from_ < edge.to else edge.from_)

    if excluded:
        filtered = [s for s in filtered if s.item.id not in excluded]

    return filtered
142
+
143
+
144
def surface_contradictions(state: GraphState, scored: list[ScoredItem]) -> list[ScoredItem]:
    """Keep both sides of each contradiction, annotated via ``contradicted_by``.
    Superseded items are still removed.

    The input entries are not mutated: each survivor is rebuilt as a new
    ``ScoredItem`` with its own copy of any existing ``contradicted_by`` list.
    """
    hidden = _superseded_ids(state)
    kept: list[ScoredItem] = []
    for entry in scored:
        if entry.item.id in hidden:
            continue
        prior = list(entry.contradicted_by) if entry.contradicted_by else None
        kept.append(ScoredItem(item=entry.item, score=entry.score, contradicted_by=prior))

    conflicts = get_edges(state, {"kind": "CONTRADICTS", "active_only": True})
    if not conflicts:
        return kept

    by_id = {entry.item.id: entry for entry in kept}

    def annotate(target: ScoredItem, other: ScoredItem) -> None:
        # Record ``other`` on ``target`` once only: multiple or bidirectional
        # CONTRADICTS edges may connect the same pair.
        if target.contradicted_by is None:
            target.contradicted_by = []
        if all(known.id != other.item.id for known in target.contradicted_by):
            target.contradicted_by.append(other.item)

    for edge in conflicts:
        left = by_id.get(edge.from_)
        right = by_id.get(edge.to)
        # Skip edges with a missing endpoint, and self-edges (left is right).
        if left is None or right is None or left is right:
            continue
        annotate(left, right)
        annotate(right, left)

    return kept
183
+
184
+
185
+ # ---------------------------------------------------------------------------
186
+ # 3. Diversity scoring
187
+ # ---------------------------------------------------------------------------
188
+
189
+
190
class DiversityOptions(BaseModel):
    # Per-duplicate score penalties used by ``apply_diversity``. A penalty that
    # is unset (or zero) disables that dimension entirely.

    # Charged once for each earlier item sharing the same ``author``.
    author_penalty: float | None = None
    # Charged, per parent id, once for each earlier item listing that parent.
    parent_penalty: float | None = None
    # Charged once for each earlier item sharing the same ``source_kind``.
    source_penalty: float | None = None
194
+
195
+
196
def apply_diversity(
    scored: list[ScoredItem],
    options: DiversityOptions | dict[str, Any],
) -> list[ScoredItem]:
    """Re-rank scored items with per-duplicate penalties (author/parent/source).

    Items are processed in input order. For each enabled dimension an item is
    charged ``penalty * (number of earlier items sharing that key)``; the
    charges are summed and subtracted from the score, floored at 0. The result
    is a new list sorted by adjusted score, highest first (ties keep their
    input order).
    """
    if isinstance(options, DiversityOptions):
        opts = options
    else:
        opts = DiversityOptions.model_validate(options)

    # A tally only exists for dimensions whose penalty is set and non-zero.
    seen_authors: dict[str, int] | None = {} if opts.author_penalty else None
    seen_parents: dict[str, int] | None = {} if opts.parent_penalty else None
    seen_sources: dict[str, int] | None = {} if opts.source_penalty else None

    def charge(tally: dict[str, int], key: str, weight: float) -> float:
        # Penalty owed for ``key`` given how often it was seen before; also
        # records this occurrence.
        prior = tally.get(key, 0)
        tally[key] = prior + 1
        return prior * weight

    reranked: list[ScoredItem] = []
    for entry in scored:
        item = entry.item
        penalty = 0.0
        if seen_authors is not None:
            penalty += charge(seen_authors, item.author, opts.author_penalty)  # type: ignore[arg-type]
        if seen_parents is not None and item.parents:
            for pid in item.parents:
                penalty += charge(seen_parents, pid, opts.parent_penalty)  # type: ignore[arg-type]
        if seen_sources is not None:
            penalty += charge(seen_sources, item.source_kind, opts.source_penalty)  # type: ignore[arg-type]
        adjusted = max(0.0, entry.score - penalty)
        reranked.append(ScoredItem(item=item, score=adjusted, contradicted_by=entry.contradicted_by))

    return sorted(reranked, key=lambda candidate: candidate.score, reverse=True)
229
+
230
+
231
+ # ---------------------------------------------------------------------------
232
+ # 4. Combined smart retrieval
233
+ # ---------------------------------------------------------------------------
234
+
235
+
236
def smart_retrieve(
    state: GraphState,
    *,
    budget: float,
    cost_fn: Callable[[MemoryItem], float],
    weights: ScoreWeights | dict[str, Any],
    filter: MemoryFilter | dict[str, Any] | None = None,
    contradictions: str | None = None,
    diversity: DiversityOptions | dict[str, Any] | None = None,
) -> list[ScoredItem]:
    """Score -> contradiction policy -> diversity -> greedy budget pack.

    ``contradictions`` selects ``"filter"`` or ``"surface"`` handling; any
    other value leaves the scored list untouched. Packing walks the ranked
    list once and keeps every item whose cost still fits in the remaining
    budget, so a cheaper item further down may be kept after a costlier one
    was passed over.

    Raises:
        ValueError: if ``cost_fn`` returns a negative or non-finite cost.
    """
    candidates = get_scored_items(state, weights, {"pre": filter})

    if contradictions == "filter":
        candidates = filter_contradictions(state, candidates)
    elif contradictions == "surface":
        candidates = surface_contradictions(state, candidates)

    if diversity is not None:
        candidates = apply_diversity(candidates, diversity)

    packed: list[ScoredItem] = []
    left = budget
    for candidate in candidates:
        price = cost_fn(candidate.item)
        if price < 0 or not math.isfinite(price):
            raise ValueError(f"cost_fn must return a finite non-negative number, got {price}")
        if price <= left:
            packed.append(candidate)
            left -= price
    return packed
memex/schemas.py ADDED
@@ -0,0 +1,67 @@
1
+ """Validation entry points — the parity shim for ``@ai2070/memex/schemas``.
2
+
3
+ In Pydantic the models *are* the schema, so this module re-exports them and
4
+ offers ``validate_*`` helpers (backed by ``TypeAdapter`` for the command unions).
5
+ Use these to validate untrusted external input before folding it in:
6
+
7
+ from memex.schemas import validate_command
8
+ cmd = validate_command(raw) # raises pydantic.ValidationError on bad shape
9
+ state = apply_command(state, cmd).state
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any
15
+
16
+ from pydantic import TypeAdapter
17
+
18
+ from .commands import MemoryCommand, MemoryCommandAdapter
19
+ from .intent import Intent, IntentCommand
20
+ from .models import Edge, MemoryItem
21
+ from .task import Task, TaskCommand
22
+
23
+ __all__ = [
24
+ "validate_command",
25
+ "validate_intent_command",
26
+ "validate_task_command",
27
+ "validate_memory_item",
28
+ "validate_edge",
29
+ # schema aliases (the Pydantic model IS the schema)
30
+ "MemoryItemSchema",
31
+ "EdgeSchema",
32
+ "IntentSchema",
33
+ "TaskSchema",
34
+ "MemoryCommandAdapter",
35
+ "IntentCommandAdapter",
36
+ "TaskCommandAdapter",
37
+ ]
38
+
39
# Schema aliases: each "schema" name is simply the corresponding Pydantic model
# class, since the model itself carries the validation rules.
MemoryItemSchema = MemoryItem
EdgeSchema = Edge
IntentSchema = Intent
TaskSchema = Task

# Module-level adapters, built once, that back validate_intent_command and
# validate_task_command.
IntentCommandAdapter: TypeAdapter[IntentCommand] = TypeAdapter(IntentCommand)
TaskCommandAdapter: TypeAdapter[TaskCommand] = TypeAdapter(TaskCommand)
47
+
48
+
49
def validate_command(raw: Any) -> MemoryCommand:
    """Validate ``raw`` against the memory-command adapter and return the typed command."""
    command = MemoryCommandAdapter.validate_python(raw)
    return command
52
+
53
+
54
def validate_intent_command(raw: Any) -> IntentCommand:
    """Validate ``raw`` against ``IntentCommandAdapter`` and return the typed command."""
    command = IntentCommandAdapter.validate_python(raw)
    return command
56
+
57
+
58
def validate_task_command(raw: Any) -> TaskCommand:
    """Validate ``raw`` against ``TaskCommandAdapter`` and return the typed command."""
    command = TaskCommandAdapter.validate_python(raw)
    return command
60
+
61
+
62
def validate_memory_item(raw: Any) -> MemoryItem:
    """Validate ``raw`` with ``MemoryItem.model_validate`` and return the model."""
    item = MemoryItem.model_validate(raw)
    return item
64
+
65
+
66
def validate_edge(raw: Any) -> Edge:
    """Validate ``raw`` with ``Edge.model_validate`` and return the model."""
    edge = Edge.model_validate(raw)
    return edge
memex/serialization.py ADDED
@@ -0,0 +1,47 @@
1
+ """Serialize / restore a GraphState.
2
+
3
+ The on-disk shape matches the TS library — ``{"items": [[id, item], ...],
4
+ "edges": [[id, edge], ...]}`` with unset optionals omitted and edge ``from``
5
+ emitted under its alias — so a Python event store stays wire-compatible with a
6
+ TypeScript one.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from typing import Any
13
+
14
+ from .graph import GraphState
15
+ from .models import Edge, MemoryItem
16
+
17
+ __all__ = ["SerializedGraphState", "to_json", "from_json", "stringify", "parse"]
18
+
19
# Serialized form of a graph: the "items" and "edges" keys each map to a list of
# two-element ``[id, dumped_model]`` entries (see to_json / from_json).
SerializedGraphState = dict[str, list[list[Any]]]
20
+
21
+
22
def _dump(model: MemoryItem | Edge) -> dict[str, Any]:
    """Dump a model to a plain dict, using field aliases and dropping ``None`` values."""
    dump_options = {"by_alias": True, "exclude_none": True}
    return model.model_dump(**dump_options)
24
+
25
+
26
def to_json(state: GraphState) -> SerializedGraphState:
    """Convert a graph state into its JSON-ready ``[id, payload]`` list form.

    Entries follow the iteration order of ``state.items`` and ``state.edges``.
    """
    item_pairs = [[key, _dump(value)] for key, value in state.items.items()]
    edge_pairs = [[key, _dump(value)] for key, value in state.edges.items()]
    return {"items": item_pairs, "edges": edge_pairs}
31
+
32
+
33
def from_json(data: SerializedGraphState) -> GraphState:
    """Rebuild a ``GraphState`` from the ``[id, payload]`` list form.

    A missing (or empty/None) ``items`` or ``edges`` key is treated as an empty
    collection, mirroring JS ``new Map(undefined)``. Each payload is validated
    through its model.
    """
    items = {}
    for key, payload in data.get("items") or []:
        items[key] = MemoryItem.model_validate(payload)

    edges = {}
    for key, payload in data.get("edges") or []:
        edges[key] = Edge.model_validate(payload)

    return GraphState(items=items, edges=edges)
38
+
39
+
40
def stringify(state: GraphState, pretty: bool = False) -> str:
    """Serialize a graph state to a JSON string.

    With ``pretty`` the output is indented by two spaces; otherwise it is
    compact, with no whitespace after ``,`` or ``:``.
    """
    payload = to_json(state)
    if not pretty:
        return json.dumps(payload, separators=(",", ":"))
    return json.dumps(payload, indent=2)
44
+
45
+
46
def parse(json_str: str) -> GraphState:
    """Decode a JSON string and rebuild the ``GraphState`` it describes."""
    decoded = json.loads(json_str)
    return from_json(decoded)
memex/stats.py ADDED
@@ -0,0 +1,71 @@
1
+ """Aggregate counts over a GraphState."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Callable, Iterable
6
+ from typing import NamedTuple, TypeVar
7
+
8
+ from .graph import GraphState
9
+
10
+ __all__ = ["ItemStats", "EdgeStats", "GraphStats", "get_stats"]
11
+
12
+ T = TypeVar("T")
13
+
14
+
15
class ItemStats(NamedTuple):
    """Aggregate counts over the items of a graph."""

    # Number of items.
    total: int
    # Item counts keyed by ``item.kind``.
    by_kind: dict[str, int]
    # Item counts keyed by ``item.source_kind``.
    by_source_kind: dict[str, int]
    # Item counts keyed by ``item.author``.
    by_author: dict[str, int]
    # Item counts keyed by ``item.scope``.
    by_scope: dict[str, int]
    # Items with a non-empty ``parents`` value.
    with_parents: int
    # Items with no parents.
    root: int
23
+
24
+
25
class EdgeStats(NamedTuple):
    """Aggregate counts over the edges of a graph."""

    # Number of edges.
    total: int
    # Edges whose ``active`` flag is truthy.
    active: int
    # Edge counts keyed by ``edge.kind``.
    by_kind: dict[str, int]
29
+
30
+
31
class GraphStats(NamedTuple):
    """Item and edge statistics for one graph state."""

    # Counts over ``state.items``.
    items: ItemStats
    # Counts over ``state.edges``.
    edges: EdgeStats
34
+
35
+
36
def _count_by(values: Iterable[T], key_fn: Callable[[T], str]) -> dict[str, int]:
    """Count how many of ``values`` fall under each key produced by ``key_fn``.

    Keys appear in the result in the order they are first encountered.
    """
    tally: dict[str, int] = {}
    for value in values:
        bucket = key_fn(value)
        if bucket in tally:
            tally[bucket] += 1
        else:
            tally[bucket] = 1
    return tally
42
+
43
+
44
def get_stats(state: GraphState) -> GraphStats:
    """Compute aggregate item and edge counts for ``state``.

    Items are split into those with a non-empty ``parents`` value and roots
    (everything else), and tallied by kind, source kind, author and scope.
    Edges are tallied by kind, with a separate count of active edges.
    """
    items = [*state.items.values()]
    edges = [*state.edges.values()]

    with_parents = sum(1 for item in items if item.parents)
    root = len(items) - with_parents

    active_edges = 0
    for edge in edges:
        if edge.active:
            active_edges += 1

    item_stats = ItemStats(
        total=len(items),
        by_kind=_count_by(items, lambda entry: entry.kind),
        by_source_kind=_count_by(items, lambda entry: entry.source_kind),
        by_author=_count_by(items, lambda entry: entry.author),
        by_scope=_count_by(items, lambda entry: entry.scope),
        with_parents=with_parents,
        root=root,
    )
    edge_stats = EdgeStats(
        total=len(edges),
        active=active_edges,
        by_kind=_count_by(edges, lambda link: link.kind),
    )
    return GraphStats(items=item_stats, edges=edge_stats)