memex-python 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
memex/query.py ADDED
@@ -0,0 +1,435 @@
1
+ """Filtering, scoring, decay, sorting, and neighborhood navigation.
2
+
3
+ Iteration order follows insertion order everywhere (Python ``dict`` preserves it,
4
+ matching JS ``Map``), so result ordering is identical to the TS library. The
5
+ multi-sort comparator is ported via ``functools.cmp_to_key`` to reproduce the JS
6
+ comparator exactly, including its stable tie-breaking.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import math
12
+ from functools import cmp_to_key
13
+ from typing import Any
14
+
15
+ from pydantic import BaseModel
16
+
17
+ from . import _time
18
+ from ._uuid import safe_extract_timestamp
19
+ from .errors import InvalidTimestampError
20
+ from .graph import GraphState
21
+ from .models import (
22
+ DecayConfig,
23
+ Edge,
24
+ EdgeFilter,
25
+ MemoryFilter,
26
+ MemoryItem,
27
+ QueryOptions,
28
+ Range,
29
+ ScoredItem,
30
+ ScoreWeights,
31
+ SortOption,
32
+ )
33
+
34
# Public names exported by this module, grouped by concern.
__all__ = [
    # option model, filter predicate, timestamp extraction
    "ScoredQueryOptions",
    "matches_filter",
    "extract_timestamp",
    # collection queries and id lookups
    "get_items",
    "get_scored_items",
    "get_edges",
    "get_item_by_id",
    "get_edge_by_id",
    # neighborhood navigation
    "get_related_items",
    "get_parents",
    "get_children",
    # decay, scoring, sort keys
    "compute_decay_multiplier",
    "compute_score",
    "get_sort_value",
]
50
+
51
# Unique sentinel: lets path lookups tell "key absent" apart from a stored ``None``.
_MISSING = object()

# Duration of each supported decay interval, in milliseconds.
INTERVAL_MS: dict[str, int] = dict(
    hour=3_600_000,
    day=86_400_000,
    week=604_800_000,
)
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Coercion helpers — public functions accept models OR plain dicts (D5/ergonomics)
62
+ # ---------------------------------------------------------------------------
63
+
64
+
65
def _coerce_filter(f: MemoryFilter | dict[str, Any] | None) -> MemoryFilter | None:
    """Normalize a filter argument to a ``MemoryFilter`` (or ``None``).

    ``None`` and existing ``MemoryFilter`` instances are returned unchanged;
    any other value is handed to ``MemoryFilter.model_validate``.
    """
    if f is None:
        return None
    if isinstance(f, MemoryFilter):
        return f
    return MemoryFilter.model_validate(f)
69
+
70
+
71
def _coerce_options(o: QueryOptions | dict[str, Any] | None) -> QueryOptions | None:
    """Normalize query options to a ``QueryOptions`` (or ``None``).

    ``None`` and existing ``QueryOptions`` instances are returned unchanged;
    any other value is handed to ``QueryOptions.model_validate``.
    """
    if o is None:
        return None
    if isinstance(o, QueryOptions):
        return o
    return QueryOptions.model_validate(o)
75
+
76
+
77
def _coerce_weights(w: ScoreWeights | dict[str, Any]) -> ScoreWeights:
    """Return ``w`` as a ``ScoreWeights``.

    An existing ``ScoreWeights`` is returned as-is; anything else is handed to
    ``ScoreWeights.model_validate``.
    """
    already_model = isinstance(w, ScoreWeights)
    return w if already_model else ScoreWeights.model_validate(w)
81
+
82
+
83
def _coerce_edge_filter(f: EdgeFilter | dict[str, Any] | None) -> EdgeFilter | None:
    """Normalize an edge filter argument to an ``EdgeFilter`` (or ``None``).

    ``None`` and existing ``EdgeFilter`` instances are returned unchanged;
    any other value is handed to ``EdgeFilter.model_validate``.
    """
    if f is None:
        return None
    if isinstance(f, EdgeFilter):
        return f
    return EdgeFilter.model_validate(f)
87
+
88
+
89
+ # ---------------------------------------------------------------------------
90
+ # Timestamp helpers
91
+ # ---------------------------------------------------------------------------
92
+
93
+
94
def extract_timestamp(uuid_id: str) -> int:
    """Extract the ms timestamp from a UUIDv7 id.

    Raises:
        InvalidTimestampError: when ``safe_extract_timestamp`` yields ``None``
            for ``uuid_id``.
    """
    timestamp = safe_extract_timestamp(uuid_id)
    if timestamp is not None:
        return timestamp
    raise InvalidTimestampError(
        f'Cannot extract timestamp: "{uuid_id}" is not a valid UUIDv7'
    )
102
+
103
+
104
def _item_timestamp(item: MemoryItem) -> int:
    """Return the timestamp used for ``item``.

    An explicit ``created_at`` wins; otherwise the value is derived from the
    item's id via ``safe_extract_timestamp``.

    Raises:
        InvalidTimestampError: when ``created_at`` is unset and no timestamp
            can be derived from the id.
    """
    if item.created_at is not None:
        return item.created_at
    derived = safe_extract_timestamp(item.id)
    if derived is None:
        raise InvalidTimestampError(
            f'Cannot determine timestamp for item "{item.id}": '
            "set created_at or use a UUIDv7 id"
        )
    return derived
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # Filtering
116
+ # ---------------------------------------------------------------------------
117
+
118
+
119
def _resolve_path(obj: Any, path: str) -> Any:
    """Follow a dot-separated ``path`` through nested dicts.

    Returns the value found at the end of the path, or the ``_MISSING``
    sentinel as soon as a step hits a non-dict or an absent key.
    """
    node = obj
    for key in path.split("."):
        if isinstance(node, dict) and key in node:
            node = node[key]
        else:
            return _MISSING
    return node
126
+
127
+
128
def _matches_range(value: float | None, rng: Range | None) -> bool:
    """Check ``value`` against the inclusive bounds of ``rng``.

    A ``None`` range accepts everything. Each bound that is set rejects a
    ``None`` value as well as a value outside that bound.
    """
    if rng is None:
        return True

    lower = rng.min
    if lower is not None:
        if value is None or value < lower:
            return False

    upper = rng.max
    if upper is not None:
        if value is None or value > upper:
            return False

    return True
136
+
137
+
138
def matches_filter(item: MemoryItem, f: MemoryFilter) -> bool:
    """Decide whether ``item`` passes every clause that is set on ``f``.

    A clause whose filter field is ``None`` is skipped. Clauses are evaluated
    in a fixed order and the first failing clause returns ``False``. The
    ``decay`` and ``created`` clauses call helpers that can raise, so they only
    run once all earlier clauses have passed.
    """
    if f.ids is not None and item.id not in f.ids:
        return False

    # Scope: exact match and/or prefix match.
    if f.scope is not None and item.scope != f.scope:
        return False
    if f.scope_prefix is not None and not item.scope.startswith(f.scope_prefix):
        return False

    # Plain equality clauses.
    for name in ("author", "kind", "source_kind"):
        wanted = getattr(f, name)
        if wanted is not None and getattr(item, name) != wanted:
            return False

    # Intent / task: a single expected id, then a collection of allowed ids.
    for single, many in (("intent_id", "intent_ids"), ("task_id", "task_ids")):
        wanted = getattr(f, single)
        if wanted is not None and getattr(item, single) != wanted:
            return False
        allowed = getattr(f, many)
        if allowed is not None:
            actual = getattr(item, single)
            if actual is None or actual not in allowed:
                return False

    # Score ranges.
    if f.range is not None:
        for score in ("authority", "conviction", "importance"):
            if not _matches_range(getattr(item, score), getattr(f.range, score)):
                return False

    # Parent shortcuts.
    if f.has_parent is not None and (
        item.parents is None or f.has_parent not in item.parents
    ):
        return False
    if f.is_root is not None:
        has_parents = item.parents is not None and len(item.parents) > 0
        # A root must have no parents; a non-root must have at least one.
        if bool(f.is_root) == has_parents:
            return False

    # Structured parents clause; a missing parents list counts as empty.
    spec = f.parents
    if spec is not None:
        owned = item.parents or []
        if spec.includes is not None and spec.includes not in owned:
            return False
        if spec.includes_any is not None and all(
            pid not in owned for pid in spec.includes_any
        ):
            return False
        if spec.includes_all is not None and any(
            pid not in owned for pid in spec.includes_all
        ):
            return False
        if spec.count is not None and not _matches_range(len(owned), spec.count):
            return False

    if f.decay is not None:
        if compute_decay_multiplier(item, f.decay.config) < f.decay.min:
            return False

    if f.created is not None:
        stamp = _item_timestamp(item)
        # ``before`` is exclusive, ``after`` is inclusive.
        if f.created.before is not None and stamp >= f.created.before:
            return False
        if f.created.after is not None and stamp < f.created.after:
            return False

    if f.not_ is not None and matches_filter(item, f.not_):
        return False

    if f.meta is not None:
        # An absent path resolves to the sentinel, which never equals ``expected``.
        if any(
            _resolve_path(item.meta, path) != expected
            for path, expected in f.meta.items()
        ):
            return False

    if f.meta_has is not None:
        if any(_resolve_path(item.meta, path) is _MISSING for path in f.meta_has):
            return False

    # An empty / unset ``or_`` imposes no constraint.
    if f.or_ and not any(matches_filter(item, alt) for alt in f.or_):
        return False

    return True
222
+
223
+
224
+ # ---------------------------------------------------------------------------
225
+ # Decay & scoring
226
+ # ---------------------------------------------------------------------------
227
+
228
+
229
def compute_decay_multiplier(item: MemoryItem, decay: DecayConfig) -> float:
    """Compute the multiplier that ``decay`` assigns to ``item`` at the current time.

    The item's age (``_time.now_ms()`` minus its timestamp) is expressed in
    units of ``decay.interval`` and fed to the curve named by ``decay.type``.
    An item whose age is zero or negative gets ``1.0``.

    Raises:
        ValueError: for an unknown ``decay.interval`` or ``decay.type``.
    """
    elapsed_ms = _time.now_ms() - _item_timestamp(item)
    if elapsed_ms <= 0:
        # future item (clock skew) — no decay
        return 1.0

    unit_ms = INTERVAL_MS.get(decay.interval)
    if unit_ms is None:
        raise ValueError(
            f'Unknown decay interval: "{decay.interval}". Expected "hour", "day", or "week".'
        )
    periods = elapsed_ms / unit_ms

    curve = decay.type
    if curve == "exponential":
        # Continuous compounding over fractional periods.
        return float((1 - decay.rate) ** periods)
    if curve == "linear":
        # Straight-line falloff, floored at zero.
        return max(0.0, 1 - decay.rate * periods)
    if curve == "step":
        # Compounds only on whole elapsed periods.
        return float((1 - decay.rate) ** math.floor(periods))
    raise ValueError(
        f'Unknown decay type: "{decay.type}". Expected "exponential", "linear", or "step".'
    )
249
+
250
+
251
def _n(value: float | None) -> float:
    """Return ``value``, substituting ``0.0`` when it is ``None``."""
    if value is None:
        return 0.0
    return value
253
+
254
+
255
def compute_score(item: MemoryItem, weights: ScoreWeights) -> float:
    """Compute the weighted score of ``item``.

    The score is the sum of each weight times the matching item score, with
    missing weights and missing conviction/importance counted as ``0.0``. When
    ``weights.decay`` is set, the sum is scaled by the decay multiplier.
    """
    authority_part = _n(weights.authority) * item.authority
    conviction_part = _n(weights.conviction) * _n(item.conviction)
    importance_part = _n(weights.importance) * _n(item.importance)
    total = authority_part + conviction_part + importance_part

    if weights.decay is not None:
        return total * compute_decay_multiplier(item, weights.decay)
    return total
264
+
265
+
266
+ # ---------------------------------------------------------------------------
267
+ # Sorting
268
+ # ---------------------------------------------------------------------------
269
+
270
+
271
def get_sort_value(item: MemoryItem, field: str) -> float:
    """Return the numeric key used to sort ``item`` by ``field``.

    ``authority`` is read directly, ``conviction`` / ``importance`` fall back
    to ``0.0`` when unset, and ``recency`` is the item's timestamp.

    Raises:
        ValueError: when ``field`` is not one of the four supported names.
    """
    if field == "authority":
        return item.authority
    if field in ("conviction", "importance"):
        return _n(getattr(item, field))
    if field == "recency":
        return _item_timestamp(item)
    raise ValueError(
        f'Unknown sort field: "{field}". '
        'Expected "authority", "conviction", "importance", or "recency".'
    )
284
+
285
+
286
def _multi_sort(items: list[MemoryItem], sorts: list[SortOption]) -> list[MemoryItem]:
    """Return a new list of ``items`` ordered by ``sorts``, applied in priority order.

    Later sort options only break ties left by earlier ones. Items that tie on
    every option keep their input order, because ``sorted`` is stable.
    """

    def compare(left: MemoryItem, right: MemoryItem) -> int:
        for option in sorts:
            left_key = get_sort_value(left, option.field)
            right_key = get_sort_value(right, option.field)
            if left_key < right_key:
                sign = -1
            elif left_key > right_key:
                sign = 1
            else:
                # Tie on this option: defer to the next one.
                continue
            # Any order other than "asc" flips the result.
            return sign if option.order == "asc" else -sign
        return 0

    return sorted(items, key=cmp_to_key(compare))
298
+
299
+
300
+ # ---------------------------------------------------------------------------
301
+ # Queries
302
+ # ---------------------------------------------------------------------------
303
+
304
+
305
def get_items(
    state: GraphState,
    filter: MemoryFilter | dict[str, Any] | None = None,
    options: QueryOptions | dict[str, Any] | None = None,
) -> list[MemoryItem]:
    """List the items in ``state``, optionally filtered, sorted, and paginated.

    ``filter`` and ``options`` may each be a model, a plain dict, or ``None``.
    Items are visited in the dict's iteration order; sorting (one option or a
    list of them) is applied before ``offset`` / ``limit``.
    """
    criteria = _coerce_filter(filter)
    everything = state.items.values()
    if criteria is not None:
        selected = [entry for entry in everything if matches_filter(entry, criteria)]
    else:
        selected = list(everything)

    opts = _coerce_options(options)
    if opts is None:
        return selected

    if opts.sort is not None:
        # A single sort option is treated as a one-element list.
        sort_chain = opts.sort if isinstance(opts.sort, list) else [opts.sort]
        selected = _multi_sort(selected, sort_chain)

    if opts.offset is not None or opts.limit is not None:
        first = opts.offset or 0
        last = None if opts.limit is None else first + opts.limit
        selected = selected[first:last]

    return selected
327
+
328
+
329
class ScoredQueryOptions(BaseModel):
    # Options accepted by ``get_scored_items``; every field is optional.
    # Documented with comments (not a docstring) so the model's ``__doc__`` stays as-is.

    # Filter applied to items before they are scored.
    pre: MemoryFilter | None = None
    # Filter applied to the scored results.
    post: MemoryFilter | None = None
    # Results scoring below this value are dropped.
    min_score: float | None = None
    # Maximum number of results kept after ``offset``.
    limit: int | None = None
    # Number of leading results to skip.
    offset: int | None = None
335
+
336
+
337
def _coerce_scored_options(
    o: ScoredQueryOptions | dict[str, Any] | None,
) -> ScoredQueryOptions | None:
    """Normalize scored-query options to a ``ScoredQueryOptions`` (or ``None``).

    ``None`` and existing ``ScoredQueryOptions`` instances are returned
    unchanged; any other value is handed to ``ScoredQueryOptions.model_validate``.
    """
    if o is None:
        return None
    if isinstance(o, ScoredQueryOptions):
        return o
    return ScoredQueryOptions.model_validate(o)
343
+
344
+
345
def get_scored_items(
    state: GraphState,
    weights: ScoreWeights | dict[str, Any],
    options: ScoredQueryOptions | dict[str, Any] | None = None,
) -> list[ScoredItem]:
    """Score the items in ``state`` and return them highest score first.

    Pipeline: ``pre`` filter, score, sort descending (ties keep their original
    order), ``min_score`` cut-off, ``post`` filter, then ``offset`` / ``limit``.
    ``weights`` and ``options`` may be models or plain dicts.
    """
    score_weights = _coerce_weights(weights)
    opts = _coerce_scored_options(options)

    pre_filter = opts.pre if opts else None
    ranked = sorted(
        (
            ScoredItem(item=entry, score=compute_score(entry, score_weights))
            for entry in get_items(state, pre_filter)
        ),
        key=lambda scored_item: scored_item.score,
        reverse=True,
    )
    if opts is None:
        return ranked

    if opts.min_score is not None:
        ranked = [entry for entry in ranked if entry.score >= opts.min_score]
    if opts.post is not None:
        ranked = [entry for entry in ranked if matches_filter(entry.item, opts.post)]
    if opts.offset is not None or opts.limit is not None:
        first = opts.offset or 0
        last = None if opts.limit is None else first + opts.limit
        ranked = ranked[first:last]

    return ranked
367
+
368
+
369
def get_edges(
    state: GraphState,
    filter: EdgeFilter | dict[str, Any] | None = None,
) -> list[Edge]:
    """List the edges in ``state`` that satisfy ``filter``.

    Inactive edges are excluded unless the filter sets ``active_only`` to a
    falsy value. Each of ``from_``, ``to``, ``kind`` and ``min_weight`` is only
    checked when set; an edge without a weight fails a ``min_weight`` clause.
    """
    criteria = _coerce_edge_filter(filter)

    only_active = True
    if criteria is not None and criteria.active_only is not None:
        only_active = criteria.active_only

    def keep(edge: Edge) -> bool:
        if only_active and not edge.active:
            return False
        if criteria is None:
            return True
        if criteria.from_ is not None and edge.from_ != criteria.from_:
            return False
        if criteria.to is not None and edge.to != criteria.to:
            return False
        if criteria.kind is not None and edge.kind != criteria.kind:
            return False
        if criteria.min_weight is not None and (
            edge.weight is None or edge.weight < criteria.min_weight
        ):
            return False
        return True

    return [edge for edge in state.edges.values() if keep(edge)]
390
+
391
+
392
def get_item_by_id(state: GraphState, id: str) -> MemoryItem | None:
    """Look up an item by id; returns ``None`` when the id is not in ``state.items``."""
    found = state.items.get(id)
    return found
394
+
395
+
396
def get_edge_by_id(state: GraphState, edge_id: str) -> Edge | None:
    """Look up an edge by id; returns ``None`` when the id is not in ``state.edges``."""
    found = state.edges.get(edge_id)
    return found
398
+
399
+
400
def get_related_items(
    state: GraphState,
    item_id: str,
    direction: str = "both",
) -> list[MemoryItem]:
    """Return the items connected to ``item_id`` by an active edge.

    Args:
        state: Graph whose ``edges`` and ``items`` are searched.
        item_id: Id of the item whose neighbors are wanted.
        direction: ``"from"`` follows edges leaving ``item_id``, ``"to"``
            follows edges arriving at it, ``"both"`` (default) follows either.

    Returns:
        Neighbor items, de-duplicated, in the order their ids are first seen
        while iterating ``state.edges``. ``item_id`` itself is never returned,
        and neighbor ids missing from ``state.items`` are skipped.

    Raises:
        ValueError: if ``direction`` is not ``"from"``, ``"to"`` or ``"both"``.
    """
    # Reject typos loudly instead of silently matching no edge at all.
    if direction not in ("from", "to", "both"):
        raise ValueError(
            f'Unknown direction: "{direction}". Expected "from", "to", or "both".'
        )
    # Loop-invariant: decide once which edge endpoints to follow.
    follow_outgoing = direction != "to"
    follow_incoming = direction != "from"

    related_ids: dict[str, None] = {}  # insertion-ordered set
    for edge in state.edges.values():
        if not edge.active:
            continue
        if follow_outgoing and edge.from_ == item_id:
            related_ids[edge.to] = None
        if follow_incoming and edge.to == item_id:
            related_ids[edge.from_] = None
    # A self-loop would otherwise list the item as its own neighbor.
    related_ids.pop(item_id, None)

    neighbors = (state.items.get(rid) for rid in related_ids)
    return [neighbor for neighbor in neighbors if neighbor is not None]
421
+
422
+
423
def get_parents(state: GraphState, item_id: str) -> list[MemoryItem]:
    """Return the parent items of ``item_id`` that exist in ``state``.

    Yields an empty list when the item is unknown or has no parents. Parent
    ids that are not present in ``state.items`` are skipped; order follows the
    item's ``parents`` list.
    """
    child = state.items.get(item_id)
    if child is None or not child.parents:
        return []

    known = state.items
    resolved: list[MemoryItem] = []
    for parent_id in child.parents:
        if parent_id in known:
            resolved.append(known[parent_id])
    return resolved
428
+
429
+
430
def get_children(state: GraphState, item_id: str) -> list[MemoryItem]:
    """Return every item in ``state`` that lists ``item_id`` among its parents.

    Results follow the iteration order of ``state.items``.
    """
    children: list[MemoryItem] = []
    for candidate in state.items.values():
        parent_ids = candidate.parents
        if parent_ids and item_id in parent_ids:
            children.append(candidate)
    return children
memex/reducer.py ADDED
@@ -0,0 +1,151 @@
1
+ """The memory-graph reducer: ``apply_command(state, cmd) -> (new_state, events)``.
2
+
3
+ Pure and immutable — every branch returns a fresh :class:`GraphState` (the
4
+ relevant dict is cloned) and never mutates the input. ``merge_item`` /
5
+ ``merge_edge`` use ``model_copy(update=...)``, which does NOT re-validate — this
6
+ exactly mirrors the TS guarantee that *factories validate scores, updates do not*.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, NamedTuple
12
+
13
+ from pydantic import BaseModel
14
+
15
+ from .commands import (
16
+ EdgeCreate,
17
+ EdgeRetract,
18
+ EdgeUpdate,
19
+ MemoryCommand,
20
+ MemoryCommandAdapter,
21
+ MemoryCreate,
22
+ MemoryRetract,
23
+ MemoryUpdate,
24
+ )
25
+ from .errors import (
26
+ DuplicateEdgeError,
27
+ DuplicateMemoryError,
28
+ EdgeNotFoundError,
29
+ MemoryNotFoundError,
30
+ )
31
+ from .graph import GraphState
32
+ from .models import Edge, MemoryItem, MemoryLifecycleEvent
33
+
34
# Public names exported by the reducer module.
__all__ = [
    "CommandResult",
    "apply_command",
    "merge_item",
    "merge_edge",
]
35
+
36
+
37
class CommandResult(NamedTuple):
    """Pair returned by ``apply_command``: the resulting state and its events."""

    # Graph state after the command has been applied.
    state: GraphState
    # Lifecycle events emitted while applying the command, in emission order.
    events: list[MemoryLifecycleEvent]
40
+
41
+
42
# Edge keys that an update may never overwrite ("from" and "from_" both covered).
_EDGE_IMMUTABLE = frozenset(("edge_id", "from", "from_", "to"))
43
+
44
+
45
def _merge_and_prune(base: dict[str, Any], patch: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict holding ``base`` overlaid with ``patch`` (shallow).

    The TS original strips ``undefined`` entries around the merge. Python has
    no ``undefined``, and JS ``null`` maps to ``None``, which is kept. As a
    result nothing is pruned here, and an update cannot *delete* a content or
    meta key — matching the TS behavior. Neither input is mutated.
    """
    merged = dict(base)
    merged.update(patch)
    return merged
54
+
55
+
56
def merge_item(existing: MemoryItem, partial: dict[str, Any]) -> MemoryItem:
    """Return a copy of ``existing`` with ``partial`` applied.

    ``id`` and ``created_at`` entries in ``partial`` are ignored. ``content``
    and ``meta`` are shallow-merged onto the current values (a missing ``meta``
    is treated as empty); every other key replaces the field outright. The copy
    is made with ``model_copy(update=...)``.
    """
    protected = ("id", "created_at")
    changes: dict[str, Any] = {}
    for field, new_value in partial.items():
        if field in protected:
            continue
        if field == "content":
            changes[field] = _merge_and_prune(existing.content, new_value)
        elif field == "meta":
            changes[field] = _merge_and_prune(existing.meta or {}, new_value)
        else:
            changes[field] = new_value
    return existing.model_copy(update=changes)
69
+
70
+
71
def merge_edge(existing: Edge, partial: dict[str, Any]) -> Edge:
    """Return a copy of ``existing`` with ``partial`` applied.

    Keys listed in ``_EDGE_IMMUTABLE`` (``edge_id`` / ``from`` / ``to``) are
    dropped from ``partial`` before the copy is made with
    ``model_copy(update=...)``.
    """
    changes: dict[str, Any] = {}
    for field, new_value in partial.items():
        if field in _EDGE_IMMUTABLE:
            continue
        changes[field] = new_value
    return existing.model_copy(update=changes)
75
+
76
+
77
+ def apply_command(state: GraphState, cmd: MemoryCommand | dict[str, Any]) -> CommandResult:
78
+ command = cmd if isinstance(cmd, BaseModel) else MemoryCommandAdapter.validate_python(cmd)
79
+
80
+ match command:
81
+ case MemoryCreate(item=item):
82
+ if item.id in state.items:
83
+ raise DuplicateMemoryError(item.id)
84
+ items = {**state.items, item.id: item}
85
+ return CommandResult(
86
+ GraphState(items, state.edges),
87
+ [MemoryLifecycleEvent(type="memory.created", item=item, cause_type="memory.create")],
88
+ )
89
+
90
+ case MemoryUpdate(item_id=item_id, partial=partial):
91
+ existing = state.items.get(item_id)
92
+ if existing is None:
93
+ raise MemoryNotFoundError(item_id)
94
+ merged = merge_item(existing, partial)
95
+ items = {**state.items, item_id: merged}
96
+ return CommandResult(
97
+ GraphState(items, state.edges),
98
+ [MemoryLifecycleEvent(type="memory.updated", item=merged, cause_type="memory.update")],
99
+ )
100
+
101
+ case MemoryRetract(item_id=item_id):
102
+ existing = state.items.get(item_id)
103
+ if existing is None:
104
+ raise MemoryNotFoundError(item_id)
105
+ items = dict(state.items)
106
+ del items[item_id]
107
+ edges = dict(state.edges)
108
+ events: list[MemoryLifecycleEvent] = [
109
+ MemoryLifecycleEvent(type="memory.retracted", item=existing, cause_type="memory.retract")
110
+ ]
111
+ for edge_id, edge in state.edges.items():
112
+ if edge.from_ == item_id or edge.to == item_id:
113
+ del edges[edge_id]
114
+ events.append(
115
+ MemoryLifecycleEvent(type="edge.retracted", edge=edge, cause_type="memory.retract")
116
+ )
117
+ return CommandResult(GraphState(items, edges), events)
118
+
119
+ case EdgeCreate(edge=edge):
120
+ if edge.edge_id in state.edges:
121
+ raise DuplicateEdgeError(edge.edge_id)
122
+ edges = {**state.edges, edge.edge_id: edge}
123
+ return CommandResult(
124
+ GraphState(state.items, edges),
125
+ [MemoryLifecycleEvent(type="edge.created", edge=edge, cause_type="edge.create")],
126
+ )
127
+
128
+ case EdgeUpdate(edge_id=edge_id, partial=partial):
129
+ existing_edge = state.edges.get(edge_id)
130
+ if existing_edge is None:
131
+ raise EdgeNotFoundError(edge_id)
132
+ merged_edge = merge_edge(existing_edge, partial)
133
+ edges = {**state.edges, edge_id: merged_edge}
134
+ return CommandResult(
135
+ GraphState(state.items, edges),
136
+ [MemoryLifecycleEvent(type="edge.updated", edge=merged_edge, cause_type="edge.update")],
137
+ )
138
+
139
+ case EdgeRetract(edge_id=edge_id):
140
+ existing_edge = state.edges.get(edge_id)
141
+ if existing_edge is None:
142
+ raise EdgeNotFoundError(edge_id)
143
+ edges = dict(state.edges)
144
+ del edges[edge_id]
145
+ return CommandResult(
146
+ GraphState(state.items, edges),
147
+ [MemoryLifecycleEvent(type="edge.retracted", edge=existing_edge, cause_type="edge.retract")],
148
+ )
149
+
150
+ case _: # pragma: no cover - defensive
151
+ raise TypeError(f"Unknown memory command: {command!r}")