PyPI - memex-python - Versions diffs - 0.13.0__py3-none-any.whl - Mend

memex-python 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

memex/__init__.py +336 -0
memex/_time.py +26 -0
memex/_uuid.py +62 -0
memex/bulk.py +138 -0
memex/commands.py +75 -0
memex/envelope.py +69 -0
memex/errors.py +51 -0
memex/factories.py +97 -0
memex/graph.py +30 -0
memex/integrity.py +317 -0
memex/intent.py +318 -0
memex/models.py +271 -0
memex/query.py +435 -0
memex/reducer.py +151 -0
memex/replay.py +144 -0
memex/retrieval.py +266 -0
memex/schemas.py +67 -0
memex/serialization.py +47 -0
memex/stats.py +71 -0
memex/store.py +222 -0
memex/task.py +361 -0
memex/transplant.py +480 -0
memex_python-0.13.0.dist-info/METADATA +150 -0
memex_python-0.13.0.dist-info/RECORD +26 -0
memex_python-0.13.0.dist-info/WHEEL +4 -0
memex_python-0.13.0.dist-info/licenses/LICENSE +190 -0

memex/replay.py ADDED Viewed

@@ -0,0 +1,144 @@
+"""Event-log replay. Integrity-tolerant: per-item failures are collected in
+``skipped`` rather than thrown. Includes a strict ISO-8601 parser ported
+verbatim from the TS library (rejects sub-ms precision, validates calendar
+fields, requires ``Z`` or an explicit offset) so replay ordering is deterministic.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, NamedTuple, cast
+from .errors import InvalidTimestampError
+from .graph import GraphState, create_graph_state
+from .models import MemoryLifecycleEvent
+from .reducer import apply_command
+__all__ = ["ReplayFailure", "ReplayResult", "replay_commands", "replay_from_envelopes"]
+@dataclass
+class ReplayFailure:
+    """One input entry that replay skipped instead of raising.
+    The replay functions in this module fill in either ``command`` or
+    ``envelope``, depending on which entry point recorded the failure.
+    """
+    # dataclass (not NamedTuple) so the `index` field does not clash with
+    # tuple.index under strict typing.
+    # Position of the offending entry in the caller's input list.
+    index: int
+    # The exception caught while parsing or applying the entry.
+    error: Exception
+    # Set by replay_commands; left as None by replay_from_envelopes.
+    command: Any = None
+    # Set by replay_from_envelopes; left as None by replay_commands.
+    envelope: Any = None
+class ReplayResult(NamedTuple):
+    """Outcome of a replay: final state, emitted events, and skipped inputs."""
+    # Graph state after every entry that applied cleanly has been folded in.
+    state: GraphState
+    # Lifecycle events returned by each successful apply, in application order.
+    events: list[MemoryLifecycleEvent]
+    # Entries that raised while being parsed or applied.
+    skipped: list[ReplayFailure]
+def replay_commands(commands: list[Any]) -> ReplayResult:
+    """Fold ``commands`` into a fresh graph state, in the order given.
+    A command whose apply step raises is recorded in ``skipped`` together with
+    its list index, and replay carries on from the last good state.
+    """
+    graph = create_graph_state()
+    emitted: list[MemoryLifecycleEvent] = []
+    failures: list[ReplayFailure] = []
+    for position, command in enumerate(commands):
+        try:
+            outcome = apply_command(graph, command)
+            graph = outcome.state
+            emitted.extend(outcome.events)
+        except Exception as exc:  # noqa: BLE001 - integrity-tolerant by design
+            failures.append(ReplayFailure(index=position, command=command, error=exc))
+    return ReplayResult(state=graph, events=emitted, skipped=failures)
+# Strict ISO 8601, milliseconds-only precision, explicit offset or Z.
+# Groups: 1-3 date, 4-6 time, 7 fractional seconds (1-3 digits), 8 offset sign,
+# 9-10 offset hours/minutes (8-10 are unset when the zone is ``Z``).
+# ``re.ASCII`` restricts ``\d`` to 0-9 (by default Python also accepts other
+# Unicode decimal digits, which ``int()`` would happily convert), and ``\Z``
+# anchors at the true end of the string (``$`` also matches just before a
+# trailing newline). Both are needed for the parser to actually be strict.
+_ISO_8601_RE = re.compile(
+    r"^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.(\d{1,3}))?(?:Z|([+-])(\d{2}):(\d{2}))\Z",
+    re.ASCII,
+)
+# Reference instant used to turn a parsed datetime into epoch milliseconds.
+_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
+def _is_leap_year(year: int) -> bool:
+    """Return True when ``year`` is a leap year under the Gregorian rules."""
+    if year % 400 == 0:
+        return True
+    if year % 100 == 0:
+        return False
+    return year % 4 == 0
+def _days_in_month(year: int, month: int) -> int:
+    """Return the number of days in ``month`` of ``year``.
+    February depends on the leap-year rule; April, June, September and
+    November have 30 days; every other month value is reported as 31.
+    """
+    if month == 2:
+        return 28 + int(_is_leap_year(year))
+    return 30 if month in (4, 6, 9, 11) else 31
+def parse_iso_ts(ts: str) -> int:
+    """Parse a strict ISO-8601 timestamp into milliseconds since the Unix epoch.
+    The string must match ``_ISO_8601_RE``; the calendar fields and the UTC
+    offset are then range-checked, and the offset is folded in so the result is
+    expressed in UTC.
+    Raises:
+        InvalidTimestampError: on a shape mismatch, an out-of-range calendar
+            field or offset, or a date that ``datetime`` itself rejects.
+    """
+    parsed = _ISO_8601_RE.match(ts)
+    if parsed is None:
+        raise InvalidTimestampError(f'Invalid envelope timestamp: "{ts}" (expected ISO 8601)')
+    (
+        year_txt, month_txt, day_txt,
+        hour_txt, minute_txt, second_txt,
+        frac_txt, off_sign, off_h_txt, off_m_txt,
+    ) = parsed.groups()
+    year, month, day = int(year_txt), int(month_txt), int(day_txt)
+    hour, minute, second = int(hour_txt), int(minute_txt), int(second_txt)
+    # Right-pad the fraction so ".5" means 500 ms rather than 5 ms.
+    millis = int(frac_txt.ljust(3, "0")) if frac_txt else 0
+    fields_ok = (
+        1 <= month <= 12
+        and 1 <= day <= _days_in_month(year, month)
+        and hour <= 23 and minute <= 59 and second <= 59
+    )
+    if not fields_ok:
+        raise InvalidTimestampError(f'Invalid envelope timestamp: "{ts}" (calendar fields out of range)')
+    try:
+        moment = datetime(year, month, day, hour, minute, second, millis * 1000, tzinfo=timezone.utc)
+    except ValueError as err:
+        raise InvalidTimestampError(f'Invalid envelope timestamp: "{ts}" ({err})') from err
+    since_epoch = moment - _EPOCH
+    epoch_ms = since_epoch.days * 86_400_000 + since_epoch.seconds * 1000 + since_epoch.microseconds // 1000
+    if not off_sign:
+        # "Z" zone: the wall-clock fields already are UTC.
+        return epoch_ms
+    off_hours, off_minutes = int(off_h_txt), int(off_m_txt)
+    if off_hours > 23 or off_minutes > 59:
+        raise InvalidTimestampError(f'Invalid envelope timestamp: "{ts}" (bad offset)')
+    shift_ms = (off_hours * 60 + off_minutes) * 60 * 1000
+    # A "+hh:mm" zone is ahead of UTC, so its offset is subtracted (and vice versa).
+    if off_sign == "-":
+        return epoch_ms + shift_ms
+    return epoch_ms - shift_ms
+def _env_ts(env: Any) -> str:
+    """Return an envelope's ``ts`` value, read as a dict key or as an attribute."""
+    # An envelope is a dict (e.g. from JSON) or an EventEnvelope model; its `ts`
+    # is always an ISO string.
+    if isinstance(env, dict):
+        return cast(str, env["ts"])
+    return cast(str, env.ts)
+def _env_payload(env: Any) -> Any:
+    """Return an envelope's ``payload``, read as a dict key or as an attribute."""
+    # The payload is genuinely heterogeneous — a command model or a raw dict —
+    # so Any is the honest type; apply_command re-validates it.
+    if isinstance(env, dict):
+        return env["payload"]
+    return env.payload
+def replay_from_envelopes(envelopes: list[Any]) -> ReplayResult:
+    """Replay envelope payloads onto a fresh graph state in timestamp order.
+    Envelopes whose ``ts`` cannot be read or parsed are skipped up front; the
+    rest are sorted by parsed timestamp and applied one by one. Any envelope
+    whose apply step raises is skipped too. Every skipped entry reports its
+    index in the original ``envelopes`` list.
+    """
+    failures: list[ReplayFailure] = []
+    ordered: list[tuple[int, int, Any]] = []  # (epoch ms, original index, env)
+    for position, envelope in enumerate(envelopes):
+        try:
+            ordered.append((parse_iso_ts(_env_ts(envelope)), position, envelope))
+        except Exception as exc:  # noqa: BLE001 - integrity-tolerant by design
+            failures.append(ReplayFailure(index=position, envelope=envelope, error=exc))
+    # list.sort is stable, so envelopes with equal timestamps keep input order.
+    ordered.sort(key=lambda entry: entry[0])
+    graph = create_graph_state()
+    emitted: list[MemoryLifecycleEvent] = []
+    for _when, position, envelope in ordered:
+        try:
+            outcome = apply_command(graph, _env_payload(envelope))
+            graph = outcome.state
+            emitted.extend(outcome.events)
+        except Exception as exc:  # noqa: BLE001 - integrity-tolerant by design
+            failures.append(ReplayFailure(index=position, envelope=envelope, error=exc))
+    return ReplayResult(state=graph, events=emitted, skipped=failures)

memex/retrieval.py ADDED Viewed

@@ -0,0 +1,266 @@
+"""Provenance walks, contradiction-aware packing, diversity, and smart retrieval."""
+from __future__ import annotations
+import math
+from collections.abc import Callable
+from dataclasses import dataclass
+from functools import cmp_to_key
+from typing import Any
+from pydantic import BaseModel
+from .graph import GraphState
+from .models import Edge, MemoryFilter, MemoryItem, ScoredItem, ScoreWeights
+from .query import get_edges, get_scored_items
+__all__ = [
+    "SupportNode",
+    "DiversityOptions",
+    "get_support_tree",
+    "get_support_set",
+    "filter_contradictions",
+    "surface_contradictions",
+    "apply_diversity",
+    "smart_retrieve",
+]
+# ---------------------------------------------------------------------------
+# 1. Support tree — provenance walk
+# ---------------------------------------------------------------------------
+@dataclass
+class SupportNode:
+    """A node in a provenance tree: an item plus the subtrees of its parents."""
+    # The memory item this node stands for.
+    item: MemoryItem
+    # One subtree per parent id that resolved to an item (possibly empty).
+    parents: list[SupportNode]
+def get_support_tree(state: GraphState, item_id: str) -> SupportNode | None:
+    """Build the provenance tree rooted at ``item_id``.
+    Returns None when ``item_id`` is not in ``state.items``. Parent ids that do
+    not resolve to an item are dropped. An item reached a second time during
+    the walk is emitted as a leaf (no parents), which is what stops cycles.
+    """
+    if item_id not in state.items:
+        return None
+    expanded: set[str] = set()
+    def build(node_id: str) -> SupportNode | None:
+        item = state.items.get(node_id)
+        if item is None:
+            return None
+        if node_id in expanded:
+            # Already expanded earlier in the walk: return a leaf so we terminate.
+            return SupportNode(item=item, parents=[])
+        expanded.add(node_id)
+        subtrees = (build(parent_id) for parent_id in item.parents or ())
+        return SupportNode(item=item, parents=[tree for tree in subtrees if tree is not None])
+    return build(item_id)
+def get_support_set(state: GraphState, item_id: str) -> list[MemoryItem]:
+    """Flatten the provenance chain into the set of items that justify a claim.
+    Walks ``parents`` links depth-first from ``item_id`` and returns each
+    reachable item once, in pre-order (the item itself first, then each parent's
+    chain in declaration order). Ids that do not resolve are ignored. Returns
+    an empty list when ``item_id`` is not in ``state.items``.
+    The walk uses an explicit stack rather than recursion, so very long
+    provenance chains cannot hit the interpreter's recursion limit.
+    """
+    if item_id not in state.items:
+        return []
+    visited: set[str] = set()
+    result: list[MemoryItem] = []
+    pending: list[str] = [item_id]
+    while pending:
+        node_id = pending.pop()
+        if node_id in visited:
+            continue
+        visited.add(node_id)
+        current = state.items.get(node_id)
+        if current is None:
+            continue
+        result.append(current)
+        if current.parents:
+            # Push in reverse so the first parent is popped (and expanded) first,
+            # reproducing the order of a recursive depth-first walk.
+            pending.extend(reversed(list(current.parents)))
+    return result
+# ---------------------------------------------------------------------------
+# 2. Contradiction-aware packing
+# ---------------------------------------------------------------------------
+def _superseded_ids(state: GraphState) -> set[str]:
+    """Collect the ``to`` ids of every active SUPERSEDES edge in ``state``."""
+    return {
+        edge.to
+        for edge in state.edges.values()
+        if edge.kind == "SUPERSEDES" and edge.active
+    }
+def filter_contradictions(state: GraphState, scored: list[ScoredItem]) -> list[ScoredItem]:
+    """Collapse contradictions: drop superseded items and the lower-scoring side
+    of each unresolved CONTRADICTS pair (deterministic tie-breaks).
+    Returns a new list in the same relative order as ``scored``; the input list
+    itself is not modified. A CONTRADICTS edge only causes an exclusion when
+    both of its endpoints are present in the (superseded-filtered) input.
+    """
+    superseded = _superseded_ids(state)
+    filtered = [s for s in scored if s.item.id not in superseded]
+    contradict_edges = get_edges(state, {"kind": "CONTRADICTS", "active_only": True})
+    if contradict_edges:
+        # -1 stands in for "this id has no score here".
+        # NOTE(review): this assumes real scores are never negative — confirm
+        # against get_scored_items.
+        score_map = {entry.item.id: entry.score for entry in filtered}
+        def _cmp(a: Edge, b: Edge) -> int:
+            # Highest max-score pair first, then highest min-score, then edge_id.
+            max_a = max(score_map.get(a.from_, -1), score_map.get(a.to, -1))
+            max_b = max(score_map.get(b.from_, -1), score_map.get(b.to, -1))
+            if max_a != max_b:
+                return -1 if max_a > max_b else 1
+            min_a = min(score_map.get(a.from_, -1), score_map.get(a.to, -1))
+            min_b = min(score_map.get(b.from_, -1), score_map.get(b.to, -1))
+            if min_a != min_b:
+                return -1 if min_a > min_b else 1
+            # Never returns 0: edges are told apart by edge_id as the last resort.
+            return -1 if a.edge_id < b.edge_id else 1
+        contradict_edges = sorted(contradict_edges, key=cmp_to_key(_cmp))
+        excluded: set[str] = set()
+        for edge in contradict_edges:
+            # Once one side of a pair has been dropped by an earlier edge, the
+            # contradiction is treated as settled and this edge drops nothing.
+            if edge.from_ in excluded or edge.to in excluded:
+                continue
+            score_a = score_map.get(edge.from_, -1)
+            score_b = score_map.get(edge.to, -1)
+            # Only act when both endpoints were actually scored.
+            if score_a >= 0 and score_b >= 0:
+                if score_a != score_b:
+                    excluded.add(edge.to if score_a > score_b else edge.from_)
+                else:
+                    # Equal scores: keep the lexicographically smaller id.
+                    excluded.add(edge.to if edge.from_ < edge.to else edge.from_)
+        if excluded:
+            filtered = [s for s in filtered if s.item.id not in excluded]
+    return filtered
+def surface_contradictions(state: GraphState, scored: list[ScoredItem]) -> list[ScoredItem]:
+    """Keep both sides of each contradiction, annotated via ``contradicted_by``.
+    Superseded items are still removed. The returned entries are fresh
+    ``ScoredItem`` objects (with their own ``contradicted_by`` list), so the
+    entries passed in are left untouched.
+    """
+    superseded = _superseded_ids(state)
+    result: list[ScoredItem] = []
+    for source in scored:
+        if source.item.id in superseded:
+            continue
+        inherited = list(source.contradicted_by) if source.contradicted_by else None
+        result.append(ScoredItem(item=source.item, score=source.score, contradicted_by=inherited))
+    contradict_edges = get_edges(state, {"kind": "CONTRADICTS", "active_only": True})
+    if not contradict_edges:
+        return result
+    by_id = {entry.item.id: entry for entry in result}
+    def annotate(target: ScoredItem, opponent: ScoredItem) -> None:
+        # Dedup by item id — multiple/bidirectional CONTRADICTS edges may
+        # connect the same pair.
+        if target.contradicted_by is None:
+            target.contradicted_by = []
+        if not any(known.id == opponent.item.id for known in target.contradicted_by):
+            target.contradicted_by.append(opponent.item)
+    for edge in contradict_edges:
+        left = by_id.get(edge.from_)
+        right = by_id.get(edge.to)
+        if left is None or right is None:
+            continue
+        if left is right:
+            continue  # a self-edge makes both ends the same entry; ignore it
+        annotate(left, right)
+        annotate(right, left)
+    return result
+# ---------------------------------------------------------------------------
+# 3. Diversity scoring
+# ---------------------------------------------------------------------------
+class DiversityOptions(BaseModel):
+    """Per-duplicate penalties consumed by ``apply_diversity``.
+    A penalty that is None or 0 switches that dimension off.
+    """
+    # Subtracted once per earlier entry with the same author.
+    author_penalty: float | None = None
+    # Subtracted once per earlier occurrence of each shared parent id.
+    parent_penalty: float | None = None
+    # Subtracted once per earlier entry with the same source_kind.
+    source_penalty: float | None = None
+def apply_diversity(
+    scored: list[ScoredItem],
+    options: DiversityOptions | dict[str, Any],
+) -> list[ScoredItem]:
+    """Re-rank scored items with per-duplicate penalties (author/parent/source).
+    Entries are visited in input order. Each one is charged, per enabled
+    dimension, the penalty times the number of earlier entries that already
+    used the same author / parent id / source kind. Scores are floored at 0
+    and the new entries are returned sorted by score, highest first.
+    """
+    if isinstance(options, DiversityOptions):
+        opts = options
+    else:
+        opts = DiversityOptions.model_validate(options)
+    author_w, parent_w, source_w = opts.author_penalty, opts.parent_penalty, opts.source_penalty
+    seen_authors: dict[str, int] = {}
+    seen_parents: dict[str, int] = {}
+    seen_sources: dict[str, int] = {}
+    def charge(seen: dict[str, int], key: str, weight: float) -> float:
+        # The charge grows with how many earlier entries already used this key.
+        prior = seen.get(key, 0)
+        seen[key] = prior + 1
+        return prior * weight
+    reranked: list[ScoredItem] = []
+    for entry in scored:
+        item = entry.item
+        penalty = 0.0
+        if author_w:
+            penalty += charge(seen_authors, item.author, author_w)
+        if parent_w and item.parents:
+            for parent_id in item.parents:
+                penalty += charge(seen_parents, parent_id, parent_w)
+        if source_w:
+            penalty += charge(seen_sources, item.source_kind, source_w)
+        adjusted = max(0.0, entry.score - penalty)
+        reranked.append(ScoredItem(item=item, score=adjusted, contradicted_by=entry.contradicted_by))
+    reranked.sort(key=lambda candidate: candidate.score, reverse=True)
+    return reranked
+# ---------------------------------------------------------------------------
+# 4. Combined smart retrieval
+# ---------------------------------------------------------------------------
+def smart_retrieve(
+    state: GraphState,
+    *,
+    budget: float,
+    cost_fn: Callable[[MemoryItem], float],
+    weights: ScoreWeights | dict[str, Any],
+    filter: MemoryFilter | dict[str, Any] | None = None,
+    contradictions: str | None = None,
+    diversity: DiversityOptions | dict[str, Any] | None = None,
+) -> list[ScoredItem]:
+    """Score -> contradiction policy -> diversity -> greedy budget pack.
+    ``contradictions`` selects the policy: ``"filter"`` or ``"surface"``; any
+    other value leaves the scored list as is. Packing walks the ranked entries
+    in order and keeps each one whose cost still fits in what is left of
+    ``budget``; entries that do not fit are passed over, not treated as a stop.
+    Raises:
+        ValueError: if ``cost_fn`` returns a negative or non-finite cost.
+    """
+    candidates = get_scored_items(state, weights, {"pre": filter})
+    if contradictions == "filter":
+        candidates = filter_contradictions(state, candidates)
+    elif contradictions == "surface":
+        candidates = surface_contradictions(state, candidates)
+    if diversity is not None:
+        candidates = apply_diversity(candidates, diversity)
+    packed: list[ScoredItem] = []
+    remaining = budget
+    for candidate in candidates:
+        cost = cost_fn(candidate.item)
+        if cost < 0 or not math.isfinite(cost):
+            raise ValueError(f"cost_fn must return a finite non-negative number, got {cost}")
+        if cost <= remaining:
+            remaining -= cost
+            packed.append(candidate)
+    return packed

memex/schemas.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Validation entry points — the parity shim for ``@ai2070/memex/schemas``.
+In Pydantic the models *are* the schema, so this module re-exports them and
+offers ``validate_*`` helpers (backed by ``TypeAdapter`` for the command unions).
+Use these to validate untrusted external input before folding it in:
+    from memex.schemas import validate_command
+    cmd = validate_command(raw)          # raises pydantic.ValidationError on bad shape
+    state = apply_command(state, cmd).state
+"""
+from __future__ import annotations
+from typing import Any
+from pydantic import TypeAdapter
+from .commands import MemoryCommand, MemoryCommandAdapter
+from .intent import Intent, IntentCommand
+from .models import Edge, MemoryItem
+from .task import Task, TaskCommand
+__all__ = [
+    "validate_command",
+    "validate_intent_command",
+    "validate_task_command",
+    "validate_memory_item",
+    "validate_edge",
+    # schema aliases (the Pydantic model IS the schema)
+    "MemoryItemSchema",
+    "EdgeSchema",
+    "IntentSchema",
+    "TaskSchema",
+    "MemoryCommandAdapter",
+    "IntentCommandAdapter",
+    "TaskCommandAdapter",
+]
+# The models are the schemas.
+# Plain aliases: each *Schema name is the very same class as the model.
+MemoryItemSchema = MemoryItem
+EdgeSchema = Edge
+IntentSchema = Intent
+TaskSchema = Task
+# Adapters for the intent/task command types, built once at import time and
+# reused by the validate_* helpers below.
+IntentCommandAdapter: TypeAdapter[IntentCommand] = TypeAdapter(IntentCommand)
+TaskCommandAdapter: TypeAdapter[TaskCommand] = TypeAdapter(TaskCommand)
+def validate_command(raw: Any) -> MemoryCommand:
+    """Validate a raw mapping into a typed memory command.
+    Delegates to ``MemoryCommandAdapter.validate_python``; per the module
+    docstring, a bad shape raises ``pydantic.ValidationError``.
+    """
+    return MemoryCommandAdapter.validate_python(raw)
+def validate_intent_command(raw: Any) -> IntentCommand:
+    """Validate ``raw`` into a typed intent command via ``IntentCommandAdapter``.
+    Any error raised by the adapter propagates to the caller.
+    """
+    return IntentCommandAdapter.validate_python(raw)
+def validate_task_command(raw: Any) -> TaskCommand:
+    """Validate ``raw`` into a typed task command via ``TaskCommandAdapter``.
+    Any error raised by the adapter propagates to the caller.
+    """
+    return TaskCommandAdapter.validate_python(raw)
+def validate_memory_item(raw: Any) -> MemoryItem:
+    """Validate ``raw`` into a ``MemoryItem`` via ``MemoryItem.model_validate``.
+    Any error raised by the model propagates to the caller.
+    """
+    return MemoryItem.model_validate(raw)
+def validate_edge(raw: Any) -> Edge:
+    """Validate ``raw`` into an ``Edge`` via ``Edge.model_validate``.
+    Any error raised by the model propagates to the caller.
+    """
+    return Edge.model_validate(raw)

memex/serialization.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Serialize / restore a GraphState.
+The on-disk shape matches the TS library — ``{"items": [[id, item], ...],
+"edges": [[id, edge], ...]}`` with unset optionals omitted and edge ``from``
+emitted under its alias — so a Python event store stays wire-compatible with a
+TypeScript one.
+"""
+from __future__ import annotations
+import json
+from typing import Any
+from .graph import GraphState
+from .models import Edge, MemoryItem
+__all__ = ["SerializedGraphState", "to_json", "from_json", "stringify", "parse"]
+# Wire shape: {"items": [[id, item_dict], ...], "edges": [[id, edge_dict], ...]}.
+SerializedGraphState = dict[str, list[list[Any]]]
+def _dump(model: MemoryItem | Edge) -> dict[str, Any]:
+    """Dump ``model`` to a plain dict, keyed by field alias and omitting None values."""
+    return model.model_dump(by_alias=True, exclude_none=True)
+def to_json(state: GraphState) -> SerializedGraphState:
+    """Convert ``state`` into its serializable form: ``[id, dumped]`` pairs for
+    items and for edges, in the iteration order of the underlying mappings."""
+    item_pairs = [[item_id, _dump(item)] for item_id, item in state.items.items()]
+    edge_pairs = [[edge_id, _dump(edge)] for edge_id, edge in state.edges.items()]
+    return {"items": item_pairs, "edges": edge_pairs}
+def from_json(data: SerializedGraphState) -> GraphState:
+    """Rebuild a ``GraphState`` from its serialized form, validating every
+    item and edge through its model."""
+    # Tolerate a missing items/edges key (mirrors JS `new Map(undefined)`).
+    items: dict[str, MemoryItem] = {}
+    for item_id, raw_item in data.get("items") or []:
+        items[item_id] = MemoryItem.model_validate(raw_item)
+    edges: dict[str, Edge] = {}
+    for edge_id, raw_edge in data.get("edges") or []:
+        edges[edge_id] = Edge.model_validate(raw_edge)
+    return GraphState(items=items, edges=edges)
+def stringify(state: GraphState, pretty: bool = False) -> str:
+    """Serialize ``state`` to a JSON string.
+    ``pretty=True`` indents by two spaces; otherwise the output is compact,
+    with no whitespace after separators.
+    """
+    payload = to_json(state)
+    if not pretty:
+        return json.dumps(payload, separators=(",", ":"))
+    return json.dumps(payload, indent=2)
+def parse(json_str: str) -> GraphState:
+    """Decode ``json_str`` with ``json.loads`` and rebuild the state via ``from_json``."""
+    return from_json(json.loads(json_str))

memex/stats.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""Aggregate counts over a GraphState."""
+from __future__ import annotations
+from collections.abc import Callable, Iterable
+from typing import NamedTuple, TypeVar
+from .graph import GraphState
+__all__ = ["ItemStats", "EdgeStats", "GraphStats", "get_stats"]
+T = TypeVar("T")
+class ItemStats(NamedTuple):
+    """Item counts for a graph, as produced by ``get_stats``."""
+    # Number of items in the state.
+    total: int
+    # Item counts keyed by each item's ``kind``.
+    by_kind: dict[str, int]
+    # Item counts keyed by each item's ``source_kind``.
+    by_source_kind: dict[str, int]
+    # Item counts keyed by each item's ``author``.
+    by_author: dict[str, int]
+    # Item counts keyed by each item's ``scope``.
+    by_scope: dict[str, int]
+    # Items whose ``parents`` is truthy (at least one parent id).
+    with_parents: int
+    # Items with no parents (``parents`` empty or unset).
+    root: int
+class EdgeStats(NamedTuple):
+    """Edge counts for a graph, as produced by ``get_stats``."""
+    # Number of edges in the state.
+    total: int
+    # Edges whose ``active`` flag is truthy.
+    active: int
+    # Edge counts keyed by each edge's ``kind``.
+    by_kind: dict[str, int]
+class GraphStats(NamedTuple):
+    """Combined item and edge statistics returned by ``get_stats``."""
+    # Counts over ``state.items``.
+    items: ItemStats
+    # Counts over ``state.edges``.
+    edges: EdgeStats
+def _count_by(values: Iterable[T], key_fn: Callable[[T], str]) -> dict[str, int]:
+    """Tally ``values`` by ``key_fn(value)``; keys appear in first-seen order."""
+    tally: dict[str, int] = {}
+    for value in values:
+        bucket = key_fn(value)
+        tally[bucket] = tally.setdefault(bucket, 0) + 1
+    return tally
+def get_stats(state: GraphState) -> GraphStats:
+    """Compute aggregate item and edge counts for ``state``.
+    Items are split into those with at least one parent and roots, and tallied
+    by kind, source kind, author and scope; edges are tallied by kind, with a
+    separate count of the active ones.
+    """
+    all_items = list(state.items.values())
+    all_edges = list(state.edges.values())
+    parented = sum(1 for item in all_items if item.parents)
+    item_stats = ItemStats(
+        total=len(all_items),
+        by_kind=_count_by(all_items, lambda i: i.kind),
+        by_source_kind=_count_by(all_items, lambda i: i.source_kind),
+        by_author=_count_by(all_items, lambda i: i.author),
+        by_scope=_count_by(all_items, lambda i: i.scope),
+        with_parents=parented,
+        # Every item is either parented or a root.
+        root=len(all_items) - parented,
+    )
+    edge_stats = EdgeStats(
+        total=len(all_edges),
+        active=sum(1 for e in all_edges if e.active),
+        by_kind=_count_by(all_edges, lambda e: e.kind),
+    )
+    return GraphStats(items=item_stats, edges=edge_stats)