PyPI - graphddb-runtime - Versions diffs - 0.1.0__py3-none-any.whl - Mend

graphddb-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

graphddb_runtime/__init__.py +58 -0
graphddb_runtime/async_runtime.py +110 -0
graphddb_runtime/batch.py +218 -0
graphddb_runtime/concurrency.py +87 -0
graphddb_runtime/cursor.py +49 -0
graphddb_runtime/errors.py +80 -0
graphddb_runtime/filters.py +194 -0
graphddb_runtime/hydration.py +75 -0
graphddb_runtime/limits.py +20 -0
graphddb_runtime/per_key_cursor.py +105 -0
graphddb_runtime/relations.py +199 -0
graphddb_runtime/runtime.py +1674 -0
graphddb_runtime/templates.py +131 -0
graphddb_runtime/transactions.py +440 -0
graphddb_runtime-0.1.0.dist-info/METADATA +160 -0
graphddb_runtime-0.1.0.dist-info/RECORD +18 -0
graphddb_runtime-0.1.0.dist-info/WHEEL +5 -0
graphddb_runtime-0.1.0.dist-info/top_level.txt +1 -0

graphddb_runtime/filters.py ADDED Viewed

@@ -0,0 +1,194 @@
+"""Declarative filter → DynamoDB ``FilterExpression`` compiler (issue #44).
+Port of the TypeScript ``compileFilterExpression``
+(``src/expression/filter-expression.ts``). The declarative tree carried in a
+query spec's ``filter.declarative`` is JSON-safe (the TS bridge guard rejects
+the non-serializable ``cond`` escape hatch), so only the operator/logical forms
+are handled here.
+Names are ``#``-aliased columns (reused per distinct column); values are
+``:``-aliased parameters — no literal interpolation. The result is returned in
+the boto3 *client* shape: attribute values are already serialized
+``AttributeValue`` dicts.
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Mapping, Optional
+from boto3.dynamodb.types import TypeSerializer
+_LOGICAL_KEYS = {"and", "or", "not"}
+_OPERATOR_KEYS = {
+    "eq",
+    "ne",
+    "gt",
+    "ge",
+    "lt",
+    "le",
+    "between",
+    "in",
+    "beginsWith",
+    "contains",
+    "notContains",
+    "attributeExists",
+    "attributeType",
+    "size",
+}
+class _Ctx:
+    def __init__(self, serializer: TypeSerializer) -> None:
+        self.names: Dict[str, str] = {}
+        self.values: Dict[str, Any] = {}
+        self._serializer = serializer
+        self._name_n = 0
+        self._value_n = 0
+    def name_alias(self, column: str) -> str:
+        for alias, col in self.names.items():
+            if col == column:
+                return alias
+        alias = f"#f{self._name_n}"
+        self._name_n += 1
+        self.names[alias] = column
+        return alias
+    def value_alias(self, raw: Any) -> str:
+        alias = f":vf{self._value_n}"
+        self._value_n += 1
+        self.values[alias] = self._serializer.serialize(raw)
+        return alias
+def _is_operator_object(value: Any) -> bool:
+    if not isinstance(value, dict) or not value:
+        return False
+    return all(k in _OPERATOR_KEYS for k in value.keys())
+def _is_already_wrapped(expr: str) -> bool:
+    if not (expr.startswith("(") and expr.endswith(")")):
+        return False
+    depth = 0
+    for i, ch in enumerate(expr):
+        if ch == "(":
+            depth += 1
+        elif ch == ")":
+            depth -= 1
+            if depth == 0 and i < len(expr) - 1:
+                return False
+    return depth == 0
def _wrap(expr: str) -> str:
    """Parenthesize ``expr`` if it is a compound not yet enclosed in parentheses.

    "Compound" is detected textually by the presence of `` AND `` or `` OR ``,
    so a ``BETWEEN x AND y`` clause is parenthesized as well.
    """
    is_compound = " AND " in expr or " OR " in expr
    needs_parens = not _is_already_wrapped(expr) and is_compound
    return f"({expr})" if needs_parens else expr
+def _join_and(clauses: List[str]) -> str:
+    if len(clauses) == 1:
+        return clauses[0]
+    return " AND ".join(_wrap(c) for c in clauses)
def _compile_field(ctx: _Ctx, field: str, condition: Any) -> str:
    """Compile the condition attached to one column into expression text.

    A condition that is not an operator object is an equality test against the
    literal. Otherwise each operator in the object yields one clause and the
    clauses are ANDed together.

    Raises ``ValueError`` for an operator name that has no handler.
    """
    column = ctx.name_alias(field)
    if not _is_operator_object(condition):
        return f"{column} = {ctx.value_alias(condition)}"

    # Operators rendered as "<column> <symbol> <value>".
    comparison_symbols = {
        "eq": "=",
        "ne": "<>",
        "gt": ">",
        "ge": ">=",
        "lt": "<",
        "le": "<=",
    }
    # Operators rendered as "<call>(<column>, <value>)".
    two_argument_calls = {
        "beginsWith": "begins_with",
        "contains": "contains",
        "notContains": "NOT contains",
        "attributeType": "attribute_type",
    }

    compiled: List[str] = []
    for operator, operand in condition.items():
        if operator in comparison_symbols:
            symbol = comparison_symbols[operator]
            compiled.append(f"{column} {symbol} {ctx.value_alias(operand)}")
        elif operator in two_argument_calls:
            call = two_argument_calls[operator]
            compiled.append(f"{call}({column}, {ctx.value_alias(operand)})")
        elif operator == "between":
            lower, upper = operand
            # Allocate the lower bound's alias first so numbering is stable.
            lower_alias = ctx.value_alias(lower)
            upper_alias = ctx.value_alias(upper)
            compiled.append(f"{column} BETWEEN {lower_alias} AND {upper_alias}")
        elif operator == "in":
            members = ", ".join(ctx.value_alias(member) for member in operand)
            compiled.append(f"{column} IN ({members})")
        elif operator == "attributeExists":
            # Only the literal ``False`` asks for absence; any other operand
            # (including other falsy values) asks for presence.
            if operand is False:
                compiled.append(f"attribute_not_exists({column})")
            else:
                compiled.append(f"attribute_exists({column})")
        elif operator == "size":
            compiled.append(f"size({column}) = {ctx.value_alias(operand)}")
        else:
            raise ValueError(
                f"Unknown filter operator '{operator}' on field '{field}'"
            )
    return _join_and(compiled)
def _compile_node(ctx: _Ctx, node: Any) -> str:
    """Compile one level of the declarative tree into expression text.

    Entries whose value is ``None`` are ignored. ``and`` / ``or`` take a
    sequence of sub-nodes, ``not`` takes a single sub-node, and every other key
    is a column handed to ``_compile_field``. The clauses produced at this
    level are ANDed together; the result is ``""`` when nothing compiled.
    """
    compiled: List[str] = []
    for key, value in node.items():
        if value is None:
            continue

        if key not in _LOGICAL_KEYS:
            field_clause = _compile_field(ctx, key, value)
            if field_clause:
                compiled.append(field_clause)
            continue

        if key not in ("and", "or"):  # i.e. "not"
            negated = _compile_node(ctx, value)
            if negated:
                compiled.append(f"NOT {_wrap(negated)}")
            continue

        # "and" / "or": drop branches that compiled to nothing.
        branches = [
            text for text in (_compile_node(ctx, child) for child in value) if text
        ]
        if not branches:
            continue
        if len(branches) == 1:
            # A lone branch needs neither a joiner nor its own parentheses.
            compiled.append(branches[0])
            continue
        joiner = " AND " if key == "and" else " OR "
        compiled.append("(" + joiner.join(_wrap(text) for text in branches) + ")")
    return _join_and(compiled)
def compile_filter(
    declarative: Mapping[str, Any],
    serializer: TypeSerializer,
) -> Optional[Dict[str, Any]]:
    """Turn a declarative filter tree into client-shape filter parameters.

    The result is a dict with the keys ``FilterExpression``,
    ``ExpressionAttributeNames`` and ``ExpressionAttributeValues``. ``None`` is
    returned instead when the tree is empty or compiles to an empty
    expression, so the caller can leave the filter off the request.
    """
    if declarative:
        ctx = _Ctx(serializer)
        expression = _compile_node(ctx, declarative)
        if expression:
            return {
                "FilterExpression": expression,
                "ExpressionAttributeNames": ctx.names,
                "ExpressionAttributeValues": ctx.values,
            }
    return None

graphddb_runtime/hydration.py ADDED Viewed

@@ -0,0 +1,75 @@
+"""Hydration of raw DynamoDB items into result dicts (issue #44).
+Mirrors the TS hydrator (``src/hydrator/hydrator.ts``):
+- only fields named in ``select`` (value ``True``) are copied out;
+- internal key attributes (``PK`` / ``SK`` / ``GSI*PK`` / ``GSI*SK``) and the
+  entity's PK prefix are never part of the result (they are simply not selected,
+  so they are dropped naturally);
+- a ``string`` field whose manifest carries ``format: "datetime"`` is restored
+  to a ``datetime``; ``format: "date"`` to a ``datetime`` at midnight UTC.
+Relation keys in the select (objects, not ``True``) are skipped here; the
+single-operation core has no relations to assemble, so a relation select simply
+contributes nothing to the hydrated root item.
+"""
+from __future__ import annotations
+from datetime import datetime, timezone
+from typing import Any, Dict, Mapping
+from .errors import HydrationError
+def _is_internal_key(name: str) -> bool:
+    if name in ("PK", "SK"):
+        return True
+    # GSI1PK / GSI1SK / GSI12PK ... — index key attributes.
+    return name.startswith("GSI") and (name.endswith("PK") or name.endswith("SK"))
def hydrate_item(
    raw: Mapping[str, Any],
    select: Mapping[str, Any],
    entity_meta: Mapping[str, Any],
) -> Dict[str, Any]:
    """Build the result dict for one deserialized item.

    A field is copied when its ``select`` entry is exactly ``True``, it is not
    an internal key attribute, and it is present in ``raw``. Each copied value
    is passed through ``_deserialize_value`` with that field's entry from
    ``entity_meta["fields"]``. Select entries that are not ``True`` (relation
    selects, for instance) contribute nothing.
    """
    field_metas = entity_meta.get("fields", {})
    requested = (name for name, flag in select.items() if flag is True)
    return {
        name: _deserialize_value(raw[name], field_metas.get(name))
        for name in requested
        if not _is_internal_key(name) and name in raw
    }
+def _deserialize_value(value: Any, field_meta: Mapping[str, Any] | None) -> Any:
+    if not field_meta:
+        return value
+    fmt = field_meta.get("format")
+    if fmt == "datetime" and isinstance(value, str):
+        return _parse_iso8601(value)
+    if fmt == "date" and isinstance(value, str):
+        return _parse_iso8601(value + "T00:00:00.000Z")
+    return value
+def _parse_iso8601(value: str) -> datetime:
+    """Parse an ISO 8601 instant (the TS ``toISOString`` form, ``...Z``)."""
+    text = value
+    if text.endswith("Z"):
+        text = text[:-1] + "+00:00"
+    try:
+        dt = datetime.fromisoformat(text)
+    except ValueError as exc:
+        raise HydrationError(f"invalid ISO 8601 datetime: {value!r}") from exc
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    return dt

graphddb_runtime/limits.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""Runtime limits for the GraphDDB Python runtime (issue #44).
+The single-operation core applies only the limits that are meaningful without
+relation traversal (``max_operations`` and ``max_items``); the remaining fields
+are defined here so the full surface exists for the relation runtime (#45).
+"""
+from __future__ import annotations
+from dataclasses import dataclass
@dataclass(frozen=True)
class RuntimeLimits:
    """Immutable execution-time upper bounds for a runtime call.

    These are applied in addition to the limits defined on the TS side.
    """

    # Per this module's docstring, the single-operation core enforces only
    # these two bounds.
    max_operations: int = 20
    max_items: int = 100
    # Per this module's docstring, the remaining bounds are declared for the
    # relation runtime (#45).
    max_depth: int = 1
    max_batch_get_items: int = 100

graphddb_runtime/per_key_cursor.py ADDED Viewed

@@ -0,0 +1,105 @@
+"""Per-key cursor envelope for batched ``range`` contract methods (issue #62,
+CQRS single-service runtime; spec ``docs/cqrs-contract.md``,
+"Pagination under batch + list").
+A ``range`` contract method paginates **per key**: each key owns its own
+connection and therefore its own pagination position. The proposal requires that
+"the cursor envelope must carry the key it belongs to" so a caller can never
+accidentally resume one key's pagination against another key.
+This module wraps the **inner** page cursor (the base64url-encoded DynamoDB
+``LastEvaluatedKey`` produced by :func:`encode_cursor`) together with a **stable
+identity of the owning key** into a single opaque envelope, itself base64url
+JSON. It is a byte-for-byte port of the TypeScript ``src/runtime/per-key-cursor.ts``
+so TS and Python mint and accept the *same* cursor strings (the parity
+foundation conformance #65 will lock):
+- :func:`serialize_contract_key` — canonical, field-sorted JSON identity of a
+  key (matching the TS ``serializeContractKey``).
+- :func:`encode_per_key_cursor` — ``{key, inner}`` envelope → base64url string,
+  or ``None`` when there is no further page.
+- :func:`decode_per_key_cursor` — decode + **verify** the envelope belongs to the
+  key being read.
+Even the single-key range form is wrapped, keeping the cursor shape uniform
+(a single contract's range method is ``inputArity: 'single'`` — see the runtime's
+``execute_query_method`` — so the array fan-out is #63 territory, but the
+envelope shape does not branch on arity).
+"""
+from __future__ import annotations
+import json
+from typing import Any, Mapping, Optional
+from .cursor import decode_cursor, encode_cursor
def serialize_contract_key(key: Mapping[str, Any]) -> str:
    """Serialize a contract key to its canonical compact-JSON identity.

    Top-level fields are emitted in sorted name order, so two keys holding the
    same fields in a different order produce the same string, e.g.
    ``{"categoryId":"tech"}``. Non-ASCII characters are written unescaped.
    """
    canonical = {}
    for name in sorted(key):
        canonical[name] = key[name]
    return json.dumps(canonical, ensure_ascii=False, separators=(",", ":"))
def encode_per_key_cursor(
    key: Mapping[str, Any], inner: Optional[str]
) -> Optional[str]:
    """Wrap an inner page cursor in an envelope that names its owning key.

    The envelope is ``{"key": <canonical key identity>, "inner": <inner>}``,
    encoded by ``encode_cursor`` into one opaque string. When ``inner`` is
    ``None`` (no further page for this key) the result is ``None`` as well;
    an envelope is never built around a missing cursor.
    """
    if inner is not None:
        return encode_cursor(
            {"key": serialize_contract_key(key), "inner": inner}
        )
    return None
def decode_per_key_cursor(cursor: str, expected_key: Mapping[str, Any]) -> str:
    """Open a per-key cursor envelope and return its inner page cursor.

    The envelope must decode, must be a dict with string ``key`` and ``inner``
    entries, and its ``key`` must equal the canonical identity of
    ``expected_key``. This stops a cursor issued for one key from resuming
    another key's pagination.

    :raises ValueError: if the cursor fails to decode, the envelope is
        malformed, or it was issued for a different key.
    """
    try:
        decoded = decode_cursor(cursor)
    except Exception as exc:  # noqa: BLE001 - any decode failure is a bad cursor
        raise ValueError(
            "Invalid per-key cursor: the value passed as `after` is not a cursor "
            "minted by this runtime (it failed to decode)."
        ) from exc

    well_formed = (
        isinstance(decoded, dict)
        and isinstance(decoded.get("key"), str)
        and isinstance(decoded.get("inner"), str)
    )
    if not well_formed:
        raise ValueError(
            "Invalid per-key cursor: the decoded envelope is missing its key / "
            "inner fields. A range cursor must be a per-key envelope minted by "
            "this runtime."
        )

    owner_identity = decoded["key"]
    wanted_identity = serialize_contract_key(expected_key)
    if owner_identity != wanted_identity:
        raise ValueError(
            f"Per-key cursor mismatch: the supplied `after` cursor belongs to key "
            f"{owner_identity}, but the method is being called for key {wanted_identity}. "
            f"A range cursor may only resume pagination of the same key it was "
            f"issued for."
        )
    return decoded["inner"]

graphddb_runtime/relations.py ADDED Viewed

@@ -0,0 +1,199 @@
+"""Multi-operation relation traversal / result assembly (issue #45).
+The single-operation core (#44) executes one ``OperationSpec`` and returns its
+result. A relation query is expressed (by the #42 static planner) as **several**
+operations whose ``resultPath`` / ``{result.<sourceField>}`` templates wire them
+into a tree, mirroring the TypeScript runtime semantics:
+- ``hasMany`` → a per-parent ``Query`` (with an optional ``begins_with`` range
+  and server-side ``FilterExpression``) producing a ``{items, cursor}``
+  connection (``src/relation/traversal.ts`` ``resolveRelations`` hasMany branch);
+- ``belongsTo`` / ``hasOne`` → a single ``BatchGetItem`` over **all** parents'
+  child keys, with **dedup**, 100-key chunking, and ``UnprocessedKeys``
+  exponential-backoff retry, matched back to parents by key (no per-parent
+  ``GetItem`` — N+1 avoided), mirroring ``planBatchGetForQueryKeys`` +
+  ``executeBatchGet`` + ``batchGetChunkWithRetry``.
+This module owns the orchestration; the executing runtime injects callables for
+the actual boto3 work so the relation logic stays testable without a client.
+## resultPath grammar
+A ``resultPath`` is ``$`` (root) or ``$`` followed by ``.``-separated tokens.
+A trailing ``items`` token means the write target is a hasMany **connection**
+(``{items, cursor}``); the token immediately before ``items`` is the property
+name. Otherwise the final token is the property name for a single-value
+(belongsTo / hasOne) relation. ``items`` tokens in the interior mean "iterate
+into the elements of that connection".
+Examples (root op already placed at ``$``):
+- ``$.members.items``                              → root.members = connection
+- ``$.groups.items.group``                         → for each g in
+  root.groups.items: g.group = item|None
+- ``$.groups.items.group.permissions.items``       → for each non-null
+  root.groups.items[*].group: .permissions = connection
+"""
+from __future__ import annotations
+from typing import Any, Callable, Dict, List, Optional, Tuple
# Path token that denotes the element list of a hasMany connection.
ITEMS = "items"


def parse_result_path(path: str) -> Tuple[List[str], str, bool]:
    """Split a ``resultPath`` into ``(parent_tokens, write_key, is_connection)``.

    ``parent_tokens`` is the token sequence to walk from the root to the nodes
    the operation writes onto. ``write_key`` is the property set on each of
    those nodes. ``is_connection`` is True when the path ends in ``items``,
    i.e. the property receives an ``{items, cursor}`` connection; the property
    name is then the token just before ``items``.

    Raises ``ValueError`` when the path is the root (``"$"`` or ``""``), does
    not start with ``"$."``, consists of ``items`` alone, or contains an empty
    token (``"$."``, ``"$.a."``, ``"$.a..b"``).
    """
    if path == "$" or path == "":
        raise ValueError("root operation has no relation path")
    if not path.startswith("$."):
        raise ValueError(f"unsupported resultPath {path!r}")
    tokens = path[2:].split(".")
    # An empty token would otherwise be accepted as an empty property name,
    # which is not a token the path grammar allows.
    if not all(tokens):
        raise ValueError(f"malformed resultPath {path!r}")
    if tokens[-1] == ITEMS:
        # ...<prop>.items  -> connection written at <prop>.
        if len(tokens) < 2:
            raise ValueError(f"malformed resultPath {path!r}")
        return tokens[:-2], tokens[-2], True
    # ...<prop>  -> single-value relation written at <prop>.
    return tokens[:-1], tokens[-1], False
def collect_parents(root: Any, parent_tokens: List[str]) -> List[Dict[str, Any]]:
    """Walk ``parent_tokens`` down from ``root`` and return the dicts reached.

    An ``items`` token replaces each current dict by the elements of its
    ``items`` list (a missing or empty list contributes nothing). Any other
    token replaces each current dict by the value stored under that name,
    dropping dicts where the value is absent or ``None``. Non-dict nodes are
    discarded at every step, including from the final result.
    """
    frontier: List[Any] = [root]
    for token in parent_tokens:
        dict_nodes = [node for node in frontier if isinstance(node, dict)]
        if token == ITEMS:
            frontier = [
                element
                for node in dict_nodes
                for element in (node.get(ITEMS, []) or [])
            ]
        else:
            children = (node.get(token) for node in dict_nodes)
            frontier = [child for child in children if child is not None]
    return [node for node in frontier if isinstance(node, dict)]
class RelationAssembler:
    """Merges relation operations into a result tree via injected executors.

    The runtime supplies three callables, so this class needs no client:

    - ``run_query(op, source_values) -> {"items": [...], "cursor": str|None}``
      runs the hasMany Query for one parent's source binding.
    - ``run_batch_get(op, source_values) -> {serialized_key: item}`` fetches
      the children for a list of source bindings and returns them keyed by
      serialized child key.
    - ``key_for(op, source_value) -> serialized_key`` yields the key under
      which ``run_batch_get`` reports the child of one source binding.
    """

    def __init__(
        self,
        *,
        run_query: Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]],
        run_batch_get: Callable[
            [Dict[str, Any], List[Dict[str, Any]]], Dict[str, Any]
        ],
        key_for: Callable[[Dict[str, Any], Dict[str, Any]], str],
    ) -> None:
        """Store the three executor callables (keyword-only)."""
        self._run_query = run_query
        self._run_batch_get = run_batch_get
        self._key_for = key_for

    def apply(self, root: Any, op: Dict[str, Any]) -> None:
        """Resolve one relation operation and merge its results into ``root``.

        The parents are located from ``op["resultPath"]``; nothing happens when
        there are none. An op whose ``type`` is ``"BatchGetItem"`` is resolved
        with one batch get across all parents; any other type is treated as a
        per-parent Query.
        """
        parent_tokens, write_key, _is_connection = parse_result_path(
            op["resultPath"]
        )
        parents = collect_parents(root, parent_tokens)
        if not parents:
            return
        if op["type"] == "BatchGetItem":
            self._apply_batch_get(parents, op, write_key)
        else:  # Query (hasMany)
            self._apply_query(parents, op, write_key)

    # -- belongsTo / hasOne: a single BatchGetItem across all parents ---------

    def _apply_batch_get(
        self,
        parents: List[Dict[str, Any]],
        op: Dict[str, Any],
        write_key: str,
    ) -> None:
        """Set ``parent[write_key]`` to the fetched child, or ``None``.

        Each parent's source binding and serialized key are computed once.
        Bindings that share a serialized key are sent to ``run_batch_get`` only
        once, in first-seen order. ``run_batch_get`` is not called at all when
        no parent has a source value, so it never receives an empty key list.
        Parents are only written after the fetch returns.
        """
        source_field = op["sourceField"]
        unresolved = object()  # marks parents that have no source value
        markers: List[Any] = []
        source_values: List[Dict[str, Any]] = []
        seen_markers: set = set()
        for parent in parents:
            binding = self._source_values(parent, source_field)
            if binding is None:
                markers.append(unresolved)
                continue
            marker = self._key_for(op, binding)
            markers.append(marker)
            if marker not in seen_markers:
                seen_markers.add(marker)
                source_values.append(binding)

        key_to_item: Dict[str, Any] = (
            self._run_batch_get(op, source_values) if source_values else {}
        )

        for parent, marker in zip(parents, markers):
            if marker is unresolved:
                parent[write_key] = None
            else:
                # A key missing from the response also resolves to None.
                parent[write_key] = key_to_item.get(marker)

    # -- hasMany: a per-parent Query ------------------------------------------

    def _apply_query(
        self,
        parents: List[Dict[str, Any]],
        op: Dict[str, Any],
        write_key: str,
    ) -> None:
        """Set ``parent[write_key]`` to the connection returned by ``run_query``.

        A parent without a source value gets an empty connection
        (``{"items": [], "cursor": None}``) and no query is run for it.
        """
        source_field = op["sourceField"]
        for parent in parents:
            binding = self._source_values(parent, source_field)
            if binding is None:
                parent[write_key] = {"items": [], "cursor": None}
                continue
            parent[write_key] = self._run_query(op, binding)

    @staticmethod
    def _source_values(parent: Dict[str, Any], source_field: str) -> Optional[
        Dict[str, Any]
    ]:
        """Return the ``{source_field: value}`` binding for a parent, or None.

        ``None`` is returned when the field is absent or holds ``None``; the
        relation cannot be resolved for such a parent. Other falsy values
        (``0``, ``""``) are valid source values.
        """
        value = parent.get(source_field)
        if value is None:
            return None
        return {source_field: value}