PyPI - pycograd - Versions diffs - 0.0.1__py3-none-any.whl - Mend

pycograd 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

pycograd/__init__.py +390 -0
pycograd/_constraints.py +148 -0
pycograd/_dims.py +453 -0
pycograd/_typing.py +97 -0
pycograd/_version.py +669 -0
pycograd/ad_graph.py +372 -0
pycograd/backends/__init__.py +302 -0
pycograd/backends/abstract_backend.py +73 -0
pycograd/backends/cupy_backend.py +47 -0
pycograd/backends/jax_backend.py +207 -0
pycograd/backends/mps_backend.py +47 -0
pycograd/backends/numpy_backend.py +66 -0
pycograd/backends/tf_backend.py +407 -0
pycograd/backends/torch_backend.py +482 -0
pycograd/batching.py +638 -0
pycograd/capture.py +527 -0
pycograd/checkpoint.py +420 -0
pycograd/compile.py +199 -0
pycograd/cost.py +548 -0
pycograd/data.py +115 -0
pycograd/dtypes.py +152 -0
pycograd/examples/__init__.py +12 -0
pycograd/examples/__main__.py +242 -0
pycograd/examples/models.py +953 -0
pycograd/export.py +121 -0
pycograd/extension.py +137 -0
pycograd/forward.py +683 -0
pycograd/functional.py +808 -0
pycograd/ops.py +1575 -0
pycograd/optimizers.py +284 -0
pycograd/params.py +882 -0
pycograd/passes.py +580 -0
pycograd/random.py +92 -0
pycograd/remat.py +779 -0
pycograd/shapes.py +1174 -0
pycograd/tensor.py +650 -0
pycograd/trace.py +420 -0
pycograd/tracer.py +531 -0
pycograd/training.py +136 -0
pycograd/transforms.py +1078 -0
pycograd/transpose.py +167 -0
pycograd/tree.py +109 -0
pycograd/version.py +18 -0
pycograd-0.0.1.dist-info/METADATA +324 -0
pycograd-0.0.1.dist-info/RECORD +48 -0
pycograd-0.0.1.dist-info/WHEEL +5 -0
pycograd-0.0.1.dist-info/licenses/docs/LICENSE.txt +11 -0
pycograd-0.0.1.dist-info/top_level.txt +1 -0

pycograd/__init__.py ADDED Viewed

@@ -0,0 +1,390 @@
+# -*- coding: utf-8 -*-
+"""pycograd: a small, readable reverse-mode autograd built on numpy and pyccolo.
+Write ordinary numeric Python -- including ``numpy`` calls like ``np.exp``,
+``np.dot``, ``np.sum`` and operators like ``@`` -- and get correct gradients.
+``Var`` is the reverse-mode tape node; ``value_and_grad`` / ``grad`` wrap a
+function to return gradients with the same pytree structure as its arguments.
+"""
+from importlib.metadata import PackageNotFoundError, version
+from pycograd import random
+from pycograd._typing import Operand, Tensor
+from pycograd.ad_graph import grad_graph, jit
+from pycograd.backends import activate, device, get_backend
+from pycograd.capture import Graph, capture, eval_graph
+from pycograd.checkpoint import checkpoint
+from pycograd.compile import compile_to
+from pycograd.cost import (
+    DEFAULT_COST_MODEL,
+    CostModel,
+    GraphCost,
+    NodeCost,
+    calibrate,
+    cost_report,
+)
+from pycograd.data import DataLoader, batches
+from pycograd.dtypes import current_dtype, dtype, resolve_dtype
+from pycograd.export import export_onnx, export_torchscript, to_torch_module
+from pycograd.extension import load_ipython_extension, unload_ipython_extension
+from pycograd.functional import (
+    avg_pool2d,
+    batch_norm,
+    batch_norm_init,
+    causal_conv1d,
+    conv1d,
+    conv2d,
+    conv_transpose1d,
+    conv_transpose2d,
+    cross_entropy,
+    dropout,
+    elu,
+    embedding,
+    gelu,
+    group_norm,
+    hardsigmoid,
+    hardswish,
+    instance_norm,
+    layer_norm,
+    leaky_relu,
+    linear,
+    log_softmax,
+    logsumexp,
+    max_pool2d,
+    mish,
+    multi_head_attention,
+    one_hot,
+    relu,
+    rms_norm,
+    scaled_dot_product_attention,
+    selu,
+    sigmoid,
+    silu,
+    softmax,
+    softplus,
+    softsign,
+    streaming_conv1d,
+    streaming_conv1d_init,
+    streaming_conv2d,
+    streaming_conv2d_init,
+    streaming_conv_transpose1d,
+    streaming_conv_transpose1d_init,
+    streaming_conv_transpose2d,
+    streaming_conv_transpose2d_init,
+    swish,
+    tanh,
+    upsample_nearest2d,
+)
+from pycograd.ops import (
+    AutodiffWarning,
+    d_abs,
+    d_arctan,
+    d_clip,
+    d_column_stack,
+    d_concatenate,
+    d_cos,
+    d_cosh,
+    d_cumsum,
+    d_dstack,
+    d_einsum,
+    d_exp,
+    d_expand_dims,
+    d_expm1,
+    d_gated_act,
+    d_hstack,
+    d_log,
+    d_log1p,
+    d_logsumexp,
+    d_max,
+    d_maximum,
+    d_mean,
+    d_min,
+    d_minimum,
+    d_reciprocal,
+    d_reshape,
+    d_sigmoid,
+    d_sin,
+    d_sinh,
+    d_softmax,
+    d_sqrt,
+    d_square,
+    d_stack,
+    d_std,
+    d_sum,
+    d_tanh,
+    d_transpose,
+    d_var,
+    d_vstack,
+    d_where,
+)
+from pycograd.optimizers import (
+    SGD,
+    Adam,
+    AdamW,
+    Optimizer,
+    clip_grad_norm,
+    constant_lr,
+    cosine_decay,
+    step_decay,
+)
+from pycograd.params import (
+    Param,
+    ParamDict,
+    Weight,
+    buffer,
+    frozen,
+    on_cpu,
+    on_device,
+    param_values,
+    params,
+    register_pipescript_params_macro,
+    tied,
+)
+from pycograd.passes import optimize
+from pycograd.remat import (
+    Decision,
+    RematPlan,
+    apply_remat_plan,
+    eval_scheduled,
+    plan_remat,
+)
+from pycograd.shapes import (
+    Dim,
+    ShapedArray,
+    ShapeDtypeStruct,
+    ShapeError,
+    Summary,
+    bind,
+    eval_shape,
+    infer_shapes,
+    substitute,
+    summary,
+)
+from pycograd.tensor import Var, detach
+from pycograd.tracer import AutodiffTracer, resolve_call
+from pycograd.training import accuracy, fit, train
+from pycograd.transforms import (
+    grad,
+    gradient_descent,
+    jacfwd,
+    jacrev,
+    jvp,
+    value_and_grad,
+    vmap,
+)
+from pycograd.tree import (
+    sgd_update,
+    tree_flatten,
+    tree_leaves,
+    tree_map,
+    tree_structure,
+    tree_unflatten,
+)
+# Friendly aliases for the fused primitives (also reached via ``np.einsum`` / ``np.cumsum``
+# interception); these read more naturally than the ``d_`` names at a call site.
+einsum = d_einsum
+cumsum = d_cumsum
+gated_act = d_gated_act  # tanh(f) * sigmoid(s), the WaveNet / GLU gate
+# Take the version from the installed distribution's metadata instead of hard-coding
+# it here; fall back to a placeholder when no "pycograd" distribution is found.
+try:
+    __version__ = version("pycograd")
+except PackageNotFoundError:  # not installed (e.g. running from a source checkout)
+    __version__ = "0.0.0+unknown"
+__all__ = [
+    "__version__",
+    # core
+    "Var",
+    "detach",
+    "Tensor",
+    "Operand",
+    # parameters
+    "Param",
+    "ParamDict",
+    "Weight",
+    "frozen",
+    "buffer",
+    "tied",
+    "on_cpu",
+    "on_device",
+    "params",
+    "param_values",
+    # transforms / training
+    "value_and_grad",
+    "grad",
+    "checkpoint",
+    "vmap",
+    "jvp",
+    "jacfwd",
+    "jacrev",
+    "gradient_descent",
+    "sgd_update",
+    "train",
+    "fit",
+    "accuracy",
+    # shape inference
+    "eval_shape",
+    "infer_shapes",
+    "substitute",
+    "bind",
+    "summary",
+    "Summary",
+    "ShapeDtypeStruct",
+    "ShapedArray",
+    "ShapeError",
+    "Dim",
+    # compile to other frameworks (torch / tf / jax)
+    "compile_to",
+    "get_backend",
+    # graph-capture IR + optimization passes
+    "capture",
+    "eval_graph",
+    "optimize",
+    "grad_graph",
+    "jit",
+    "Graph",
+    # static cost model over the capture IR (CPU / memory / disk)
+    "cost_report",
+    "CostModel",
+    "GraphCost",
+    "NodeCost",
+    "DEFAULT_COST_MODEL",
+    "calibrate",
+    # rematerialization / spill planning + memory-managed execution
+    "plan_remat",
+    "RematPlan",
+    "Decision",
+    "apply_remat_plan",
+    "eval_scheduled",
+    # device / array backend seam (numpy default, cupy for GPU)
+    "device",
+    "activate",
+    # working-dtype seam (float64 default; float32 / float16 / bfloat16)
+    "dtype",
+    "current_dtype",
+    "resolve_dtype",
+    # static export (standalone artifacts)
+    "to_torch_module",
+    "export_torchscript",
+    "export_onnx",
+    # optimizers
+    "Optimizer",
+    "SGD",
+    "Adam",
+    "AdamW",
+    "clip_grad_norm",
+    "constant_lr",
+    "step_decay",
+    "cosine_decay",
+    # neural-net ops (stable softmax family, cross-entropy, activations)
+    "softmax",
+    "log_softmax",
+    "logsumexp",
+    "cross_entropy",
+    "relu",
+    "sigmoid",
+    "silu",
+    "swish",
+    "gelu",
+    "tanh",
+    "leaky_relu",
+    "elu",
+    "softplus",
+    "mish",
+    "hardswish",
+    "hardsigmoid",
+    "softsign",
+    "selu",
+    "conv1d",
+    "conv2d",
+    "causal_conv1d",
+    "conv_transpose1d",
+    "conv_transpose2d",
+    "streaming_conv1d",
+    "streaming_conv1d_init",
+    "streaming_conv2d",
+    "streaming_conv2d_init",
+    "streaming_conv_transpose1d",
+    "streaming_conv_transpose1d_init",
+    "streaming_conv_transpose2d",
+    "streaming_conv_transpose2d_init",
+    "upsample_nearest2d",
+    "max_pool2d",
+    "avg_pool2d",
+    "one_hot",
+    # neural-net layers (normalization, attention, embedding, linear, dropout)
+    "layer_norm",
+    "rms_norm",
+    "batch_norm",
+    "batch_norm_init",
+    "group_norm",
+    "instance_norm",
+    "scaled_dot_product_attention",
+    "multi_head_attention",
+    "embedding",
+    "linear",
+    "dropout",
+    # splittable PRNG keys (pycograd.random: key / split / fold_in + samplers)
+    "random",
+    # data / batching
+    "batches",
+    "DataLoader",
+    # pytrees
+    "tree_flatten",
+    "tree_unflatten",
+    "tree_leaves",
+    "tree_structure",
+    "tree_map",
+    # tracer / interception
+    "AutodiffTracer",
+    "resolve_call",
+    "AutodiffWarning",
+    "register_pipescript_params_macro",
+    # ipython / jupyter extension
+    "load_ipython_extension",
+    "unload_ipython_extension",
+    # differentiable primitives
+    "d_exp",
+    "d_log",
+    "d_sin",
+    "d_cos",
+    "d_tanh",
+    "d_sqrt",
+    "d_sigmoid",
+    "d_abs",
+    "d_square",
+    "d_sinh",
+    "d_cosh",
+    "d_arctan",
+    "d_log1p",
+    "d_expm1",
+    "d_reciprocal",
+    "d_maximum",
+    "d_minimum",
+    "d_clip",
+    "d_where",
+    "d_sum",
+    "d_mean",
+    "d_var",
+    "d_std",
+    "d_max",
+    "d_min",
+    "d_softmax",
+    "d_logsumexp",
+    "d_concatenate",
+    "d_transpose",
+    "d_reshape",
+    "d_expand_dims",
+    "d_stack",
+    "d_vstack",
+    "d_hstack",
+    "d_column_stack",
+    "d_dstack",
+    "d_einsum",
+    "einsum",
+    "d_cumsum",
+    "cumsum",
+    "d_gated_act",
+    "gated_act",
+]

pycograd/_constraints.py ADDED Viewed

@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+"""Dimension-equality constraints for shape polymorphism.
+When shape inference runs over *symbolic* input dims (e.g. a batch ``B`` declared via
+``ShapeDtypeStruct(("B", 768))``), each contraction registers an equality: a matmul
+asserts its inner dims equal, concatenate asserts its non-axis dims equal, broadcasting
+asserts compatible dims equal. :class:`ConstraintEnv` is the union-find that records
+those equalities, refines a symbol pinned to a concrete (``K`` forced to ``4``), and
+reports a contradiction (two concretes forced equal) as a shape error.
+Only *solvable* symbols -- caller-declared input dims, whose key is a ``str`` -- get
+bound to concretes or merged. *Data-dependent* symbols (a mask count, a broadcast;
+their key is a tuple) are runtime facts, not statically known, so they are left opaque,
+preserving the optimistic "carry it forward" behavior of plain symbolic inference.
+"""
+from __future__ import annotations
+import contextlib
+import contextvars
+from typing import Any, Hashable, Iterator, cast
+from pycograd import _dims
+from pycograd._dims import Dim
+def _as_atom(x: int | Dim) -> tuple:
+    """Tag a dim for the solver: ``("int", v)``, ``("sym", key, name)`` or ``("expr",)``.
+    A :class:`Dim` whose ``as_symbol()`` is not ``None`` contributes that result's
+    first two items as ``key`` and ``name``; any other :class:`Dim` is an expression.
+    A non-``Dim`` accepted by ``_dims._is_int`` becomes an ``"int"`` atom; everything
+    else is an expression as well.
+    """
+    if not isinstance(x, Dim):
+        if _dims._is_int(x):
+            return ("int", int(cast(Any, x)))
+        return ("expr",)
+    sym = x.as_symbol()
+    if sym is None:
+        return ("expr",)
+    return ("sym", sym[0], sym[1])
+class ConstraintEnv:
+    """Disjoint-set forest over symbol keys; each class holds at most one concrete int."""
+    def __init__(self) -> None:
+        self.parent: dict[Hashable, Hashable] = {}  # key -> parent key (roots map to self)
+        self.value: dict[Hashable, int] = {}  # root key -> concrete int of its class
+        self.name: dict[Hashable, str] = {}  # key -> rendered name
+    def _add(self, key: Hashable, name: str) -> None:
+        """Register ``key`` as its own singleton class; a known key is left untouched."""
+        if key in self.parent:
+            return
+        self.parent[key] = key
+        self.name[key] = name
+    def _find(self, key: Hashable) -> Hashable:
+        """Return the root of ``key``'s class (``key`` must already be registered)."""
+        # First pass: climb parent links until reaching a self-parented key.
+        root = key
+        up = self.parent[root]
+        while up != root:
+            root = up
+            up = self.parent[root]
+        # Second pass (path compression): re-point each key on the walked chain
+        # straight at the root so later lookups are shorter.
+        node = key
+        up = self.parent[node]
+        while up != root:
+            self.parent[node] = root
+            node = up
+            up = self.parent[node]
+        return root
+    @staticmethod
+    def _solvable(key: Hashable) -> bool:
+        """Whether ``key`` may be bound or merged: only ``str`` keys qualify.
+        Caller-declared input dims have string keys; data-dependent symbols (nonzero,
+        bcast, slice) have tuple keys and are never bound/merged.
+        """
+        return isinstance(key, str)
+    def assert_eq(self, a: int | Dim, b: int | Dim) -> bool:
+        """Record ``a == b``; ``False`` means the equality is a provable contradiction."""
+        left, right = _as_atom(a), _as_atom(b)
+        kinds = (left[0], right[0])
+        if kinds == ("int", "int"):
+            return left[1] == right[1]
+        if kinds == ("int", "sym"):
+            return self._bind(right, left[1])
+        if kinds == ("sym", "int"):
+            return self._bind(left, right[1])
+        if kinds == ("sym", "sym"):
+            return self._union(left, right)
+        # At least one side is an expression: nothing can be proven, stay optimistic.
+        return True
+    def _bind(self, sym: tuple, val: int) -> bool:
+        """Pin the class of symbol atom ``sym`` to ``val``; ``False`` on a clash."""
+        _tag, key, name = sym
+        if not self._solvable(key):
+            # Data-dependent symbol: a runtime fact, never statically pinned.
+            return True
+        self._add(key, name)
+        root = self._find(key)
+        pinned = self.value.get(root)
+        if pinned is None or pinned == val:
+            self.value[root] = val
+            return True
+        return False
+    def _union(self, s1: tuple, s2: tuple) -> bool:
+        """Merge the classes of two symbol atoms; ``False`` if their values disagree."""
+        _tag1, k1, n1 = s1
+        _tag2, k2, n2 = s2
+        if not self._solvable(k1) or not self._solvable(k2):
+            # Data-dependent symbols stay opaque.
+            return True
+        self._add(k1, n1)
+        self._add(k2, n2)
+        keep = self._find(k1)
+        drop = self._find(k2)
+        if keep == drop:
+            return True
+        keep_val = self.value.get(keep)
+        drop_val = self.value.get(drop)
+        both_pinned = keep_val is not None and drop_val is not None
+        if both_pinned and keep_val != drop_val:
+            return False
+        # The second symbol's root is re-parented under the first symbol's root; a
+        # value known only on the absorbed side is carried over to the surviving root.
+        self.parent[drop] = keep
+        if keep_val is None and drop_val is not None:
+            self.value[keep] = drop_val
+        return True
+    def mapping(self) -> dict[Hashable, int | Dim]:
+        """Build a substitution for every known symbol key.
+        A key whose class is pinned maps to that concrete value; a key merged into
+        another root maps to the root's symbol. An unpinned root gets no entry.
+        """
+        out: dict[Hashable, int | Dim] = {}
+        for key in self.parent:
+            root = self._find(key)
+            pinned = self.value.get(root)
+            if pinned is not None:
+                out[key] = pinned
+                continue
+            if root != key:
+                out[key] = _dims.symbol(root, name=self.name[root])
+        return out
+# ---------------------------------------------------------------------------
+# Active environment (entered for the duration of an abstract inference run).
+# ---------------------------------------------------------------------------
+# Context variable holding the ConstraintEnv installed by ``constraint_scope``.
+# The default ``None`` means no scope is active in the current context; in that
+# case ``register_eq`` falls back to ``_dims.provably_unequal``.
+_env: "contextvars.ContextVar[ConstraintEnv | None]" = contextvars.ContextVar(
+    "dim_env", default=None
+)
+@contextlib.contextmanager
+def constraint_scope() -> Iterator[ConstraintEnv]:
+    """Install a fresh :class:`ConstraintEnv` as the active env for the ``with`` body.
+    Yields the new env. On exit -- normal or via an exception -- the context variable
+    is reset to whatever it held before the scope was entered.
+    """
+    scoped = ConstraintEnv()
+    restore = _env.set(scoped)
+    try:
+        yield scoped
+    finally:
+        _env.reset(restore)
+def active_env() -> "ConstraintEnv | None":
+    """Return the env currently held by the ``_env`` context variable (may be ``None``)."""
+    current = _env.get()
+    return current
+def register_eq(a: int | Dim, b: int | Dim) -> bool:
+    """Assert ``a == b`` against the active env; ``False`` signals a provable clash.
+    When no env is active the equality is not recorded anywhere: the result is simply
+    the negation of ``_dims.provably_unequal(a, b)``.
+    """
+    env = _env.get()
+    if env is not None:
+        return env.assert_eq(a, b)
+    return not _dims.provably_unequal(a, b)