PyPI - embedl-deploy - Versions diffs - 0.2.0__tar.gz → 0.4.0__tar.gz - Mend

embedl-deploy 0.2.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)

embedl_deploy-0.4.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,10 @@
+prune *
+graft src
+include LICENSE
+include NOTICE
+include README.md
+prune src/embedl_deploy/tensorrt
+prune src/embedl_deploy/_internal/tensorrt
+global-exclude CLAUDE.md
+global-exclude *.pyc
+global-exclude __pycache__

{embedl_deploy-0.2.0/src/embedl_deploy.egg-info → embedl_deploy-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: embedl-deploy
-Version: 0.2.0
+Version: 0.4.0
 Summary: Python package to make AI models deployment-ready for any hardware.
 Author-email: Embedl AB <support@embedl.com>
 Project-URL: Homepage, https://www.embedl.com/
@@ -15,7 +15,7 @@ License-File: LICENSE
 License-File: NOTICE
 Requires-Dist: torch
 Provides-Extra: tensorrt
-Requires-Dist: tensorrt; extra == "tensorrt"
+Requires-Dist: embedl-deploy-tensorrt; extra == "tensorrt"
 Dynamic: license-file
 # embedl-deploy

{embedl_deploy-0.2.0 → embedl_deploy-0.4.0}/pyproject.toml RENAMED Viewed

@@ -27,7 +27,7 @@ dynamic = ["version"]
 dependencies = ["torch"]
 [project.optional-dependencies]
-tensorrt = ["tensorrt"]
+tensorrt = ["embedl-deploy-tensorrt"]
 [project.urls]
 Homepage = "https://www.embedl.com/"
@@ -100,13 +100,58 @@ line-length = 79
 quote-style = "preserve"
 [tool.ruff.lint]
-select = [
-    # isort
-    "I",
-    # Use `from X import Y` instead of `import X.Y as Y`
-    "PLR0402",
+select = ["ALL"]
+ignore = [
+    # Dynamic attributes on fx.Node require string-based access for mypy
+    "B009", "B010",
+    # Conflicts with ruff format
+    "COM812",
+    # Descriptive exception messages preferred
+    "EM", "TRY003",
+    # Allow long lines for URLs, Sphinx cross-references, and imports
+    "E501",
+    # Too many false positives
+    "ERA001",
+    # Common in PyTorch-style APIs
+    "FBT",
+    # TODOs are fine
+    "FIX002",
+    # PyTorch naming conventions (N, C, H, W; import F)
+    "N806", "N812",
+    # Allow magic value comparisons
+    "PLR2004",
+    # Intermediate variables before return aid readability
+    "RET504",
+    # Conflicts with quote-style = "preserve"
+    "Q000",
+    # Intentional Unicode in docstrings and comments
+    "RUF002", "RUF003",
+    # Explicit if/return True/return False is clearer for predicate functions
+    "SIM103",
+    # Type-only imports are fine as regular imports
+    "TC001",
+    # Non-cryptographic random is expected in ML code
+    "S311",
+    # Prefer unquoted type expressions in cast()
+    "TC006",
+    # Clashes with dataclass and nn.Module patterns
+    "RUF012",
+    # Too prescriptive about TODO format
+    "TD",
+    # D203/D211 and D212/D213 are mutually exclusive pairs
+    "D203", "D213",
 ]
+[tool.ruff.lint.per-file-ignores]
+"src/**/*.py" = ["S101"]
+"tests/**/*.py" = ["ANN", "D103", "S101"]
+"docs/**/*.py" = ["ANN", "E402", "INP001", "S", "T201"]
+"examples/**/*.py" = ["INP001", "T201"]
+".claude/**/*.py" = ["ALL"]
+[tool.ruff.lint.pylint]
+max-args = 8
 [tool.mypy]
 ignore_missing_imports = false
 strict = true
@@ -125,5 +170,8 @@ disable_error_code = ["misc", "no-any-return"]
 module = ["embedl_deploy._internal.tensorrt.modules.*"]
 disable_error_code = ["no-any-return"]
+[tool.setuptools.package-data]
+embedl_deploy = ["py.typed"]
 [tool.setuptools.dynamic]
 version = { attr = "embedl_deploy.version.public.PUBLIC_VERSION" }

{embedl_deploy-0.2.0 → embedl_deploy-0.4.0}/src/embedl_deploy/_internal/core/backend.py RENAMED Viewed

@@ -22,7 +22,7 @@ class Backend:
     fusion_patterns: Sequence[Pattern]
     #: SmoothQuant preparation patterns.
     smooth_patterns: Sequence[Pattern]
-    #: Q/DQ stub insertion patterns for quantisation.
+    #: Q/DQ stub insertion patterns for quantization.
     quantized_patterns: Sequence[Pattern]
@@ -120,6 +120,6 @@ def set_backend(name: str) -> None:
     if name not in backends:
         available = ", ".join(sorted(backends)) or "(none)"
         raise ValueError(
-            f"Backend {name!r} not found. " f"Available backends: {available}"
+            f"Backend {name!r} not found. Available backends: {available}"
         )
     _BackendState.backend = backends[name]

{embedl_deploy-0.2.0 → embedl_deploy-0.4.0}/src/embedl_deploy/_internal/core/modules.py RENAMED Viewed

@@ -63,6 +63,10 @@ class FusedModule(nn.Module, ABC):
         self.input_quant_stubs: dict[int, QuantStub] = {
             idx: QuantStub({self}) for idx in self.inputs_to_quantize
         }
+        #: Whether this module has been surrounded with input
+        #: ``QuantStub`` entries by
+        #: :class:`~embedl_deploy._internal.tensorrt.patterns.quantizations.SurroundWithQuantStubsPattern`.
+        self.surrounded: bool = False
 class _LeafTracer(fx.Tracer):

embedl_deploy-0.4.0/src/embedl_deploy/_internal/core/pattern.py ADDED Viewed

@@ -0,0 +1,204 @@
+# Copyright (C) 2026 Embedl AB
+"""Core abstractions: Pattern base class and PatternMatch dataclass.
+Every fusion, conversion, and quantization rule is a
+:class:`~embedl_deploy._internal.core.pattern.Pattern` subclass.  The two
+methods — :meth:`~embedl_deploy._internal.core.pattern.Pattern.match` and
+:meth:`~embedl_deploy._internal.core.pattern.Pattern.replace` — encapsulate
+what to look for and how to rewrite the graph.
+"""
+from dataclasses import dataclass
+from torch import fx, nn
+from embedl_deploy._internal.core.tree.match import match_tree
+from embedl_deploy._internal.core.tree.replace import replace_tree
+from embedl_deploy._internal.core.tree.types import (
+    Graft,
+    Replacement,
+    Tree,
+    TreeMatch,
+    Wildcard,
+)
+from embedl_deploy._internal.core.tree.utils import get_module
+def _collect_modules(tree_match: TreeMatch) -> list[nn.Module | None]:
+    """Resolve matched modules from a tree match.
+    Walks nested branches first (in input order), then
+    trunk nodes.  For a
+    :class:`~embedl_deploy._internal.core.tree.types.Fork`
+    tree this means the fork-input branches precede the output
+    trunk, so the resulting list matches a constructor signature
+    like
+    ``FusedModule(branch0_mod, branch1_mod, …, output_mod)``.
+    :class:`~embedl_deploy._internal.core.tree.types.Wildcard`
+    entries with ``"?"`` quantifier that matched nothing
+    contribute ``None``.
+    :raises TypeError:
+        If a matched node is not a ``call_module`` node.
+    """
+    modules: list[nn.Module | None] = []
+    for nested in tree_match.nested:
+        modules.extend(_collect_modules(nested))
+    for entry in tree_match.trunk_nodes:
+        if isinstance(entry, Wildcard):
+            if entry.quantifier != "?":
+                raise TypeError(
+                    f"wildcard with quantifier"
+                    f" {entry.quantifier!r} is not"
+                    f" supported — graft only supports"
+                    f" '?' wildcards"
+                )
+            node = entry.nodes[0] if entry.nodes else None
+        else:
+            node = entry
+        if node is None:
+            modules.append(None)
+        else:
+            mod = get_module(node)
+            if mod is None:
+                raise TypeError(
+                    f"node {node.name!r} is not a call_module "
+                    f"node — graft only works with "
+                    f"module-only trees"
+                )
+            modules.append(mod)
+    return modules
+def _get_replacements(
+    graft: Graft,
+    tree_match: TreeMatch,
+) -> list[Replacement]:
+    """Build the replacement list from a graft specification."""
+    if isinstance(graft, tuple):
+        replacements: list[Replacement] = []
+        for rep_maker in graft:
+            replacements.extend(rep_maker(tree_match))
+        return replacements
+    modules = _collect_modules(tree_match)
+    try:
+        return [graft(*modules)]
+    except TypeError as exc:
+        raise TypeError(
+            f"{graft.__name__}() got"
+            f" {len(modules)} modules from"
+            f" the tree match — check that"
+            f" the tree shape matches the"
+            f" constructor signature"
+        ) from exc
+class Pattern:
+    """A graph transformation rule: find a sub-graph and replace it.
+    The default :meth:`match` delegates to
+    :func:`~embedl_deploy._internal.core.tree.match.match_tree` using the
+    class's :attr:`tree`.  The default :meth:`replace` constructs
+    replacements from :attr:`graft` and delegates to
+    :func:`~embedl_deploy._internal.core.tree.replace.replace_tree`.
+    Subclasses override either method when they need custom logic
+    (pre/post side-effects, post-match filtering, etc.).
+    Patterns with
+    :attr:`~embedl_deploy._internal.core.pattern.Pattern.is_conversion` set to
+    ``True`` are applied in a first pass to rewrite graph topology before
+    fusion patterns are matched.
+    """
+    tree: Tree | None = None
+    """The pattern topology to match, if using tree-based matching."""
+    graft: Graft | None = None
+    """The factories to make replacements for each matched tree, if used."""
+    is_conversion: bool = False
+    """If ``True``, this pattern is a structural conversion that must
+    be applied before fusion matching."""
+    symbolic_trace_only: bool = False
+    """If ``True``, this pattern removes nodes that are artifacts of
+    ``symbolic_trace``. This pattern has no effect on graphs exported with
+    ``torch.export`` because the nodes never appear in those graphs."""
+    export_graph_only: bool = False
+    """If ``True``, this pattern targets nodes that only appear in
+    ``torch.export`` aten graphs and has no effect on symbolic-trace output."""
+    def match(self, graph_module: fx.GraphModule) -> list["PatternMatch"]:
+        """Find all occurrences of this pattern in `graph_module`.
+        :raises ValueError:
+            If the pattern has no ``tree``.
+        """
+        tree = self.tree
+        if tree is None:
+            raise ValueError(f"{type(self).__name__} has no tree to match.")
+        tree_matches = match_tree(graph_module, tree)
+        return [
+            PatternMatch(
+                pattern=self,
+                graph_module=graph_module,
+                tree_match=tm,
+            )
+            for tm in tree_matches
+        ]
+    def replace(
+        self,
+        pattern_match: "PatternMatch",
+    ) -> list[fx.Node]:
+        """Replace one matched occurrence in-place.
+        :param pattern_match:
+            The pattern match to replace.
+        :returns:
+            The replacement nodes inserted into the graph.
+        :raises ValueError:
+            If the pattern has no ``graft``.
+        :raises TypeError:
+            If the ``graft`` class constructor rejects the
+            collected modules.
+        """
+        assert pattern_match.pattern is self
+        tree_match = pattern_match.tree_match
+        graft = self.graft
+        if graft is None:
+            raise ValueError(
+                f"{type(self).__name__} has no graft"
+                f" — override replace() or set graft."
+            )
+        replacements = _get_replacements(graft, tree_match)
+        return replace_tree(
+            pattern_match.graph_module, tree_match, replacements
+        )
+@dataclass
+class PatternMatch:
+    """One matched occurrence of a ``Pattern`` in a graph."""
+    #: The pattern that produced this match.
+    pattern: Pattern
+    #: The graph module that produced this match.
+    graph_module: fx.GraphModule
+    #: Structured match result produced by
+    #: :func:`~embedl_deploy._internal.core.tree.match.match_tree`.
+    #: Contains the matched nodes, modules, and nested per-branch
+    #: sub-matches for
+    #: :class:`~embedl_deploy._internal.core.tree.types.Fork`
+    #: topologies.
+    tree_match: TreeMatch
+    #: Whether to apply this match during transformation.
+    apply: bool = True
+    def __repr__(self) -> str:
+        pat = type(self.pattern).__name__
+        node_names = [n.name for n in self.tree_match.get_tree_nodes()]
+        return f"PatternMatch({pat}: {' -> '.join(node_names)})"

{embedl_deploy-0.2.0 → embedl_deploy-0.4.0}/src/embedl_deploy/_internal/core/plan.py RENAMED Viewed

@@ -144,11 +144,24 @@ def get_transformation_plan(
         for node, pats in plan.matches.items():
             for name, match in pats.items():
                 print(f"{node}: {name} apply={match.apply}")
     """
     if not getattr(graph_module, "_deep_copy_done", False):
         graph_module = copy.deepcopy(graph_module)
         setattr(graph_module, "_deep_copy_done", True)
+    # Strip torch.export shape-guard nodes that ShapeProp cannot evaluate.
+    guards = [
+        n
+        for n in graph_module.graph.nodes
+        if n.op == "call_module" and n.name.startswith("_guards")
+    ]
+    for node in guards:
+        node.replace_all_uses_with(next(iter(node.args)))
+        graph_module.graph.erase_node(node)
+    if guards:
+        graph_module.recompile()
     pattern_matches: list[PatternMatch] = []
     for pattern in patterns:
         pattern_matches.extend(pattern.match(graph_module))
@@ -168,6 +181,53 @@ def get_transformation_plan(
     )
+def _propagate_shapes(graph_module: fx.GraphModule) -> None:
+    """Re-propagate tensor shapes after graph surgery.
+    Builds fake inputs from placeholder ``tensor_meta`` and runs
+    :class:`~torch.fx.passes.shape_prop.ShapeProp`.  Pins tensors to the
+    graph's parameter device so ``torch.export``'d graphs with
+    device-dispatched ops (e.g. SDPA) don't crash on cross-device tensors.
+    :param graph_module:
+        The graph module whose shapes should be refreshed.
+    """
+    try:
+        device = next(graph_module.parameters()).device
+    except StopIteration:
+        device = torch.device("cpu")
+    # Patterns may register new submodules (QuantStub, FusedX) whose
+    # buffers default to CPU.  Sync to the graph's device so ShapeProp
+    # doesn't hit a mixed-device forward.
+    if device.type != "cpu":
+        graph_module.to(device)
+    fake_args: list[torch.Tensor] = []
+    for n in graph_module.graph.nodes:
+        if n.op != "placeholder":
+            continue
+        meta = n.meta.get("tensor_meta")
+        if meta is None or not hasattr(meta, "shape"):
+            fake_args.clear()
+            break
+        dtype = getattr(meta, "dtype", torch.float32)
+        if dtype.is_floating_point:
+            fake_args.append(
+                torch.randn(meta.shape, dtype=dtype, device=device)
+            )
+        else:
+            fake_args.append(
+                torch.zeros(meta.shape, dtype=dtype, device=device)
+            )
+    if fake_args:
+        # `no_grad` keeps ShapeProp from materialising an autograd tape
+        # for the whole forward pass — for large transformer graphs
+        # (SAM3, ViT-L, …) the activation tape can blow GPU memory.
+        with torch.no_grad():
+            ShapeProp(graph_module).propagate(*fake_args)  # type: ignore[no-untyped-call]
 def apply_transformation_plan(
     plan: TransformationPlan,
 ) -> TransformationResult:
@@ -196,6 +256,7 @@ def apply_transformation_plan(
         result = apply_transformation_plan(plan)
         print(result.report)
         torch.onnx.export(result.model, x, "deployed.onnx")
     """
     graph_module = plan.model
@@ -219,10 +280,7 @@ def apply_transformation_plan(
         graph_module.recompile()
     graph_module.eval()
-    input_node = next(iter(graph_module.graph.nodes))
-    meta = input_node.meta.get("tensor_meta")
-    if meta is not None and hasattr(meta, "shape"):
-        ShapeProp(graph_module).propagate(torch.randn(meta.shape))  # type: ignore[no-untyped-call]
+    _propagate_shapes(graph_module)
     report = _build_report(enabled, skipped)
@@ -264,6 +322,7 @@ def transform(
         from embedl_deploy.tensorrt import TENSORRT_PATTERNS
         deployable_model = transform(model, patterns=TENSORRT_PATTERNS).model
     """
     graph_module = (
         model if isinstance(model, fx.GraphModule) else symbolic_trace(model)

{embedl_deploy-0.2.0 → embedl_deploy-0.4.0}/src/embedl_deploy/_internal/core/quantize/calibrate.py RENAMED Viewed

@@ -46,12 +46,13 @@ def calibrate_smooth_quant(
     if not mq.smooth:
         return
-    hooks = []
     for stub in enabled_stubs:
         stub.enabled = False
-    for obs in mq.smooth:
-        if obs.downstream_linears:
-            hooks.append(obs.register_forward_hook())
+    hooks = [
+        obs.register_forward_hook()
+        for obs in mq.smooth
+        if obs.downstream_linears
+    ]
     try:
         model.eval()

{embedl_deploy-0.2.0 → embedl_deploy-0.4.0}/src/embedl_deploy/_internal/core/quantize/main.py RENAMED Viewed

@@ -86,9 +86,23 @@ def configure(
         if obs.enabled:
             obs.config = copy.copy(config.smooth_quant)
+    # Snapshot the model device before insertion so newly-created
+    # QuantStub / WeightFakeQuantize / observer buffers can be moved
+    # to the same device as the rest of the graph after the
+    # smooth-quant / Q-DQ insertion passes run. Without this, models
+    # already on CUDA hit "scale on cpu vs other tensors on cuda" in
+    # ``fake_quantize_per_tensor_affine_cachemask_tensor_qparams``.
+    try:
+        device = next(model.parameters()).device
+    except StopIteration:
+        device = torch.device("cpu")
     prepare_smooth_quant(model)
     prepare_qdq(model)
+    if device.type != "cpu":
+        model.to(device)
 def quantize(
     model: fx.GraphModule,
@@ -153,7 +167,7 @@ def freeze_weight_quantization(model: fx.GraphModule) -> None:
     mq = get_model_quants(model)
     for wfq in mq.weight:
-        mod = list(wfq.consumers)[0]
+        mod = next(iter(wfq.consumers))
         weight = _get_quantized_weight(mod)
         if isinstance(weight, torch.Tensor):
             wfq.freeze(weight)

{embedl_deploy-0.2.0 → embedl_deploy-0.4.0}/src/embedl_deploy/_internal/core/quantize/stubs.py RENAMED Viewed

@@ -159,7 +159,8 @@ class WeightFakeQuantize(nn.Module):
             return weight
         if self.frozen:
             scale, zero_point = self.scale, self.zero_point
-            assert scale is not None and zero_point is not None
+            assert scale is not None
+            assert zero_point is not None
         else:
             scale, zero_point = self._compute_quant_params(weight)
         q_min, q_max = self.config.quant_min, self.config.quant_max

embedl_deploy-0.4.0/src/embedl_deploy/_internal/core/tree/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# Copyright (C) 2026 Embedl AB
+"""Tree-based pattern topology DSL, matching, and replacement."""

embedl-deploy 0.2.0__tar.gz → 0.4.0__tar.gz

embedl-deploy 0.2.0__tar.gz → 0.4.0__tar.gz