causal-conv1d 1.1.3.post1.tar.gz → 1.2.0.post1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/PKG-INFO +1 -1
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/causal_conv1d/__init__.py +1 -1
- causal_conv1d-1.2.0.post1/causal_conv1d/causal_conv1d_interface.py +213 -0
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/causal_conv1d.egg-info/PKG-INFO +1 -1
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/causal_conv1d.egg-info/requires.txt +0 -1
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/setup.py +0 -1
- causal_conv1d-1.1.3.post1/causal_conv1d/causal_conv1d_interface.py +0 -107
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/AUTHORS +0 -0
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/LICENSE +0 -0
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/README.md +0 -0
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/causal_conv1d.egg-info/SOURCES.txt +0 -0
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/causal_conv1d.egg-info/dependency_links.txt +0 -0
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/causal_conv1d.egg-info/top_level.txt +0 -0
- {causal_conv1d-1.1.3.post1 → causal_conv1d-1.2.0.post1}/setup.cfg +0 -0
causal_conv1d-1.2.0.post1/causal_conv1d/causal_conv1d_interface.py (new file)
@@ -0,0 +1,213 @@
+# Copyright (c) 2024, Tri Dao.
+
+import torch
+import torch.nn.functional as F
+
+
+import causal_conv1d_cuda
+
+
+class CausalConv1dFn(torch.autograd.Function):
+    @staticmethod
+    def forward(
+        ctx,
+        x,
+        weight,
+        bias=None,
+        seq_idx=None,
+        initial_states=None,
+        return_final_states=False,
+        final_states_out=None,
+        activation=None,
+    ):
+        if activation not in [None, "silu", "swish"]:
+            raise NotImplementedError("activation must be None, silu, or swish")
+        if x.stride(2) != 1 and x.stride(1) != 1:
+            x = x.contiguous()
+        bias = bias.contiguous() if bias is not None else None
+        if seq_idx is not None:
+            assert (
+                initial_states is None
+            ), "initial_states must be None if seq_idx is not None"
+            assert (
+                not return_final_states
+            ), "If seq_idx is not None, we don't return final_states_out"
+        seq_idx = seq_idx.contiguous() if seq_idx is not None else None
+        if initial_states is not None and (
+            initial_states.stride(2) != 1 and initial_states.stride(1) != 1
+        ):
+            initial_states = initial_states.contiguous()
+        if return_final_states:
+            assert (
+                x.stride(1) == 1
+            ), "Only channel-last layout support returning final_states_out"
+            if final_states_out is not None:
+                assert (
+                    final_states_out.stride(2) == 1 or final_states_out.stride(1) == 1
+                )
+            else:
+                batch, dim, seqlen = x.shape
+                width = weight.shape[1]
+                final_states_out = torch.empty(
+                    batch, width - 1, dim, device=x.device, dtype=x.dtype
+                ).transpose(1, 2)
+        else:
+            final_states_out = None
+        ctx.activation = activation in ["silu", "swish"]
+        out = causal_conv1d_cuda.causal_conv1d_fwd(
+            x, weight, bias, seq_idx, initial_states, final_states_out, ctx.activation
+        )
+        ctx.save_for_backward(x, weight, bias, seq_idx, initial_states)
+        ctx.return_final_states = return_final_states
+        ctx.return_dinitial_states = (
+            initial_states is not None and initial_states.requires_grad
+        )
+        return out if not return_final_states else (out, final_states_out)
+
+    @staticmethod
+    def backward(ctx, dout, *args):
+        x, weight, bias, seq_idx, initial_states = ctx.saved_tensors
+        dfinal_states = args[0] if ctx.return_final_states else None
+        if dout.stride(2) != 1 and dout.stride(1) != 1:
+            dout = dout.contiguous()
+        # The kernel supports passing in a pre-allocated dx (e.g., in case we want to fuse the
+        # backward of conv1d with the backward of chunk).
+        # Here we just pass in None and dx will be allocated in the C++ code.
+        dx, dweight, dbias, dinitial_states = causal_conv1d_cuda.causal_conv1d_bwd(
+            x,
+            weight,
+            bias,
+            dout,
+            seq_idx,
+            initial_states,
+            dfinal_states,
+            None,
+            ctx.return_dinitial_states,
+            ctx.activation,
+        )
+        return (
+            dx,
+            dweight,
+            dbias if bias is not None else None,
+            None,
+            dinitial_states if initial_states is not None else None,
+            None,
+            None,
+            None,
+        )
+
+
+def causal_conv1d_fn(
+    x,
+    weight,
+    bias=None,
+    seq_idx=None,
+    initial_states=None,
+    return_final_states=False,
+    final_states_out=None,
+    activation=None,
+):
+    """
+    x: (batch, dim, seqlen)
+    weight: (dim, width)
+    bias: (dim,)
+    seq_idx: (batch, seqlen)
+    initial_states: (batch, dim, width - 1)
+    final_states_out: (batch, dim, width - 1), to be written to
+    activation: either None or "silu" or "swish"
+
+    out: (batch, dim, seqlen)
+    """
+    return CausalConv1dFn.apply(
+        x,
+        weight,
+        bias,
+        seq_idx,
+        initial_states,
+        return_final_states,
+        final_states_out,
+        activation,
+    )
+
+
+def causal_conv1d_ref(
+    x,
+    weight,
+    bias=None,
+    initial_states=None,
+    return_final_states=False,
+    final_states_out=None,
+    activation=None,
+):
+    """
+    x: (batch, dim, seqlen)
+    weight: (dim, width)
+    bias: (dim,)
+    initial_states: (batch, dim, width - 1)
+    final_states_out: (batch, dim, width - 1)
+
+    out: (batch, dim, seqlen)
+    """
+    if activation not in [None, "silu", "swish"]:
+        raise NotImplementedError("activation must be None, silu, or swish")
+    dtype_in = x.dtype
+    x = x.to(weight.dtype)
+    seqlen = x.shape[-1]
+    dim, width = weight.shape
+    if initial_states is None:
+        out = F.conv1d(x, weight.unsqueeze(1), bias, padding=width - 1, groups=dim)
+    else:
+        x = torch.cat([initial_states, x], dim=-1)
+        out = F.conv1d(x, weight.unsqueeze(1), bias, padding=0, groups=dim)
+    out = out[..., :seqlen]
+    if return_final_states:
+        final_states = F.pad(x, (width - 1 - x.shape[-1], 0)).to(
+            dtype_in
+        )  # (batch, dim, width - 1)
+        if final_states_out is not None:
+            final_states_out.copy_(final_states)
+        else:
+            final_states_out = final_states
+    out = (out if activation is None else F.silu(out)).to(dtype=dtype_in)
+    return out if not return_final_states else (out, final_states_out)
+
+
+def causal_conv1d_update(x, conv_state, weight, bias=None, activation=None):
+    """
+    x: (batch, dim)
+    conv_state: (batch, dim, width)
+    weight: (dim, width)
+    bias: (dim,)
+
+    out: (batch, dim)
+    """
+    if activation not in [None, "silu", "swish"]:
+        raise NotImplementedError("activation must be None, silu, or swish")
+    activation = activation in ["silu", "swish"]
+    return causal_conv1d_cuda.causal_conv1d_update(
+        x, conv_state, weight, bias, activation
+    )
+
+
+def causal_conv1d_update_ref(x, conv_state, weight, bias=None, activation=None):
+    """
+    x: (batch, dim)
+    conv_state: (batch, dim, width)
+    weight: (dim, width)
+    bias: (dim,)
+
+    out: (batch, dim)
+    """
+    if activation not in [None, "silu", "swish"]:
+        raise NotImplementedError("activation must be None, silu, or swish")
+    dtype_in = x.dtype
+    batch, dim = x.shape
+    width = weight.shape[1]
+    assert conv_state.shape == (batch, dim, width)
+    assert weight.shape == (dim, width)
+    conv_state.copy_(torch.roll(conv_state, shifts=-1, dims=-1))  # Update state (B D W)
+    conv_state[:, :, -1] = x
+    out = torch.sum(conv_state * weight, dim=-1)  # (B D)
+    if bias is not None:
+        out += bias
+    return (out if activation is None else F.silu(out)).to(dtype=dtype_in)
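The main functional change in 1.2.0 is the initial_states / return_final_states / final_states_out plumbing, which lets a long sequence be processed in chunks while carrying the convolution state across chunk boundaries. Below is a minimal sketch of that pattern using the pure-PyTorch causal_conv1d_ref shown above; the shapes, seed, and tolerance are illustrative only, and it assumes the package is installed (the interface module imports the compiled causal_conv1d_cuda extension at import time, even though the _ref functions themselves use plain PyTorch ops).

import torch

from causal_conv1d.causal_conv1d_interface import causal_conv1d_ref

torch.manual_seed(0)
batch, dim, seqlen, width = 2, 8, 32, 4
x = torch.randn(batch, dim, seqlen)
weight = torch.randn(dim, width)
bias = torch.randn(dim)

# One pass over the full sequence.
out_full = causal_conv1d_ref(x, weight, bias, activation="silu")

# Same sequence in two chunks: the final states of the first chunk
# (its last width - 1 timesteps) seed the second chunk.
x1, x2 = x[..., : seqlen // 2], x[..., seqlen // 2 :]
out1, states = causal_conv1d_ref(
    x1, weight, bias, return_final_states=True, activation="silu"
)
out2 = causal_conv1d_ref(x2, weight, bias, initial_states=states, activation="silu")

# Chunked and full-sequence outputs agree.
assert torch.allclose(torch.cat([out1, out2], dim=-1), out_full, atol=1e-5)

The CUDA-backed causal_conv1d_fn accepts the same keywords, with the additional constraint asserted in CausalConv1dFn.forward that return_final_states=True requires a channel-last layout, i.e. x.stride(1) == 1.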
causal_conv1d-1.1.3.post1/causal_conv1d/causal_conv1d_interface.py (removed)
@@ -1,107 +0,0 @@
-# Copyright (c) 2023, Tri Dao.
-
-import torch
-import torch.nn.functional as F
-
-
-import causal_conv1d_cuda
-
-
-class CausalConv1dFn(torch.autograd.Function):
-
-    @staticmethod
-    def forward(ctx, x, weight, bias=None, seq_idx=None, activation=None):
-        if activation not in [None, "silu", "swish"]:
-            raise NotImplementedError("activation must be None, silu, or swish")
-        if x.stride(2) != 1 and x.stride(1) != 1:
-            x = x.contiguous()
-        bias = bias.contiguous() if bias is not None else None
-        seq_idx = seq_idx.contiguous() if seq_idx is not None else None
-        ctx.save_for_backward(x, weight, bias, seq_idx)
-        ctx.activation = activation in ["silu", "swish"]
-        out = causal_conv1d_cuda.causal_conv1d_fwd(x, weight, bias, seq_idx, ctx.activation)
-        return out
-
-    @staticmethod
-    def backward(ctx, dout):
-        x, weight, bias, seq_idx = ctx.saved_tensors
-        if dout.stride(2) != 1 and dout.stride(1) != 1:
-            dout = dout.contiguous()
-        # The kernel supports passing in a pre-allocated dx (e.g., in case we want to fuse the
-        # backward of conv1d with the backward of chunk).
-        # Here we just pass in None and dx will be allocated in the C++ code.
-        dx, dweight, dbias = causal_conv1d_cuda.causal_conv1d_bwd(
-            x, weight, bias, dout, seq_idx, None, ctx.activation
-        )
-        return dx, dweight, dbias if bias is not None else None, None, None
-
-
-def causal_conv1d_fn(x, weight, bias=None, seq_idx=None, activation=None):
-    """
-    x: (batch, dim, seqlen)
-    weight: (dim, width)
-    bias: (dim,)
-    seq_idx: (batch, seqlen)
-    activation: either None or "silu" or "swish"
-
-    out: (batch, dim, seqlen)
-    """
-    return CausalConv1dFn.apply(x, weight, bias, seq_idx, activation)
-
-
-def causal_conv1d_ref(x, weight, bias=None, activation=None):
-    """
-    x: (batch, dim, seqlen)
-    weight: (dim, width)
-    bias: (dim,)
-
-    out: (batch, dim, seqlen)
-    """
-    if activation not in [None, "silu", "swish"]:
-        raise NotImplementedError("activation must be None, silu, or swish")
-    dtype_in = x.dtype
-    x = x.to(weight.dtype)
-    seqlen = x.shape[-1]
-    dim, width = weight.shape
-    out = F.conv1d(x, weight.unsqueeze(1), bias, padding=width - 1, groups=dim)
-    out = out[..., :seqlen]
-    return (out if activation is None else F.silu(out)).to(dtype=dtype_in)
-
-
-def causal_conv1d_update(x, conv_state, weight, bias=None, activation=None):
-    """
-    x: (batch, dim)
-    conv_state: (batch, dim, width)
-    weight: (dim, width)
-    bias: (dim,)
-
-    out: (batch, dim)
-    """
-    if activation not in [None, "silu", "swish"]:
-        raise NotImplementedError("activation must be None, silu, or swish")
-    activation = activation in ["silu", "swish"]
-    return causal_conv1d_cuda.causal_conv1d_update(x, conv_state, weight, bias, activation)
-
-
-def causal_conv1d_update_ref(x, conv_state, weight, bias=None, activation=None):
-    """
-    x: (batch, dim)
-    conv_state: (batch, dim, width)
-    weight: (dim, width)
-    bias: (dim,)
-
-    out: (batch, dim)
-    """
-    if activation not in [None, "silu", "swish"]:
-        raise NotImplementedError("activation must be None, silu, or swish")
-    dtype_in = x.dtype
-    batch, dim = x.shape
-    width = weight.shape[1]
-    assert conv_state.shape == (batch, dim, width)
-    assert weight.shape == (dim, width)
-    conv_state.copy_(torch.roll(conv_state, shifts=-1, dims=-1))  # Update state (B D W)
-    conv_state[:, :, -1] = x
-    out = torch.sum(conv_state * weight, dim=-1)  # (B D)
-    if bias is not None:
-        out += bias
-    return (out if activation is None else F.silu(out)).to(dtype=dtype_in)
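Comparing the two signatures above, activation moves from the fifth to the eighth positional parameter of causal_conv1d_fn in this release, so callers that passed it positionally against 1.1.3 need to switch to a keyword argument. A hypothetical before/after (tensor names are placeholders):

# 1.1.3.post1: activation was the fifth positional parameter.
out = causal_conv1d_fn(x, weight, bias, seq_idx, "silu")

# 1.2.0.post1: the fifth positional slot is now initial_states,
# so pass activation by keyword instead.
out = causal_conv1d_fn(x, weight, bias, seq_idx, activation="silu")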
The remaining seven files (AUTHORS, LICENSE, README.md, causal_conv1d.egg-info/SOURCES.txt, causal_conv1d.egg-info/dependency_links.txt, causal_conv1d.egg-info/top_level.txt, setup.cfg) are renamed with the version bump but contain no changes.