causal-conv1d 1.5.0.post5__tar.gz → 1.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. causal_conv1d-1.5.1/MANIFEST.in +4 -0
  2. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/PKG-INFO +1 -1
  3. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d/__init__.py +1 -1
  4. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d/causal_conv1d_interface.py +4 -5
  5. causal_conv1d-1.5.1/causal_conv1d/cpp_functions.py +183 -0
  6. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d.egg-info/PKG-INFO +1 -1
  7. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d.egg-info/SOURCES.txt +10 -1
  8. causal_conv1d-1.5.1/csrc/causal_conv1d.h +81 -0
  9. causal_conv1d-1.5.1/csrc/causal_conv1d_bwd.cu +627 -0
  10. causal_conv1d-1.5.1/csrc/causal_conv1d_common.h +98 -0
  11. causal_conv1d-1.5.1/csrc/causal_conv1d_fwd.cu +399 -0
  12. causal_conv1d-1.5.1/csrc/causal_conv1d_update.cu +137 -0
  13. causal_conv1d-1.5.1/csrc/static_switch.h +25 -0
  14. causal_conv1d-1.5.1/pyproject.toml +3 -0
  15. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/setup.py +23 -17
  16. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/AUTHORS +0 -0
  17. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/LICENSE +0 -0
  18. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/README.md +0 -0
  19. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d/causal_conv1d_varlen.py +0 -0
  20. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d.egg-info/dependency_links.txt +0 -0
  21. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d.egg-info/requires.txt +0 -0
  22. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d.egg-info/top_level.txt +0 -0
  23. {causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/setup.cfg +0 -0
causal_conv1d-1.5.1/MANIFEST.in
@@ -0,0 +1,4 @@
+ include csrc/*.h
+ include csrc/*.cu
+ recursive-include third_party *
+ README.md
{causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: causal_conv1d
- Version: 1.5.0.post5
+ Version: 1.5.1
  Summary: Causal depthwise conv1d in CUDA, with a PyTorch interface
  Home-page: https://github.com/Dao-AILab/causal-conv1d
  Author: Tri Dao
{causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d/__init__.py
@@ -1,3 +1,3 @@
- __version__ = "1.5.0.post5"
+ __version__ = "1.5.1"
 
  from causal_conv1d.causal_conv1d_interface import causal_conv1d_fn, causal_conv1d_update
{causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d/causal_conv1d_interface.py
@@ -3,8 +3,7 @@
  import torch
  import torch.nn.functional as F
 
-
- import causal_conv1d_cuda
+ from causal_conv1d.cpp_functions import causal_conv1d_fwd_function, causal_conv1d_bwd_function, causal_conv1d_update_function
 
 
  class CausalConv1dFn(torch.autograd.Function):
@@ -54,7 +53,7 @@ class CausalConv1dFn(torch.autograd.Function):
          else:
              final_states_out = None
          ctx.activation = activation in ["silu", "swish"]
-         out = causal_conv1d_cuda.causal_conv1d_fwd(
+         out = causal_conv1d_fwd_function(
              x, weight, bias, seq_idx, initial_states, final_states_out, ctx.activation
          )
          ctx.save_for_backward(x, weight, bias, seq_idx, initial_states)
@@ -73,7 +72,7 @@ class CausalConv1dFn(torch.autograd.Function):
          # The kernel supports passing in a pre-allocated dx (e.g., in case we want to fuse the
          # backward of conv1d with the backward of chunk).
          # Here we just pass in None and dx will be allocated in the C++ code.
-         dx, dweight, dbias, dinitial_states = causal_conv1d_cuda.causal_conv1d_bwd(
+         dx, dweight, dbias, dinitial_states = causal_conv1d_bwd_function(
              x,
              weight,
              bias,
@@ -195,7 +194,7 @@ def causal_conv1d_update(x, conv_state, weight, bias=None, activation=None, cach
      unsqueeze = x.dim() == 2
      if unsqueeze:
          x = x.unsqueeze(-1)
-     out = causal_conv1d_cuda.causal_conv1d_update(
+     out = causal_conv1d_update_function(
          x, conv_state, weight, bias, activation, cache_seqlens, conv_state_indices
      )
      if unsqueeze:
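
For orientation, the wrappers touched above are the package's public entry points; a minimal usage sketch (not part of the diff, with shapes and keyword names taken from the project README) looks like this:

# Illustrative only: exercising causal_conv1d_fn / causal_conv1d_update, whose
# internals now route through causal_conv1d.cpp_functions.
import torch
from causal_conv1d import causal_conv1d_fn, causal_conv1d_update

batch, dim, seqlen, width = 2, 64, 128, 4
x = torch.randn(batch, dim, seqlen, device="cuda", dtype=torch.float16)
weight = torch.randn(dim, width, device="cuda", dtype=torch.float16)
bias = torch.randn(dim, device="cuda", dtype=torch.float16)

# Full-sequence forward: depthwise causal conv1d with optional fused SiLU.
out = causal_conv1d_fn(x, weight, bias, activation="silu")

# Single-token decoding step against a rolling conv_state buffer.
conv_state = torch.zeros(batch, dim, width, device="cuda", dtype=torch.float16)
x_step = torch.randn(batch, dim, device="cuda", dtype=torch.float16)
out_step = causal_conv1d_update(x_step, conv_state, weight, bias, activation="silu")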
causal_conv1d-1.5.1/causal_conv1d/cpp_functions.py
@@ -0,0 +1,183 @@
+ # Copyright (c) 2024, Tri Dao.
+
+ import torch
+
+ import causal_conv1d_cuda
+
+
+ LIBRARY_NAME = "DaoAILab"
+
+
+ @torch.library.custom_op(f"{LIBRARY_NAME}::_causal_conv1d_fwd_cpp", mutates_args={"out", "final_states_out"})
+ def _causal_conv1d_fwd_cpp(
+     x: torch.Tensor,
+     weight: torch.Tensor,
+     bias: torch.Tensor | None,
+     seq_idx: torch.Tensor | None,
+     initial_states: torch.Tensor | None,
+     out: torch.Tensor,
+     final_states_out: torch.Tensor | None,
+     silu_activation: bool,
+ ) -> None:
+     causal_conv1d_cuda.causal_conv1d_fwd(
+         x,
+         weight,
+         bias,
+         seq_idx,
+         initial_states,
+         out,
+         final_states_out,
+         silu_activation,
+     )
+
+
+ @torch.library.custom_op(f"{LIBRARY_NAME}::_causal_conv1d_bwd_cpp", mutates_args={
+     "dfinal_states",
+     "dx",
+     "dweight",
+     "dbias",
+     "dinitial_states",
+ })
+ def _causal_conv1d_bwd_cpp(
+     x: torch.Tensor,
+     weight: torch.Tensor,
+     bias: torch.Tensor | None,
+     dout: torch.Tensor,
+     seq_idx: torch.Tensor | None,
+     initial_states: torch.Tensor | None,
+     dfinal_states: torch.Tensor | None,
+     dx: torch.Tensor,
+     dweight: torch.Tensor,
+     dbias: torch.Tensor | None,
+     dinitial_states: torch.Tensor,
+     silu_activation: bool,
+ ) -> None:
+     causal_conv1d_cuda.causal_conv1d_bwd(
+         x,
+         weight,
+         bias,
+         dout,
+         seq_idx,
+         initial_states,
+         dfinal_states,
+         dx,
+         dweight,
+         dbias,
+         dinitial_states,
+         silu_activation,
+     )
+
+
+ @torch.library.custom_op(f"{LIBRARY_NAME}::_causal_conv1d_update_cpp", mutates_args={"out", "conv_state"})
+ def _causal_conv1d_update_cpp(
+     x: torch.Tensor,
+     conv_state: torch.Tensor,
+     weight: torch.Tensor,
+     bias: torch.Tensor | None,
+     out: torch.Tensor,
+     silu_activation: bool,
+     cache_seqlens: torch.Tensor | None,
+     conv_state_indices: torch.Tensor | None,
+ ) -> None:
+     causal_conv1d_cuda.causal_conv1d_update(
+         x,
+         conv_state,
+         weight,
+         bias,
+         out,
+         silu_activation,
+         cache_seqlens,
+         conv_state_indices
+     )
+
+
+ def causal_conv1d_fwd_function(
+     x: torch.Tensor,
+     weight: torch.Tensor,
+     bias: torch.Tensor | None,
+     seq_idx: torch.Tensor | None,
+     initial_states: torch.Tensor | None,
+     final_states_out: torch.Tensor | None,
+     silu_activation: bool,
+ ) -> torch.Tensor:
+     out = torch.empty_like(x)
+     _causal_conv1d_fwd_cpp(
+         x=x,
+         weight=weight,
+         bias=bias,
+         seq_idx=seq_idx,
+         initial_states=initial_states,
+         out=out,
+         final_states_out=final_states_out,
+         silu_activation=silu_activation,
+     )
+     return out
+
+
+ def causal_conv1d_bwd_function(
+     x: torch.Tensor,
+     weight: torch.Tensor,
+     bias: torch.Tensor | None,
+     dout: torch.Tensor,
+     seq_idx: torch.Tensor | None,
+     initial_states: torch.Tensor | None,
+     dfinal_states: torch.Tensor | None,
+     dx: torch.Tensor | None,
+     return_dinitial_states: torch.Tensor,
+     silu_activation: bool,
+ ) -> tuple[torch.Tensor | None]:
+     batch_size, dim = x.size()[:2]
+     width = weight.size(-1)
+
+     if dx is None:
+         dx = torch.empty_like(x)
+     dweight = torch.zeros_like(weight, dtype=torch.float32)
+     dbias = None
+     if bias is not None:
+         dbias = torch.zeros_like(bias, dtype=torch.float32)
+     dinitial_states = None
+     if return_dinitial_states:
+         dinitial_states = torch.empty(batch_size, width - 1, dim, device=x.device, dtype=x.dtype).transpose(1, 2)
+
+     _causal_conv1d_bwd_cpp(
+         x=x,
+         weight=weight,
+         bias=bias,
+         dout=dout,
+         seq_idx=seq_idx,
+         initial_states=initial_states,
+         dfinal_states=dfinal_states,
+         dx=dx,
+         dweight=dweight,
+         dbias=dbias,
+         dinitial_states=dinitial_states,
+         silu_activation=silu_activation,
+     )
+
+     dweight = dweight.type_as(weight)
+     if dbias is not None:
+         dbias = dbias.type_as(bias)
+     return dx, dweight, dbias, dinitial_states
+
+
+ def causal_conv1d_update_function(
+     x: torch.Tensor,
+     conv_state: torch.Tensor,
+     weight: torch.Tensor,
+     bias: torch.Tensor | None,
+     silu_activation: bool,
+     cache_seqlens: torch.Tensor | None,
+     conv_state_indices: torch.Tensor | None,
+ ) -> torch.Tensor:
+     out = torch.empty_like(x)
+     _causal_conv1d_update_cpp(
+         x=x,
+         conv_state=conv_state,
+         weight=weight,
+         bias=bias,
+         out=out,
+         silu_activation=silu_activation,
+         cache_seqlens=cache_seqlens,
+         conv_state_indices=conv_state_indices,
+     )
+     return out
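
The new file above follows PyTorch's torch.library.custom_op pattern (available in PyTorch ≥ 2.4): the raw causal_conv1d_cuda call writes into pre-allocated tensors, that mutation is declared via mutates_args, and a thin Python wrapper allocates the output and returns it, so the opaque extension call can be handled safely by the dispatcher and torch.compile. A self-contained toy sketch of the same pattern (toy_lib and scaled_copy are illustrative names, not part of this package):

# Toy illustration of the custom-op-plus-wrapper pattern used in cpp_functions.py.
import torch

@torch.library.custom_op("toy_lib::scaled_copy", mutates_args={"out"})
def scaled_copy(x: torch.Tensor, scale: float, out: torch.Tensor) -> None:
    # The "kernel": writes into the caller-provided buffer, mirroring how the
    # CUDA extension fills `out` in place.
    out.copy_(x * scale)

def scaled(x: torch.Tensor, scale: float) -> torch.Tensor:
    # Thin wrapper, analogous to causal_conv1d_fwd_function: allocate, call, return.
    out = torch.empty_like(x)
    scaled_copy(x, scale, out)  # also reachable as torch.ops.toy_lib.scaled_copy
    return out

print(scaled(torch.arange(4.0), 2.0))  # tensor([0., 2., 4., 6.])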
{causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: causal-conv1d
- Version: 1.5.0.post5
+ Version: 1.5.1
  Summary: Causal depthwise conv1d in CUDA, with a PyTorch interface
  Home-page: https://github.com/Dao-AILab/causal-conv1d
  Author: Tri Dao
{causal_conv1d-1.5.0.post5 → causal_conv1d-1.5.1}/causal_conv1d.egg-info/SOURCES.txt
@@ -1,12 +1,21 @@
  AUTHORS
  LICENSE
+ MANIFEST.in
  README.md
+ pyproject.toml
  setup.py
  causal_conv1d/__init__.py
  causal_conv1d/causal_conv1d_interface.py
  causal_conv1d/causal_conv1d_varlen.py
+ causal_conv1d/cpp_functions.py
  causal_conv1d.egg-info/PKG-INFO
  causal_conv1d.egg-info/SOURCES.txt
  causal_conv1d.egg-info/dependency_links.txt
  causal_conv1d.egg-info/requires.txt
- causal_conv1d.egg-info/top_level.txt
+ causal_conv1d.egg-info/top_level.txt
+ csrc/causal_conv1d.h
+ csrc/causal_conv1d_bwd.cu
+ csrc/causal_conv1d_common.h
+ csrc/causal_conv1d_fwd.cu
+ csrc/causal_conv1d_update.cu
+ csrc/static_switch.h
causal_conv1d-1.5.1/csrc/causal_conv1d.h
@@ -0,0 +1,81 @@
+ /******************************************************************************
+  * Copyright (c) 2024, Tri Dao.
+  ******************************************************************************/
+
+ #pragma once
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ struct ConvParamsBase {
+     using index_t = uint32_t;
+
+     int batch, dim, seqlen, width;
+     bool silu_activation;
+
+     index_t x_batch_stride;
+     index_t x_c_stride;
+     index_t x_l_stride;
+     index_t weight_c_stride;
+     index_t weight_width_stride;
+     index_t out_batch_stride;
+     index_t out_c_stride;
+     index_t out_l_stride;
+
+     int conv_state_len;
+     index_t conv_state_batch_stride;
+     index_t conv_state_c_stride;
+     index_t conv_state_l_stride;
+
+     // Common data pointers.
+     void *__restrict__ x_ptr;
+     void *__restrict__ weight_ptr;
+     void *__restrict__ bias_ptr;
+     void *__restrict__ out_ptr;
+
+     void *__restrict__ conv_state_ptr;
+     int32_t *__restrict__ cache_seqlens;
+
+     // Only used if the elements of the batch are gathered from a larger buffer,
+     // which may happen for continuous batching.
+     int32_t *__restrict__ conv_state_indices_ptr;
+
+     void *__restrict__ seq_idx_ptr;
+
+     // No __restrict__ since initial_states could be the same as final_states.
+     void * initial_states_ptr;
+     index_t initial_states_batch_stride;
+     index_t initial_states_l_stride;
+     index_t initial_states_c_stride;
+
+     void * final_states_ptr;
+     index_t final_states_batch_stride;
+     index_t final_states_l_stride;
+     index_t final_states_c_stride;
+ };
+
+ struct ConvParamsBwd: public ConvParamsBase {
+     index_t dx_batch_stride;
+     index_t dx_c_stride;
+     index_t dx_l_stride;
+     index_t dweight_c_stride;
+     index_t dweight_width_stride;
+     index_t dout_batch_stride;
+     index_t dout_c_stride;
+     index_t dout_l_stride;
+
+     // Common data pointers.
+     void *__restrict__ dx_ptr;
+     void *__restrict__ dweight_ptr;
+     void *__restrict__ dbias_ptr;
+     void *__restrict__ dout_ptr;
+
+     void * dinitial_states_ptr;
+     index_t dinitial_states_batch_stride;
+     index_t dinitial_states_l_stride;
+     index_t dinitial_states_c_stride;
+
+     void * dfinal_states_ptr;
+     index_t dfinal_states_batch_stride;
+     index_t dfinal_states_l_stride;
+     index_t dfinal_states_c_stride;
+ };
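
The *_stride members above mirror the element strides of the PyTorch tensors handed to the kernels; the code that fills them lives in the C++ bindings, which are not part of this diff. A rough Python illustration of what the (batch, dim, seqlen) layout implies:

# Illustrative only: how x_batch_stride / x_c_stride / x_l_stride relate to
# torch strides for a contiguous (batch, dim, seqlen) input.
import torch

batch, dim, seqlen = 2, 8, 16
x = torch.randn(batch, dim, seqlen)

x_batch_stride = x.stride(0)  # elements between batch entries -> dim * seqlen = 128
x_c_stride = x.stride(1)      # elements between channels      -> seqlen = 16
x_l_stride = x.stride(2)      # elements between time steps    -> 1
assert (x_batch_stride, x_c_stride, x_l_stride) == (dim * seqlen, seqlen, 1)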