PyPI - paddlex - Versions diffs - 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl - Mend

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (824) hide show

paddlex/inference/models/common/vlm/activations.py ADDED Viewed

@@ -0,0 +1,189 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+from collections import OrderedDict
+import paddle
+import paddle.nn.functional as F
+from paddle import Tensor, nn
+class NewGELUActivation(nn.Layer):
+    """
+    Tanh-based GELU: 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3))).
+    This is the GELU variant found in the Google BERT repo (identical to OpenAI GPT). Also see the Gaussian
+    Error Linear Units paper: https://arxiv.org/abs/1606.08415
+    """
+    def forward(self, input: Tensor) -> Tensor:
+        # Built inside-out; the order of the floating-point operations is kept as in the one-expression form.
+        cubic_term = 0.044715 * paddle.pow(input, 3.0)
+        tanh_arg = math.sqrt(2.0 / math.pi) * (input + cubic_term)
+        return 0.5 * input * (1.0 + paddle.tanh(tanh_arg))
+class GELUActivation(nn.Layer):
+    """
+    GELU as originally implemented in the Google BERT repo. By default the call is delegated to
+    `nn.functional.gelu`; with `use_gelu_python=True` the erf formula `x * 0.5 * (1 + erf(x / sqrt(2)))` is
+    evaluated with paddle ops instead.
+    For information: OpenAI GPT's GELU is slightly different (and gives slightly different results): 0.5 * x * (1 +
+    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))).
+    Also see the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
+    """
+    def __init__(self, use_gelu_python: bool = False):
+        super().__init__()
+        # Choose the implementation once so that forward() is a plain delegation.
+        self.act = self._gelu_python if use_gelu_python else nn.functional.gelu
+    def _gelu_python(self, input: Tensor) -> Tensor:
+        half_input = input * 0.5
+        return half_input * (1.0 + paddle.erf(input / math.sqrt(2.0)))
+    def forward(self, input: Tensor) -> Tensor:
+        return self.act(input)
+class FastGELUActivation(nn.Layer):
+    """
+    GELU approximation that is slower than QuickGELU but more accurate. See: https://github.com/hendrycks/GELUs
+    Evaluates 0.5 * x * (1 + tanh(x * 0.7978845608 * (1 + 0.044715 * x * x))).
+    """
+    def forward(self, input: Tensor) -> Tensor:
+        polynomial = 1.0 + 0.044715 * input * input
+        tanh_out = paddle.tanh(input * 0.7978845608 * polynomial)
+        return 0.5 * input * (1.0 + tanh_out)
+class QuickGELUActivation(nn.Layer):
+    """
+    GELU approximation that is fast but somewhat inaccurate: x * sigmoid(1.702 * x).
+    See: https://github.com/hendrycks/GELUs
+    """
+    def forward(self, input: Tensor) -> Tensor:
+        gate = F.sigmoid(1.702 * input)
+        return input * gate
+class ClippedGELUActivation(nn.Layer):
+    """
+    GELU whose output is clipped to the range [min, max]. This is especially useful for quantization, as it allows
+    mapping negative values in the GELU spectrum. For more information on this trick, please refer to
+    https://arxiv.org/abs/2004.09602.
+    The un-clipped value comes from the module-level `gelu` activation. See https://arxiv.org/abs/1606.08415 for
+    the Gaussian Error Linear Unit itself.
+    Raises:
+        ValueError: if `min` is greater than `max` (equal bounds are accepted).
+    """
+    def __init__(self, min: float, max: float):
+        # Validate before the layer is initialised so a bad range never yields a half-built layer.
+        bounds_inverted = min > max
+        if bounds_inverted:
+            raise ValueError(f"min should be < max (got min: {min}, max: {max})")
+        super().__init__()
+        self.min, self.max = min, max
+    def forward(self, x: Tensor) -> Tensor:
+        activated = gelu(x)
+        return paddle.clip(activated, self.min, self.max)
+class SiLUActivation(nn.Layer):
+    """
+    Layer wrapper around `F.silu`.
+    The SiLU (Sigmoid Linear Unit) was originally introduced and coined in Gaussian Error Linear Units (Hendrycks
+    et al., https://arxiv.org/abs/1606.08415); it was experimented with later in Sigmoid-Weighted Linear Units for
+    Neural Network Function Approximation in Reinforcement Learning (Elfwing et al.,
+    https://arxiv.org/abs/1702.03118) and Swish: a Self-Gated Activation Function (Ramachandran et al.,
+    https://arxiv.org/abs/1710.05941v1).
+    """
+    def forward(self, input: Tensor) -> Tensor:
+        activated = F.silu(input)
+        return activated
+class MishActivation(nn.Layer):
+    """
+    Layer wrapper around `F.mish`.
+    See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra., https://arxiv.org/abs/1908.08681). The
+    official repository for the paper is https://github.com/digantamisra98/Mish
+    """
+    def forward(self, input: Tensor) -> Tensor:
+        activated = F.mish(input)
+        return activated
+class LinearActivation(nn.Layer):
+    """
+    Applies the linear (identity) activation function, i.e. forwarding input directly to output.
+    The very same tensor object is returned; no copy is made.
+    """
+    def forward(self, input: Tensor) -> Tensor:
+        return input
+class ClassInstantier(OrderedDict):
+    """
+    Ordered mapping whose item lookup returns a freshly constructed object rather than the stored value.
+    A stored value is either a callable class or a `(class, kwargs)` tuple. `mapping[key]` calls the class with
+    those keyword arguments (none for a bare class), so every lookup yields a new instance.
+    """
+    def __getitem__(self, key):
+        entry = super().__getitem__(key)
+        if isinstance(entry, tuple):
+            factory, init_kwargs = entry
+        else:
+            factory, init_kwargs = entry, {}
+        return factory(**init_kwargs)
+# Activation name -> layer class, or (layer class, constructor kwargs) when the class needs arguments.
+# "silu" and "swish" map to the same class.
+ACT2CLS = {
+    "gelu": GELUActivation,
+    "gelu_10": (ClippedGELUActivation, {"min": -10, "max": 10}),
+    "gelu_fast": FastGELUActivation,
+    "gelu_new": NewGELUActivation,
+    "gelu_python": (GELUActivation, {"use_gelu_python": True}),
+    "linear": LinearActivation,
+    "mish": MishActivation,
+    "quick_gelu": QuickGELUActivation,
+    "relu": nn.ReLU,
+    "relu6": nn.ReLU6,
+    "sigmoid": nn.Sigmoid,
+    "silu": SiLUActivation,
+    "swish": SiLUActivation,
+    "tanh": nn.Tanh,
+}
+# Same table, but indexing it instantiates the layer (see ClassInstantier.__getitem__).
+ACT2FN = ClassInstantier(ACT2CLS)
+def get_activation(activation_string):
+    """
+    Return a new activation layer for the given name.
+    Args:
+        activation_string: a key of `ACT2FN`, e.g. "gelu" or "silu".
+    Raises:
+        KeyError: if the name is not registered; the message lists the known names.
+    """
+    if activation_string not in ACT2FN:
+        raise KeyError(
+            f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}"
+        )
+    return ACT2FN[activation_string]
+# Ready-made module-level activation instances, built once at import time.
+# Each call below constructs a separate layer object through ACT2FN.
+# ClippedGELUActivation.forward looks up `gelu` from this group at call time.
+gelu_python = get_activation("gelu_python")
+gelu_new = get_activation("gelu_new")
+gelu = get_activation("gelu")
+gelu_fast = get_activation("gelu_fast")
+quick_gelu = get_activation("quick_gelu")
+silu = get_activation("silu")
+mish = get_activation("mish")
+linear_act = get_activation("linear")

paddlex/inference/models/common/vlm/bert_padding.py ADDED Viewed

@@ -0,0 +1,127 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import operator
+from functools import reduce
+import paddle
+import paddle.nn.functional as F
+class IndexFirstAxis(paddle.autograd.PyLayer):
+    """Differentiable selection of entries along the first axis of `input` at positions `indices`."""
+    @staticmethod
+    def forward(ctx, input, indices):
+        """
+        Pick `input[indices]` along axis 0.
+        Arguments:
+            input: tensor with at least 2 dimensions.
+            indices: 1-D tensor of positions along the first axis (the "z -> z d" pattern requires rank 1).
+        Return:
+            tensor of shape (len(indices), *input.shape[1:]).
+        """
+        from einops import rearrange, repeat
+        ctx.save_for_backward(indices)
+        assert input.ndim >= 2
+        # Remember the size of the first axis so backward can rebuild a gradient of the input's shape.
+        ctx.first_axis_dim, other_shape = input.shape[0], input.shape[1:]
+        second_dim = reduce(operator.mul, other_shape, 1)
+        # Flatten all trailing axes into one, gather with the index repeated across that axis,
+        # then restore the trailing shape.
+        return paddle.take_along_axis(
+            arr=rearrange(input, "b ... -> b (...)"),
+            axis=0,
+            indices=repeat(indices, "z -> z d", d=second_dim),
+        ).reshape([-1, *other_shape])
+    @staticmethod
+    def backward(ctx, grad_output):
+        """
+        Write `grad_output` back into a zero tensor shaped like the forward input.
+        Return:
+            (grad_input, None): the gradient for `input`, and no gradient for `indices`.
+        """
+        from einops import rearrange, repeat
+        (indices,) = ctx.saved_tensor()
+        assert grad_output.ndim >= 2
+        other_shape = grad_output.shape[1:]
+        grad_output = rearrange(grad_output, "b ... -> b (...)")
+        # Positions that were not selected in forward keep a zero gradient.
+        grad_input = paddle.zeros(
+            shape=[ctx.first_axis_dim, tuple(grad_output.shape)[1]],
+            dtype=grad_output.dtype,
+        )
+        grad_input.put_along_axis_(
+            axis=0,
+            indices=repeat(indices, "z -> z d", d=tuple(grad_output.shape)[1]),
+            values=grad_output,
+        )
+        return grad_input.reshape([ctx.first_axis_dim, *other_shape]), None
+# Functional entry point used by unpad_input.
+index_first_axis = IndexFirstAxis.apply
+class IndexPutFirstAxis(paddle.autograd.PyLayer):
+    """Differentiable placement of `values` at positions `indices` of a zero tensor along the first axis."""
+    @staticmethod
+    def forward(ctx, values, indices, first_axis_dim):
+        """
+        Arguments:
+            values: tensor with at least 2 dimensions, one entry along axis 0 per index.
+            indices: 1-D tensor of destination positions along the first axis.
+            first_axis_dim: int, size of the first axis of the output.
+        Return:
+            tensor of shape (first_axis_dim, *values.shape[1:]), zero where no value was placed.
+        """
+        ctx.save_for_backward(indices)
+        assert indices.ndim == 1
+        assert values.ndim >= 2
+        output = paddle.zeros(
+            shape=[first_axis_dim, *tuple(values.shape)[1:]], dtype=values.dtype
+        )
+        output[indices] = values
+        return output
+    @staticmethod
+    def backward(ctx, grad_output):
+        """
+        Read the gradient back at the positions that were written in forward.
+        Return:
+            (grad_values, None): the gradient for `values`, and no gradient for `indices`.
+        """
+        (indices,) = ctx.saved_tensor()
+        grad_values = grad_output[indices]
+        return grad_values, None
+# Functional entry point used by pad_input.
+index_put_first_axis = IndexPutFirstAxis.apply
+def unpad_input(hidden_states, attention_mask):
+    """
+    Drop the masked-out positions of a padded batch.
+    Arguments:
+        hidden_states: (batch, seqlen, ...)
+        attention_mask: (batch, seqlen), bool / int, 1 means valid and 0 means not valid.
+    Return:
+        hidden_states: (total_nnz, ...), where total_nnz = number of tokens selected in attention_mask.
+        indices: (total_nnz), the indices of non-masked tokens from the flattened input sequence.
+        cu_seqlens: (batch + 1), the cumulative sequence lengths, used to index into hidden_states.
+        max_seqlen_in_batch: int
+    """
+    from einops import rearrange
+    # Positions of the valid tokens in the flattened (batch * seqlen) layout.
+    flat_mask = attention_mask.flatten()
+    indices = paddle.nonzero(flat_mask, as_tuple=False).flatten()
+    # Per-sequence valid-token counts drive both the prefix sums and the maximum.
+    tokens_per_sequence = paddle.sum(attention_mask, axis=-1, dtype="int32")
+    max_seqlen_in_batch = paddle.max(tokens_per_sequence).item()
+    # Prepend a zero so that cu_seqlens[i] is the start offset of sequence i.
+    cu_seqlens = F.pad(paddle.cumsum(tokens_per_sequence, axis=0), [1, 0])
+    flat_states = rearrange(hidden_states, "b s ... -> (b s) ...")
+    unpadded_states = index_first_axis(flat_states, indices)
+    return unpadded_states, indices, cu_seqlens, max_seqlen_in_batch
+def pad_input(hidden_states, indices, batch, seqlen):
+    """
+    Scatter unpadded tokens back into a zero-filled padded batch.
+    Arguments:
+        hidden_states: (total_nnz, ...), where total_nnz = number of tokens selected in attention_mask.
+        indices: (total_nnz), the indices that represent the non-masked tokens of the original padded input sequence.
+        batch: int, batch size for the padded sequence.
+        seqlen: int, maximum sequence length for the padded sequence.
+    Return:
+        hidden_states: (batch, seqlen, ...)
+    """
+    from einops import rearrange
+    flat_length = batch * seqlen
+    flat_padded = index_put_first_axis(hidden_states, indices, flat_length)
+    return rearrange(flat_padded, "(b s) ... -> b s ...", b=batch)

paddlex/inference/models/common/vlm/conversion_utils.py ADDED Viewed

@@ -0,0 +1,99 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import paddle
+def fuse_param_func():
+    """
+    Return a function that fuses several parameters into one along the last axis.
+    The returned `fn(fuse_params, is_qkv=False, num_heads=None, num_key_value_heads=None)` concatenates
+    `fuse_params` as they are, unless `is_qkv` is set. With `is_qkv`, `fuse_params` must be `[Q, K, V]` and the
+    heads are interleaved per key/value head as `[q ... q, k, v]`, with `num_heads // num_key_value_heads` query
+    heads in each group.
+    """
+    def fn(fuse_params, is_qkv=False, num_heads=None, num_key_value_heads=None):
+        # Paddle tensors go through paddle ops; anything else is handled with the NumPy equivalents.
+        is_paddle = isinstance(fuse_params[0], paddle.Tensor)
+        concat_fn = paddle.concat if is_paddle else np.concatenate
+        split_fn = paddle.split if is_paddle else np.split
+        if not is_qkv:
+            return concat_fn(fuse_params, axis=-1)
+        assert (
+            num_heads
+        ), f"num_heads should be number of heads for Q, but got {num_heads}"
+        assert (
+            num_key_value_heads
+        ), f"num_key_value_heads should be number of key_value_heads for K and V, but got {num_key_value_heads}"
+        assert (
+            len(fuse_params) == 3
+        ), f"fuse_params length is not equal 3, it should be Q K V list. but got length {len(fuse_params)}"
+        queries_per_kv = num_heads // num_key_value_heads
+        q_heads = split_fn(fuse_params[0], num_heads, axis=-1)
+        k_heads = split_fn(fuse_params[1], num_key_value_heads, axis=-1)
+        v_heads = split_fn(fuse_params[2], num_key_value_heads, axis=-1)
+        interleaved = []
+        for kv_idx in range(num_key_value_heads):
+            q_start = kv_idx * queries_per_kv
+            interleaved.extend(q_heads[q_start : q_start + queries_per_kv])
+            interleaved.append(k_heads[kv_idx])
+            interleaved.append(v_heads[kv_idx])
+        return concat_fn(interleaved, axis=-1)
+    return fn
+def split_param_func():
+    """
+    Return a function that splits a fused parameter along the last axis.
+    The returned `fn` splits `fused_param` into `split_nums` parts, unless `is_qkv` is set. With `is_qkv`, the
+    parameter is read as per-key/value-head groups laid out as `[q ... q, k, v]`, and the result is the tuple
+    `(Q, K, V)` with the heads of each kind concatenated back together.
+    """
+    def fn(
+        fused_param,
+        split_nums=2,
+        is_qkv=False,
+        num_heads=None,
+        num_key_value_heads=None,
+    ):
+        # Paddle tensors go through paddle ops; anything else is handled with the NumPy equivalents.
+        is_paddle = isinstance(fused_param, paddle.Tensor)
+        concat_fn = paddle.concat if is_paddle else np.concatenate
+        split_fn = paddle.split if is_paddle else np.split
+        if not is_qkv:
+            return split_fn(fused_param, split_nums, axis=-1)
+        assert (
+            num_heads
+        ), f"num_heads should be number of heads for Q, but got {num_heads}"
+        assert (
+            num_key_value_heads
+        ), f"num_key_value_heads should be number of key_value_heads for K and V, but got {num_key_value_heads}"
+        queries_per_kv = num_heads // num_key_value_heads
+        # Each group holds its query heads followed by one key head and one value head.
+        group_width = queries_per_kv + 2
+        head_chunks = split_fn(
+            fused_param, num_heads + 2 * num_key_value_heads, axis=-1
+        )
+        q_heads, k_heads, v_heads = [], [], []
+        for kv_idx in range(num_key_value_heads):
+            base = kv_idx * group_width
+            q_heads.extend(head_chunks[base : base + queries_per_kv])
+            k_heads.append(head_chunks[base + queries_per_kv])
+            v_heads.append(head_chunks[base + queries_per_kv + 1])
+        return (
+            concat_fn(q_heads, axis=-1),
+            concat_fn(k_heads, axis=-1),
+            concat_fn(v_heads, axis=-1),
+        )
+    return fn
+def split_or_fuse_func(is_fuse=True):
+    """Return the fusing function when `is_fuse` is truthy, otherwise the splitting function."""
+    if is_fuse:
+        return fuse_param_func()
+    return split_param_func()

paddlex/inference/models/common/vlm/distributed.py ADDED Viewed

@@ -0,0 +1,229 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Any, Union
+import numpy as np
+import paddle
+import paddle.distributed as distributed
+from .utils import device_guard
+# Evaluated once, when this module is imported; not referenced by the functions visible in this file.
+world_size = distributed.get_world_size()
+def convert_file_size_to_int(size: Union[int, str]) -> int:
+    """
+    Converts a size expressed as a string with digits and a unit (like `"5MB"`) to an integer (in bytes).
+    Supported units are the binary `GiB` / `MiB` / `KiB` and the decimal `GB` / `MB` / `KB`, matched
+    case-insensitively. For the decimal units a lowercase trailing `b` (e.g. `"5Mb"`) means bits, so the result
+    is divided by 8.
+    Args:
+        size (`int` or `str`): The size to convert. Will be directly returned if an `int`.
+    Returns:
+        `int`: the size in bytes.
+    Raises:
+        ValueError: if the unit is unknown or the part before the unit is not an integer.
+    """
+    if isinstance(size, int):
+        return size
+    invalid_format = "`size` is not in a valid format. Use an integer followed by the unit, e.g., '5GB'."
+    # (suffix, multiplier, whether a lowercase trailing "b" denotes bits)
+    units = (
+        ("GIB", 2**30, False),
+        ("MIB", 2**20, False),
+        ("KIB", 2**10, False),
+        ("GB", 10**9, True),
+        ("MB", 10**6, True),
+        ("KB", 10**3, True),
+    )
+    upper_size = size.upper()
+    for suffix, multiplier, bits_allowed in units:
+        if not upper_size.endswith(suffix):
+            continue
+        try:
+            int_size = int(size[: -len(suffix)]) * multiplier
+        except ValueError as err:
+            # e.g. "1.5GB" or "GB": report the expected format, not the bare int() failure.
+            raise ValueError(invalid_format) from err
+        if bits_allowed and size.endswith("b"):
+            return int_size // 8
+        return int_size
+    raise ValueError(invalid_format)
+def reduce_tensor(tensor, buffer_size="32MiB"):
+    """
+    Yield `tensor` in consecutive 1-D chunks of at most `buffer_size` bytes each.
+    Side effect: `tensor` is reshaped to 1-D in place and is NOT restored here; a caller that needs the
+    original shape must reshape it back afterwards.
+    Args:
+        tensor: the paddle tensor to slice.
+        buffer_size (`int` or `str`): chunk size, in any form accepted by `convert_file_size_to_int`.
+    Yields:
+        `(part_tensor, (start, stop))`: a slice of the flattened tensor and its element range.
+    """
+    if tensor.dtype == paddle.int8:
+        # NOTE(review): int8 bypasses paddle.numel, presumably because it is unsupported there; confirm.
+        # int() because np.prod of an empty shape is the float 1.0.
+        numel = int(np.prod(tensor.shape))
+    else:
+        numel = int(paddle.numel(tensor).item())
+    buffer_size = convert_file_size_to_int(buffer_size)
+    tensor.reshape_([-1])
+    # dtype_byte_size returns 1 / 8 for bool, which would make the step a float, and a buffer smaller
+    # than one element would make it 0; range() rejects both, so coerce to an int of at least 1.
+    send_size = max(1, int(buffer_size // dtype_byte_size(tensor.dtype)))
+    for start in range(0, numel, send_size):
+        stop = min(numel, start + send_size)
+        yield tensor[start:stop], (start, stop)
+def dtype_byte_size(dtype):
+    """
+    Returns the size (in bytes) occupied by one parameter of type `dtype`.
+    `paddle.bool` is reported as 1 / 8. For any other dtype the trailing number of its string form is read as a
+    bit width and integer-divided by 8.
+    Raises:
+        ValueError: if the string form of `dtype` does not end in a number preceded by a non-digit.
+    """
+    if dtype == paddle.bool:
+        return 1 / 8
+    width_match = re.search(r"[^\d](\d+)$", str(dtype))
+    if width_match is None:
+        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
+    return int(width_match.group(1)) // 8
+@paddle.no_grad()
+def distributed_gather(tensor: Any, dst: int = 0, group=None, offload=False) -> Any:
+    """Nested gather to rank `dst`, with optional chunked offload to NumPy.
+    Args:
+        tensor (Any): a tensor, or a tuple / list / dict of them (handled recursively, container type kept).
+        dst (int, optional): destination rank; indexed into `group.ranks` when a group is given. Defaults to 0.
+        group (optional): the communication group. Defaults to None.
+        offload (bool, optional): If True, the tensor is gathered chunk by chunk and each received chunk is
+            converted to a NumPy array on the destination rank. Defaults to False.
+    Raises:
+        AssertionError: re-raised with a fixed message when an AssertionError occurs inside.
+    Returns:
+        On the destination rank, one entry per rank of the group: tensors, or NumPy arrays reshaped to the
+        input's shape when `offload` is True. On every other rank, None.
+    """
+    try:
+        if isinstance(tensor, (tuple, list)):
+            return type(tensor)(
+                distributed_gather(t, dst, group, offload) for t in tensor
+            )
+        if isinstance(tensor, dict):
+            return {
+                k: distributed_gather(v, dst, group, offload) for k, v in tensor.items()
+            }
+        output_tensors = None
+        is_dst = dst == distributed.get_rank(group=group)
+        # Only the destination rank allocates receive storage.
+        if is_dst:
+            if offload:
+                # One list of NumPy chunks per source rank, filled as the slices arrive.
+                output_tensors = [
+                    [] for _ in range(distributed.get_world_size(group=group))
+                ]
+            else:
+                output_tensors = [
+                    paddle.empty_like(tensor)
+                    for _ in range(distributed.get_world_size(group=group))
+                ]
+                # Promote 0-d buffers to 1-d.
+                output_tensors = [
+                    t if len(t.shape) > 0 else t[None] for t in output_tensors
+                ]
+        if offload:
+            # The tensor is flattened in place for slicing and restored after the loop.
+            origin_shape = tensor.shape
+            tensor.reshape_([-1])
+            for slice_tensor, index in reduce_tensor(tensor):
+                slice_output_tensors = None
+                if distributed.get_rank(group=group) == dst:
+                    slice_output_tensors = [
+                        paddle.empty_like(slice_tensor)
+                        for _ in range(distributed.get_world_size(group=group))
+                    ]
+                paddle.distributed.communication.stream.gather(
+                    slice_tensor,
+                    slice_output_tensors,
+                    dst=group.ranks[dst] if group else dst,
+                    group=group,
+                    sync_op=True,
+                    use_calc_stream=False,
+                )
+                if is_dst:
+                    # Move each received slice to a NumPy array right away.
+                    for i in range(len(output_tensors)):
+                        output_tensors[i].append(slice_output_tensors[i].cpu().numpy())
+            tensor.reshape_(origin_shape)
+            if is_dst:
+                # device_guard comes from .utils; presumably it makes CPU the current device. TODO confirm
+                with device_guard("cpu"):
+                    new_output_tensors = []
+                    for x in output_tensors:
+                        # Stitch the chunks of one rank together and restore the original shape.
+                        t = np.concatenate(x)
+                        t = t.reshape(origin_shape)
+                        new_output_tensors.append(t)
+                    output_tensors = new_output_tensors
+        else:
+            paddle.distributed.communication.stream.gather(
+                tensor,
+                output_tensors,
+                dst=group.ranks[dst] if group else dst,
+                group=group,
+                sync_op=True,
+                use_calc_stream=False,
+            )
+        return output_tensors
+    except AssertionError:
+        raise AssertionError("Not currently using distributed training")
+@paddle.no_grad()
+def distributed_allgather(tensor: Any, group=None, offload=False):
+    """Nested all-gather with optional offload.
+    Args:
+        tensor (Any): the desired tensor, list of tensor, dict of tensor to allgather.
+        group (_type_, optional): the communication group. Defaults to None.
+        offload (bool, optional): If True, the tensor is gathered chunk by chunk and the receive buffers are
+            created and filled under `device_guard("cpu")`. Defaults to False.
+    Raises:
+        AssertionError: re-raised with a fixed message when an AssertionError occurs inside.
+    Returns:
+        tensor list: list of all gathered tensors, one per rank of `group`. Nested inputs keep their
+        container type.
+    """
+    try:
+        if isinstance(tensor, (tuple, list)):
+            return type(tensor)(
+                distributed_allgather(t, group, offload) for t in tensor
+            )
+        if isinstance(tensor, dict):
+            return {
+                k: distributed_allgather(v, group, offload) for k, v in tensor.items()
+            }
+        if offload:
+            with device_guard("cpu"):
+                output_tensors = [
+                    paddle.empty_like(tensor)
+                    for _ in range(distributed.get_world_size(group))
+                ]
+        else:
+            output_tensors = [
+                paddle.empty_like(tensor)
+                for _ in range(distributed.get_world_size(group))
+            ]
+        # Promote 0-d buffers to 1-d.
+        output_tensors = [t if len(t.shape) > 0 else t[None] for t in output_tensors]
+        if offload:
+            # Flatten the input and the receive buffers in place so chunks can be copied by element range.
+            origin_shape = tensor.shape
+            tensor.reshape_([-1])
+            for x in output_tensors:
+                x.reshape_([-1])
+            for slice_tensor, index in reduce_tensor(tensor):
+                slice_output_tensors = [
+                    paddle.empty_like(slice_tensor)
+                    for _ in range(distributed.get_world_size(group))
+                ]
+                distributed.all_gather(slice_output_tensors, slice_tensor, group=group)
+                for x, y in zip(slice_output_tensors, output_tensors):
+                    with device_guard("cpu"):
+                        y[index[0] : index[1]] = x.cpu()
+            tensor.reshape_(origin_shape)
+            for x in output_tensors:
+                x.reshape_(origin_shape)
+        else:
+            # Gather over the same group that sized `output_tensors`; without `group=` the default group
+            # would be used and the buffer count could disagree with the number of participants.
+            distributed.all_gather(output_tensors, tensor, group=group)
+        return output_tensors
+    except AssertionError:
+        raise AssertionError("Not currently using distributed training")

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl