fbgemm-gpu-genai-nightly 2025.12.19__cp310-cp310-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of fbgemm-gpu-genai-nightly might be problematic.

Files changed (127)
  1. fbgemm_gpu/__init__.py +186 -0
  2. fbgemm_gpu/asmjit.so +0 -0
  3. fbgemm_gpu/batched_unary_embeddings_ops.py +87 -0
  4. fbgemm_gpu/config/__init__.py +9 -0
  5. fbgemm_gpu/config/feature_list.py +88 -0
  6. fbgemm_gpu/docs/__init__.py +18 -0
  7. fbgemm_gpu/docs/common.py +9 -0
  8. fbgemm_gpu/docs/examples.py +73 -0
  9. fbgemm_gpu/docs/jagged_tensor_ops.py +259 -0
  10. fbgemm_gpu/docs/merge_pooled_embedding_ops.py +36 -0
  11. fbgemm_gpu/docs/permute_pooled_embedding_ops.py +108 -0
  12. fbgemm_gpu/docs/quantize_ops.py +41 -0
  13. fbgemm_gpu/docs/sparse_ops.py +616 -0
  14. fbgemm_gpu/docs/target.genai.json.py +6 -0
  15. fbgemm_gpu/enums.py +24 -0
  16. fbgemm_gpu/experimental/example/__init__.py +29 -0
  17. fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
  18. fbgemm_gpu/experimental/example/utils.py +20 -0
  19. fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py +15 -0
  20. fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py +5654 -0
  21. fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py +4422 -0
  22. fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py +1192 -0
  23. fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py +232 -0
  24. fbgemm_gpu/experimental/gemm/triton_gemm/utils.py +130 -0
  25. fbgemm_gpu/experimental/gen_ai/__init__.py +56 -0
  26. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py +46 -0
  27. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py +333 -0
  28. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py +552 -0
  29. fbgemm_gpu/experimental/gen_ai/bench/__init__.py +13 -0
  30. fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py +257 -0
  31. fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py +348 -0
  32. fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py +707 -0
  33. fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py +3483 -0
  34. fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
  35. fbgemm_gpu/experimental/gen_ai/moe/README.md +15 -0
  36. fbgemm_gpu/experimental/gen_ai/moe/__init__.py +66 -0
  37. fbgemm_gpu/experimental/gen_ai/moe/activation.py +292 -0
  38. fbgemm_gpu/experimental/gen_ai/moe/gather_scatter.py +740 -0
  39. fbgemm_gpu/experimental/gen_ai/moe/layers.py +1272 -0
  40. fbgemm_gpu/experimental/gen_ai/moe/shuffling.py +421 -0
  41. fbgemm_gpu/experimental/gen_ai/quantize.py +307 -0
  42. fbgemm_gpu/fbgemm.so +0 -0
  43. fbgemm_gpu/metrics.py +160 -0
  44. fbgemm_gpu/permute_pooled_embedding_modules.py +142 -0
  45. fbgemm_gpu/permute_pooled_embedding_modules_split.py +85 -0
  46. fbgemm_gpu/quantize/__init__.py +43 -0
  47. fbgemm_gpu/quantize/quantize_ops.py +64 -0
  48. fbgemm_gpu/quantize_comm.py +315 -0
  49. fbgemm_gpu/quantize_utils.py +246 -0
  50. fbgemm_gpu/runtime_monitor.py +237 -0
  51. fbgemm_gpu/sll/__init__.py +189 -0
  52. fbgemm_gpu/sll/cpu/__init__.py +80 -0
  53. fbgemm_gpu/sll/cpu/cpu_sll.py +1001 -0
  54. fbgemm_gpu/sll/meta/__init__.py +35 -0
  55. fbgemm_gpu/sll/meta/meta_sll.py +337 -0
  56. fbgemm_gpu/sll/triton/__init__.py +127 -0
  57. fbgemm_gpu/sll/triton/common.py +38 -0
  58. fbgemm_gpu/sll/triton/triton_dense_jagged_cat_jagged_out.py +72 -0
  59. fbgemm_gpu/sll/triton/triton_jagged2_to_padded_dense.py +221 -0
  60. fbgemm_gpu/sll/triton/triton_jagged_bmm.py +418 -0
  61. fbgemm_gpu/sll/triton/triton_jagged_bmm_jagged_out.py +553 -0
  62. fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_add.py +52 -0
  63. fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_mul_jagged_out.py +175 -0
  64. fbgemm_gpu/sll/triton/triton_jagged_dense_flash_attention.py +861 -0
  65. fbgemm_gpu/sll/triton/triton_jagged_flash_attention_basic.py +667 -0
  66. fbgemm_gpu/sll/triton/triton_jagged_self_substraction_jagged_out.py +73 -0
  67. fbgemm_gpu/sll/triton/triton_jagged_softmax.py +463 -0
  68. fbgemm_gpu/sll/triton/triton_multi_head_jagged_flash_attention.py +751 -0
  69. fbgemm_gpu/sparse_ops.py +1455 -0
  70. fbgemm_gpu/split_embedding_configs.py +452 -0
  71. fbgemm_gpu/split_embedding_inference_converter.py +175 -0
  72. fbgemm_gpu/split_embedding_optimizer_ops.py +21 -0
  73. fbgemm_gpu/split_embedding_utils.py +29 -0
  74. fbgemm_gpu/split_table_batched_embeddings_ops.py +73 -0
  75. fbgemm_gpu/split_table_batched_embeddings_ops_common.py +484 -0
  76. fbgemm_gpu/split_table_batched_embeddings_ops_inference.py +2042 -0
  77. fbgemm_gpu/split_table_batched_embeddings_ops_training.py +4600 -0
  78. fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py +146 -0
  79. fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py +26 -0
  80. fbgemm_gpu/tbe/__init__.py +6 -0
  81. fbgemm_gpu/tbe/bench/__init__.py +55 -0
  82. fbgemm_gpu/tbe/bench/bench_config.py +156 -0
  83. fbgemm_gpu/tbe/bench/bench_runs.py +709 -0
  84. fbgemm_gpu/tbe/bench/benchmark_click_interface.py +187 -0
  85. fbgemm_gpu/tbe/bench/eeg_cli.py +137 -0
  86. fbgemm_gpu/tbe/bench/embedding_ops_common_config.py +149 -0
  87. fbgemm_gpu/tbe/bench/eval_compression.py +119 -0
  88. fbgemm_gpu/tbe/bench/reporter.py +35 -0
  89. fbgemm_gpu/tbe/bench/tbe_data_config.py +137 -0
  90. fbgemm_gpu/tbe/bench/tbe_data_config_bench_helper.py +323 -0
  91. fbgemm_gpu/tbe/bench/tbe_data_config_loader.py +289 -0
  92. fbgemm_gpu/tbe/bench/tbe_data_config_param_models.py +170 -0
  93. fbgemm_gpu/tbe/bench/utils.py +48 -0
  94. fbgemm_gpu/tbe/cache/__init__.py +11 -0
  95. fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py +385 -0
  96. fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py +48 -0
  97. fbgemm_gpu/tbe/ssd/__init__.py +15 -0
  98. fbgemm_gpu/tbe/ssd/common.py +46 -0
  99. fbgemm_gpu/tbe/ssd/inference.py +586 -0
  100. fbgemm_gpu/tbe/ssd/training.py +4908 -0
  101. fbgemm_gpu/tbe/ssd/utils/__init__.py +7 -0
  102. fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py +273 -0
  103. fbgemm_gpu/tbe/stats/__init__.py +10 -0
  104. fbgemm_gpu/tbe/stats/bench_params_reporter.py +339 -0
  105. fbgemm_gpu/tbe/utils/__init__.py +13 -0
  106. fbgemm_gpu/tbe/utils/common.py +42 -0
  107. fbgemm_gpu/tbe/utils/offsets.py +65 -0
  108. fbgemm_gpu/tbe/utils/quantize.py +251 -0
  109. fbgemm_gpu/tbe/utils/requests.py +556 -0
  110. fbgemm_gpu/tbe_input_multiplexer.py +108 -0
  111. fbgemm_gpu/triton/__init__.py +22 -0
  112. fbgemm_gpu/triton/common.py +77 -0
  113. fbgemm_gpu/triton/jagged/__init__.py +8 -0
  114. fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py +824 -0
  115. fbgemm_gpu/triton/quantize.py +647 -0
  116. fbgemm_gpu/triton/quantize_ref.py +286 -0
  117. fbgemm_gpu/utils/__init__.py +11 -0
  118. fbgemm_gpu/utils/filestore.py +211 -0
  119. fbgemm_gpu/utils/loader.py +36 -0
  120. fbgemm_gpu/utils/torch_library.py +132 -0
  121. fbgemm_gpu/uvm.py +40 -0
  122. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/METADATA +62 -0
  123. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/RECORD +127 -0
  124. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/WHEEL +5 -0
  125. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/top_level.txt +2 -0
  126. list_versions/__init__.py +12 -0
  127. list_versions/cli_run.py +163 -0
@@ -0,0 +1,259 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # [fbgemm-gpu.autogen.docs.examples.docstring.start]
+ import torch
+
+ from .common import add_docs
+
+
+ add_docs(
+ torch.ops.fbgemm.jagged_2d_to_dense,
+ """
+ jagged_2d_to_dense(values, x_offsets, max_sequence_length) -> Tensor
+
+ Converts a jagged tensor with a 2D values array into a dense tensor, padding with zeros.
+
+ Args:
+ values (Tensor): 2D tensor containing the values of the jagged tensor.
+
+ x_offsets (Tensor): 1D tensor containing the starting point of each jagged row in the values tensor.
+
+ max_sequence_length (int): Maximum length of any row in the jagged dimension.
+
+ Returns:
+ Tensor: The padded dense tensor
+
+ Example:
+ >>> values = torch.tensor([[1,1],[2,2],[3,3],[4,4]])
+ >>> x_offsets = torch.tensor([0, 1, 3])
+ >>> torch.ops.fbgemm.jagged_2d_to_dense(values, x_offsets, 3)
+ tensor([[[1, 1],
+ [0, 0],
+ [0, 0]],
+ [[2, 2],
+ [3, 3],
+ [0, 0]]])
+
+ """,
+ )
+ # [fbgemm-gpu.autogen.docs.examples.docstring.end]
+
+ add_docs(
+ torch.ops.fbgemm.jagged_1d_to_dense,
+ """
+ jagged_1d_to_dense(values, offsets, max_sequence_length, padding_value) -> Tensor
+
+ Converts a jagged tensor with a 1D values array into a dense tensor, padding with a specified padding value.
+
+ Args:
+ values (Tensor): 1D tensor containing the values of the jagged tensor.
+
+ offsets (Tensor): 1D tensor containing the starting point of each jagged row in the values tensor.
+
+ max_sequence_length (int): Maximum length of any row in the jagged dimension.
+
+ padding_value (int): Value to set in the empty areas of the dense output, outside of the jagged tensor coverage.
+
+ Returns:
+ Tensor: The padded dense tensor
+
+ Example:
+ >>> values = torch.tensor([1,2,3,4])
+ >>> offsets = torch.tensor([0, 1, 3])
+ >>> torch.ops.fbgemm.jagged_1d_to_dense(values, offsets, 3, 0)
+ tensor([[1, 0, 0],
+ [2, 3, 0]])
+
+ """,
+ )
+
+ add_docs(
+ torch.ops.fbgemm.dense_to_jagged,
+ """
+ dense_to_jagged(dense, x_offsets, total_L) -> (Tensor, Tensor[])
+
+ Converts a dense tensor into a jagged tensor, given the desired offsets of the resulting jagged tensor.
+
+ Args:
+ dense (Tensor): A dense input tensor to be converted
+
+ x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+
+ total_L (int, Optional): Total number of values in the resulting jagged tensor.
+
+ Returns:
+ (Tensor, Tensor[]): Values and offsets of the resulting jagged tensor. Offsets are identical to those that were input.
+
+ Example:
+ >>> dense = torch.tensor([[[1, 1], [0, 0], [0, 0]], [[2, 2], [3, 3], [0, 0]]])
+ >>> x_offsets = torch.tensor([0, 1, 3])
+ >>> torch.ops.fbgemm.dense_to_jagged(dense, [x_offsets])
+ (tensor([[1, 1],
+ [2, 2],
+ [3, 3]]), [tensor([0, 1, 3])])
+
+ """,
+ )
+
+
+ add_docs(
+ torch.ops.fbgemm.jagged_to_padded_dense,
+ """
+ jagged_to_padded_dense(values, offsets, max_lengths, padding_value=0) -> Tensor
+
+ Converts a jagged tensor into a dense tensor, padding with a specified padding value.
+
+ Args:
+ values (Tensor): Jagged tensor values
+
+ offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+
+ max_lengths (int[]): A list with max_length for each jagged dimension.
+
+ padding_value (float): Value to set in the empty areas of the dense output, outside of the jagged tensor coverage.
+
+ Returns:
+ Tensor: The padded dense tensor
+
+ Example:
+ >>> values = torch.tensor([[1,1],[2,2],[3,3],[4,4]])
+ >>> offsets = torch.tensor([0, 1, 3])
+ >>> torch.ops.fbgemm.jagged_to_padded_dense(values, [offsets], [3], 7)
+ tensor([[[1, 1],
+ [7, 7],
+ [7, 7]],
+ [[2, 2],
+ [3, 3],
+ [7, 7]]])
+ """,
+ )
+
+
+ add_docs(
+ torch.ops.fbgemm.jagged_dense_elementwise_add,
+ """
+ jagged_dense_elementwise_add(x_values, x_offsets, y) -> Tensor
+
+ Adds a jagged tensor to a dense tensor, resulting in a dense tensor. The jagged
+ tensor input will be padded with zeros for the purposes of the addition.
+
+ Args:
+ x_values (Tensor): Jagged tensor values
+
+ x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+
+ y (Tensor): A dense tensor
+
+ Returns:
+ Tensor: The sum of the jagged input tensor and y
+
+ """,
+ )
+
+
+ add_docs(
+ torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output,
+ """
+ jagged_dense_elementwise_add_jagged_output(x_values, x_offsets, y) -> (Tensor, Tensor[])
+
+ Adds a jagged tensor to a dense tensor, resulting in a jagged tensor with the same structure as the input jagged tensor.
+
+ Args:
+ x_values (Tensor): Jagged tensor values
+
+ x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+
+ y (Tensor): A dense tensor
+
+ Returns:
+ (Tensor, Tensor[]): Values and offsets of the resulting jagged tensor. Offsets are identical to those that were input.
+
+ """,
+ )
+
+
+ add_docs(
+ torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output,
+ """
+ jagged_dense_dense_elementwise_add_jagged_output(x_values, x_offsets, y_0, y_1) -> (Tensor, Tensor[])
+
+ Adds a jagged tensor to the sum of two dense tensors, resulting in a jagged tensor with the same structure as the input jagged tensor.
+
+ Args:
+ x_values (Tensor): Jagged tensor values
+
+ x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+
+ y_0 (Tensor): A dense tensor
+
+ y_1 (Tensor): A dense tensor
+
+ Returns:
+ (Tensor, Tensor[]): Values and offsets of the resulting jagged tensor. Offsets are identical to those that were input.
+
+ """,
+ )
+
+
+ add_docs(
+ torch.ops.fbgemm.jagged_dense_elementwise_mul,
+ """
+ jagged_dense_elementwise_mul(x_values, x_offsets, y) -> (Tensor, Tensor[])
+
+ Elementwise-multiplies a jagged tensor by a dense tensor, resulting in a jagged tensor with the same structure as the input jagged tensor.
+
+ Args:
+ x_values (Tensor): Jagged tensor values
+
+ x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+
+ y (Tensor): A dense tensor
+
+ Returns:
+ (Tensor, Tensor[]): Values and offsets of the resulting jagged tensor. Offsets are identical to those that were input.
+
+ """,
+ )
+
+ add_docs(
+ torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul,
+ """
+ batched_dense_vec_jagged_2d_mul(Tensor v, Tensor a_values, Tensor a_offsets) -> Tensor
+
+ Batched vector-matrix multiplication of a batched dense vector with a jagged tensor. The dense vector
+ has size (B * H, max_N) and the jagged tensor has size (B, max_N, H * D), where max_N is the maximum size of
+ the jagged dimension. B * H is the batch size, and each multiplication is of a vector of length max_N with a matrix of size [max_N, D].
+
+ Args:
+ v (Tensor): Dense vector tensor
+
+ a_values (Tensor): Jagged tensor values
+
+ a_offsets (Tensor []): A list of jagged offset tensors, one for each jagged dimension.
+
+ Returns:
+ Tensor: Output of the batched matmul, of size (B * H, D)
+
+ """,
+ )
+
+ # add_docs(
+ # torch.ops.fbgemm.stacked_jagged_1d_to_dense,
+ # """Args:
+ # {input}
+ # Keyword args:
+ # {out}""",
+ # )
+ #
+ #
+ # add_docs(
+ # torch.ops.fbgemm.stacked_jagged_2d_to_dense,
+ # """Args:
+ # {input}
+ # Keyword args:
+ # {out}""",
+ # )
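
For reference, a minimal sketch that composes the jagged-tensor ops documented in this file. Op names, signatures, and example values are taken from the docstrings above; the sketch assumes a CUDA-capable environment with this wheel installed and that importing fbgemm_gpu registers the torch.ops.fbgemm operators.

import torch
import fbgemm_gpu  # noqa: F401  # registers torch.ops.fbgemm.*

# Two jagged rows of lengths 1 and 2; each value is a 2-vector.
values = torch.tensor([[1, 1], [2, 2], [3, 3]], dtype=torch.float32, device="cuda")
offsets = torch.tensor([0, 1, 3], device="cuda")

# Pad to a dense (B, max_len, D) tensor, then convert back to jagged.
dense = torch.ops.fbgemm.jagged_to_padded_dense(values, [offsets], [3], 0.0)
rt_values, rt_offsets = torch.ops.fbgemm.dense_to_jagged(dense, [offsets])

# Elementwise add against a dense tensor of the padded shape, keeping
# the jagged structure in the output.
y = torch.ones_like(dense)
out_values, out_offsets = torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output(
    values, [offsets], y
)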
@@ -0,0 +1,36 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import torch
+
+ from .common import add_docs
+
+ add_docs(
+ torch.ops.fbgemm.merge_pooled_embeddings,
+ """
+ merge_pooled_embeddings(pooled_embeddings, uncat_dim_size, target_device, cat_dim=1) -> Tensor
+
+ Concatenate embedding outputs from different devices (on the same host)
+ onto the target device.
+
+ Args:
+ pooled_embeddings (List[Tensor]): A list of embedding outputs from
+ different devices on the same host. Each output has 2
+ dimensions.
+
+ uncat_dim_size (int): The size of the dimension that is not
+ concatenated, i.e., if `cat_dim=0`, `uncat_dim_size` is the size
+ of dim 1 and vice versa.
+
+ target_device (torch.device): The target device that aggregates all
+ the embedding outputs.
+
+ cat_dim (int = 1): The dimension along which the tensors are concatenated
+
+ Returns:
+ The concatenated embedding output (2D) on the target device
+ """,
+ )
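
A minimal sketch of merge_pooled_embeddings as documented above; the device ids and shapes are illustrative only, and the sketch assumes a host with at least two CUDA devices and this wheel installed.

import torch
import fbgemm_gpu  # noqa: F401  # registers torch.ops.fbgemm.*

B = 4  # local batch size; the un-concatenated dimension when cat_dim=1
pooled_embeddings = [
    torch.randn(B, 8, device="cuda:0"),
    torch.randn(B, 16, device="cuda:1"),
]

# Concatenate the per-device outputs along dim 1 onto cuda:0,
# producing a (B, 8 + 16) tensor on the target device.
merged = torch.ops.fbgemm.merge_pooled_embeddings(
    pooled_embeddings, B, torch.device("cuda:0"), 1
)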
@@ -0,0 +1,108 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import torch
+
+ from .common import add_docs
+
+ add_docs(
+ torch.ops.fbgemm.permute_pooled_embs,
+ """
+ permute_pooled_embs(pooled_embs, offset_dim_list, permute_list, inv_offset_dim_list, inv_permute_list) -> Tensor
+
+ Permute embedding outputs along the feature dimension.
+
+ The embedding output tensor `pooled_embs` contains the embedding outputs
+ for all features in a batch. It is represented in a 2D format, where the
+ rows are the batch size dimension and the columns are the feature *
+ embedding dimension. Permuting along the feature dimension is
+ essentially permuting along the second dimension (dim 1).
+
+ Args:
+ pooled_embs (Tensor): The embedding outputs to permute. Shape is
+ `(B_local, total_global_D)`, where `B_local` = a local batch size
+ and `total_global_D` is the total embedding dimension across all
+ features (global)
+
+ offset_dim_list (Tensor): The complete cumulative sum of embedding
+ dimensions of all features. Shape is `T + 1` where `T` is the
+ total number of features
+
+ permute_list (Tensor): A tensor that describes how each feature is
+ permuted. `permute_list[i]` indicates that the feature
+ `permute_list[i]` is permuted to position `i`
+
+ inv_offset_dim_list (Tensor): The complete cumulative sum of inverse
+ embedding dimensions, which are the permuted embedding dimensions.
+ `inv_offset_dim_list[i]` represents the starting embedding position of
+ feature `permute_list[i]`
+
+ inv_permute_list (Tensor): The inverse permute list, which contains the
+ permuted positions of each feature. `inv_permute_list[i]` represents
+ the permuted position of feature `i`
+
+ Returns:
+ Permuted embedding outputs (Tensor). Same shape as `pooled_embs`
+
+ **Example:**
+
+ >>> import torch
+ >>> from itertools import accumulate
+ >>>
+ >>> # Suppose batch size = 3 and there are 3 features
+ >>> batch_size = 3
+ >>>
+ >>> # Embedding dimensions for each feature
+ >>> embs_dims = torch.tensor([4, 4, 8], dtype=torch.int64, device="cuda")
+ >>>
+ >>> # Permute list, i.e., move feature 2 to position 0, move feature 0
+ >>> # to position 1, so on
+ >>> permute = torch.tensor([2, 0, 1], dtype=torch.int64, device="cuda")
+ >>>
+ >>> # Compute embedding dim offsets
+ >>> offset_dim_list = torch.tensor([0] + list(accumulate(embs_dims)), dtype=torch.int64, device="cuda")
+ >>> print(offset_dim_list)
+ >>>
+ tensor([ 0, 4, 8, 16], device='cuda:0')
+ >>>
+ >>> # Compute inverse embedding dims
+ >>> inv_embs_dims = [embs_dims[p] for p in permute]
+ >>> # Compute complete cumulative sum of inverse embedding dims
+ >>> inv_offset_dim_list = torch.tensor([0] + list(accumulate(inv_embs_dims)), dtype=torch.int64, device="cuda")
+ >>> print(inv_offset_dim_list)
+ >>>
+ tensor([ 0, 8, 12, 16], device='cuda:0')
+ >>>
+ >>> # Compute inverse permutes
+ >>> inv_permute = [0] * len(permute)
+ >>> for i, p in enumerate(permute):
+ >>> inv_permute[p] = i
+ >>> inv_permute_list = torch.tensor([inv_permute], dtype=torch.int64, device="cuda")
+ >>> print(inv_permute_list)
+ >>>
+ tensor([[1, 2, 0]], device='cuda:0')
+ >>>
+ >>> # Generate an example input
+ >>> pooled_embs = torch.arange(embs_dims.sum().item() * batch_size, dtype=torch.float32, device="cuda").reshape(batch_size, -1)
+ >>> print(pooled_embs)
+ >>>
+ tensor([[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13.,
+ 14., 15.],
+ [16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29.,
+ 30., 31.],
+ [32., 33., 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45.,
+ 46., 47.]], device='cuda:0')
+ >>>
+ >>> torch.ops.fbgemm.permute_pooled_embs_auto_grad(pooled_embs, offset_dim_list, permute, inv_offset_dim_list, inv_permute_list)
+ >>>
+ tensor([[ 8., 9., 10., 11., 12., 13., 14., 15., 0., 1., 2., 3., 4., 5.,
+ 6., 7.],
+ [24., 25., 26., 27., 28., 29., 30., 31., 16., 17., 18., 19., 20., 21.,
+ 22., 23.],
+ [40., 41., 42., 43., 44., 45., 46., 47., 32., 33., 34., 35., 36., 37.,
+ 38., 39.]], device='cuda:0')
+ """,
+ )
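
The docstring example above builds the four auxiliary tensors by hand. The hypothetical helper below (not part of the package) packages that same construction, mirroring the example step for step, including the extra outer list it places around inv_permute.

from itertools import accumulate
from typing import List, Tuple

import torch


def build_permute_metadata(
    embs_dims: List[int], permute: List[int], device: str = "cuda"
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    # Cumulative sum of embedding dims: where each feature starts in dim 1.
    offset_dim_list = torch.tensor(
        [0] + list(accumulate(embs_dims)), dtype=torch.int64, device=device
    )
    # Embedding dims reordered by the permutation, and their cumulative sum.
    inv_embs_dims = [embs_dims[p] for p in permute]
    inv_offset_dim_list = torch.tensor(
        [0] + list(accumulate(inv_embs_dims)), dtype=torch.int64, device=device
    )
    # Inverse permutation: inv_permute[p] is the new position of feature p.
    inv_permute = [0] * len(permute)
    for i, p in enumerate(permute):
        inv_permute[p] = i
    permute_list = torch.tensor(permute, dtype=torch.int64, device=device)
    # Wrapped in an outer list, mirroring the docstring example above.
    inv_permute_list = torch.tensor([inv_permute], dtype=torch.int64, device=device)
    return offset_dim_list, permute_list, inv_offset_dim_list, inv_permute_list

With the docstring's values, build_permute_metadata([4, 4, 8], [2, 0, 1]) reproduces the offset_dim_list, inv_offset_dim_list, and inv_permute_list tensors printed in the example.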
@@ -0,0 +1,41 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import torch
+
+ from .common import add_docs
+
+ add_docs(
+ torch.ops.fbgemm.FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf,
+ """
+ FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf(input, bit_rate) -> Tensor
+
+ Convert FP32/16 to INT8/4/2 using rowwise quantization.
+
+ Args:
+ input (Tensor): An input tensor. Must be either FP32 (`torch.float`)
+ or FP16 (`torch.half`) and must have 2 dimensions.
+
+ bit_rate (int): Quantized bit rate (2 for INT2, 4 for INT4, or 8 for
+ INT8)
+
+ Returns:
+ Quantized output (Tensor). Data type is `torch.uint8` (byte type)
+
+ **Example:**
+
+ >>> # Randomize input
+ >>> input = torch.randn(2, 4, dtype=torch.float32, device="cuda")
+ >>> print(input)
+ tensor([[ 0.8247, 0.0031, -1.0068, -1.2081],
+ [ 0.5427, 1.5772, 1.0291, -0.7626]], device='cuda:0')
+ >>> # Quantize
+ >>> output = torch.ops.fbgemm.FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf(input, bit_rate=4)
+ >>> print(output)
+ tensor([[159, 1, 86, 48, 213, 188],
+ [248, 11, 254, 48, 26, 186]], device='cuda:0', dtype=torch.uint8)
+ """,
+ )
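
A short sketch of the quantization op documented above, assuming a CUDA environment with this wheel installed. The 6-column uint8 output in the docstring example is consistent with each row packing the 4-bit values (2 bytes for 4 columns) followed by an FP16 scale and FP16 bias (4 bytes).

import torch
import fbgemm_gpu  # noqa: F401  # registers torch.ops.fbgemm.*

x = torch.randn(2, 4, dtype=torch.float32, device="cuda")

# Row-wise 4-bit quantization; per-row scale/bias are stored at the
# end of each output row.
q = torch.ops.fbgemm.FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf(x, bit_rate=4)
print(q.shape, q.dtype)  # torch.Size([2, 6]) torch.uint8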