fbgemm-gpu-genai-nightly 2025.9.20__cp313-cp313-manylinux_2_28_x86_64.whl → 2025.11.4__cp313-cp313-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fbgemm_gpu/__init__.py +36 -18
- fbgemm_gpu/asmjit.so +0 -0
- fbgemm_gpu/batched_unary_embeddings_ops.py +2 -3
- fbgemm_gpu/config/feature_list.py +1 -1
- fbgemm_gpu/docs/target.genai.json.py +6 -0
- fbgemm_gpu/enums.py +3 -4
- fbgemm_gpu/experimental/example/__init__.py +0 -4
- fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
- fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py +0 -4
- fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py +166 -45
- fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py +94 -276
- fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py +71 -2
- fbgemm_gpu/experimental/gen_ai/__init__.py +12 -4
- fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py +0 -4
- fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py +5 -5
- fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py +34 -8
- fbgemm_gpu/experimental/gen_ai/bench/__init__.py +0 -4
- fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py +1 -2
- fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py +7 -7
- fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py +51 -19
- fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py +387 -9
- fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
- fbgemm_gpu/experimental/gen_ai/moe/__init__.py +0 -4
- fbgemm_gpu/experimental/gen_ai/moe/activation.py +2 -2
- fbgemm_gpu/experimental/gen_ai/moe/gather_scatter.py +2 -2
- fbgemm_gpu/experimental/gen_ai/moe/layers.py +6 -9
- fbgemm_gpu/experimental/gen_ai/moe/shuffling.py +3 -3
- fbgemm_gpu/experimental/gen_ai/quantize.py +6 -7
- fbgemm_gpu/fbgemm.so +0 -0
- fbgemm_gpu/permute_pooled_embedding_modules.py +4 -4
- fbgemm_gpu/permute_pooled_embedding_modules_split.py +4 -4
- fbgemm_gpu/quantize_comm.py +4 -4
- fbgemm_gpu/runtime_monitor.py +3 -3
- fbgemm_gpu/sll/cpu/cpu_sll.py +6 -6
- fbgemm_gpu/sll/triton/triton_jagged2_to_padded_dense.py +1 -2
- fbgemm_gpu/sll/triton/triton_jagged_dense_flash_attention.py +3 -4
- fbgemm_gpu/sll/triton/triton_jagged_flash_attention_basic.py +1 -2
- fbgemm_gpu/sll/triton/triton_multi_head_jagged_flash_attention.py +1 -2
- fbgemm_gpu/sparse_ops.py +55 -54
- fbgemm_gpu/split_embedding_configs.py +18 -18
- fbgemm_gpu/split_embedding_inference_converter.py +4 -4
- fbgemm_gpu/split_table_batched_embeddings_ops_common.py +50 -24
- fbgemm_gpu/split_table_batched_embeddings_ops_inference.py +37 -37
- fbgemm_gpu/split_table_batched_embeddings_ops_training.py +349 -101
- fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py +2 -2
- fbgemm_gpu/tbe/bench/bench_config.py +3 -3
- fbgemm_gpu/tbe/bench/bench_runs.py +13 -13
- fbgemm_gpu/tbe/bench/eeg_cli.py +2 -3
- fbgemm_gpu/tbe/bench/embedding_ops_common_config.py +2 -2
- fbgemm_gpu/tbe/bench/eval_compression.py +3 -3
- fbgemm_gpu/tbe/bench/tbe_data_config.py +6 -6
- fbgemm_gpu/tbe/bench/tbe_data_config_bench_helper.py +13 -13
- fbgemm_gpu/tbe/bench/tbe_data_config_param_models.py +8 -8
- fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py +21 -16
- fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py +4 -4
- fbgemm_gpu/tbe/ssd/inference.py +13 -13
- fbgemm_gpu/tbe/ssd/training.py +311 -89
- fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py +2 -2
- fbgemm_gpu/tbe/stats/bench_params_reporter.py +3 -3
- fbgemm_gpu/tbe/utils/offsets.py +3 -3
- fbgemm_gpu/tbe/utils/quantize.py +2 -2
- fbgemm_gpu/tbe/utils/requests.py +14 -14
- fbgemm_gpu/tbe_input_multiplexer.py +10 -10
- fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py +11 -11
- fbgemm_gpu/utils/torch_library.py +2 -2
- {fbgemm_gpu_genai_nightly-2025.9.20.dist-info → fbgemm_gpu_genai_nightly-2025.11.4.dist-info}/METADATA +1 -1
- {fbgemm_gpu_genai_nightly-2025.9.20.dist-info → fbgemm_gpu_genai_nightly-2025.11.4.dist-info}/RECORD +70 -70
- list_versions/cli_run.py +5 -6
- fbgemm_gpu/docs/version.py +0 -11
- {fbgemm_gpu_genai_nightly-2025.9.20.dist-info → fbgemm_gpu_genai_nightly-2025.11.4.dist-info}/WHEEL +0 -0
- {fbgemm_gpu_genai_nightly-2025.9.20.dist-info → fbgemm_gpu_genai_nightly-2025.11.4.dist-info}/top_level.txt +0 -0
fbgemm_gpu/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
import json
|
|
8
9
|
import logging
|
|
9
10
|
import os
|
|
10
11
|
import re
|
|
@@ -26,6 +27,19 @@ _fbgemm_torch_compat_table = {
|
|
|
26
27
|
}
|
|
27
28
|
|
|
28
29
|
|
|
30
|
+
def _load_target_info(target: str) -> dict[str, str]:
|
|
31
|
+
try:
|
|
32
|
+
filepath = os.path.join(
|
|
33
|
+
os.path.dirname(__file__), "docs", f"target.{target}.json.py"
|
|
34
|
+
)
|
|
35
|
+
with open(filepath, "r") as file:
|
|
36
|
+
data = json.load(file)
|
|
37
|
+
except Exception:
|
|
38
|
+
data = {}
|
|
39
|
+
|
|
40
|
+
return data
|
|
41
|
+
|
|
42
|
+
|
|
29
43
|
def _load_library(filename: str, version: str, no_throw: bool = False) -> None:
|
|
30
44
|
"""Load a shared library from the given filename."""
|
|
31
45
|
|
|
@@ -98,13 +112,16 @@ open_source: bool = True
|
|
|
98
112
|
# Trigger the manual addition of docstrings to pybind11-generated operators
|
|
99
113
|
import fbgemm_gpu.docs # noqa: F401, E402
|
|
100
114
|
|
|
115
|
+
|
|
116
|
+
__targets_infos__ = {
|
|
117
|
+
target: _load_target_info(target) for target in ["default", "genai", "hstu"]
|
|
118
|
+
}
|
|
119
|
+
__targets_infos__ = {k: v for (k, v) in __targets_infos__.items() if v}
|
|
120
|
+
|
|
101
121
|
try:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
__variant__,
|
|
106
|
-
__version__,
|
|
107
|
-
)
|
|
122
|
+
__target__, __info__ = next(iter(__targets_infos__.items()))
|
|
123
|
+
__variant__ = __info__["variant"]
|
|
124
|
+
__version__ = __info__["version"]
|
|
108
125
|
except Exception:
|
|
109
126
|
__variant__: str = "INTERNAL"
|
|
110
127
|
__version__: str = "INTERNAL"
|
|
@@ -145,18 +162,19 @@ libraries_to_load = {
|
|
|
145
162
|
"genai": fbgemm_genai_libraries,
|
|
146
163
|
}
|
|
147
164
|
|
|
148
|
-
for
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
165
|
+
for target, info in __targets_infos__.items():
|
|
166
|
+
for library in libraries_to_load.get(target, []):
|
|
167
|
+
# NOTE: In all cases, we want to throw an error if we cannot load the
|
|
168
|
+
# library. However, this appears to break the OSS documentation build,
|
|
169
|
+
# where the Python documentation doesn't show up in the generated docs.
|
|
170
|
+
#
|
|
171
|
+
# To work around this problem, we introduce a fake build variant called
|
|
172
|
+
# `docs` and we only throw a library load error when the variant is not
|
|
173
|
+
# `docs`. For more information, see:
|
|
174
|
+
#
|
|
175
|
+
# https://github.com/pytorch/FBGEMM/pull/3477
|
|
176
|
+
# https://github.com/pytorch/FBGEMM/pull/3717
|
|
177
|
+
_load_library(f"{library}.so", info["version"], info["variant"] == "docs")
|
|
160
178
|
|
|
161
179
|
try:
|
|
162
180
|
# Trigger meta operator registrations
|
fbgemm_gpu/asmjit.so
CHANGED
|
Binary file
|
|
fbgemm_gpu/batched_unary_embeddings_ops.py
CHANGED
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
from math import sqrt
|
|
12
|
-
from typing import List
|
|
13
12
|
|
|
14
13
|
import torch
|
|
15
14
|
|
|
@@ -22,7 +21,7 @@ except Exception:
|
|
|
22
21
|
load_torch_module("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
|
|
23
22
|
|
|
24
23
|
|
|
25
|
-
def wrap_weight_to_parameter(weights: List[torch.Tensor]) -> List[torch.Tensor]:
|
|
24
|
+
def wrap_weight_to_parameter(weights: list[torch.Tensor]) -> list[torch.Tensor]:
|
|
26
25
|
for i, v in enumerate(weights):
|
|
27
26
|
if not isinstance(v, torch.nn.Parameter):
|
|
28
27
|
weights[i] = torch.nn.Parameter(v)
|
|
@@ -31,7 +30,7 @@ def wrap_weight_to_parameter(weights: List[torch.Tensor]) -> List[torch.Tensor]:
|
|
|
31
30
|
|
|
32
31
|
class BatchedUnaryEmbeddingBag(torch.nn.Module):
|
|
33
32
|
# pyre-fixme[3]: Return type must be annotated.
|
|
34
|
-
def __init__(self, num_tasks: int, hash_sizes: List[int], long_index: bool = False):
|
|
33
|
+
def __init__(self, num_tasks: int, hash_sizes: list[int], long_index: bool = False):
|
|
35
34
|
super().__init__()
|
|
36
35
|
self.num_tasks = num_tasks
|
|
37
36
|
self.hash_sizes = hash_sizes
|
|
fbgemm_gpu/config/feature_list.py
CHANGED
|
@@ -11,7 +11,7 @@ from enum import auto, Enum
|
|
|
11
11
|
import torch
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
|
-
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:
|
|
14
|
+
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:config_cpp_torch_op")
|
|
15
15
|
except Exception:
|
|
16
16
|
import fbgemm_gpu # noqa F401
|
|
17
17
|
|
fbgemm_gpu/enums.py
CHANGED
|
@@ -8,14 +8,13 @@
|
|
|
8
8
|
# pyre-strict
|
|
9
9
|
|
|
10
10
|
import enum
|
|
11
|
-
import
|
|
12
|
-
from typing import Any, Callable, List, Tuple
|
|
11
|
+
from typing import Any, Callable
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
# Create enums in given namespace with information from query_op
|
|
16
15
|
def create_enums(
|
|
17
|
-
namespace:
|
|
18
|
-
query_op: Callable[[], List[Tuple[str, List[Tuple[str, int]]]]],
|
|
16
|
+
namespace: dict[str, Any],
|
|
17
|
+
query_op: Callable[[], list[tuple[str, list[tuple[str, int]]]]],
|
|
19
18
|
) -> None:
|
|
20
19
|
for enum_name, items in query_op():
|
|
21
20
|
# Create matching python enumeration
|
|
fbgemm_gpu/experimental/example/__init__.py
CHANGED
|
@@ -15,10 +15,6 @@ try:
|
|
|
15
15
|
# pyre-ignore[21]
|
|
16
16
|
# @manual=//deeplearning/fbgemm/fbgemm_gpu:test_utils
|
|
17
17
|
from fbgemm_gpu import open_source
|
|
18
|
-
|
|
19
|
-
# pyre-ignore[21]
|
|
20
|
-
# @manual=//deeplearning/fbgemm/fbgemm_gpu:test_utils
|
|
21
|
-
from fbgemm_gpu.docs.version import __version__ # noqa: F401
|
|
22
18
|
except Exception:
|
|
23
19
|
open_source: bool = False
|
|
24
20
|
|
|
Binary file
|
|
@@ -11,9 +11,5 @@ try:
|
|
|
11
11
|
# pyre-ignore[21]
|
|
12
12
|
# @manual=//deeplearning/fbgemm/fbgemm_gpu:test_utils
|
|
13
13
|
from fbgemm_gpu import open_source
|
|
14
|
-
|
|
15
|
-
# pyre-ignore[21]
|
|
16
|
-
# @manual=//deeplearning/fbgemm/fbgemm_gpu:test_utils
|
|
17
|
-
from fbgemm_gpu.docs.version import __version__ # noqa: F401
|
|
18
14
|
except Exception:
|
|
19
15
|
open_source: bool = False
|