fbgemm-gpu-genai-nightly 2025.12.19-cp310-cp310-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fbgemm-gpu-genai-nightly might be problematic.
- fbgemm_gpu/__init__.py +186 -0
- fbgemm_gpu/asmjit.so +0 -0
- fbgemm_gpu/batched_unary_embeddings_ops.py +87 -0
- fbgemm_gpu/config/__init__.py +9 -0
- fbgemm_gpu/config/feature_list.py +88 -0
- fbgemm_gpu/docs/__init__.py +18 -0
- fbgemm_gpu/docs/common.py +9 -0
- fbgemm_gpu/docs/examples.py +73 -0
- fbgemm_gpu/docs/jagged_tensor_ops.py +259 -0
- fbgemm_gpu/docs/merge_pooled_embedding_ops.py +36 -0
- fbgemm_gpu/docs/permute_pooled_embedding_ops.py +108 -0
- fbgemm_gpu/docs/quantize_ops.py +41 -0
- fbgemm_gpu/docs/sparse_ops.py +616 -0
- fbgemm_gpu/docs/target.genai.json.py +6 -0
- fbgemm_gpu/enums.py +24 -0
- fbgemm_gpu/experimental/example/__init__.py +29 -0
- fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
- fbgemm_gpu/experimental/example/utils.py +20 -0
- fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py +15 -0
- fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py +5654 -0
- fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py +4422 -0
- fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py +1192 -0
- fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py +232 -0
- fbgemm_gpu/experimental/gemm/triton_gemm/utils.py +130 -0
- fbgemm_gpu/experimental/gen_ai/__init__.py +56 -0
- fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py +46 -0
- fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py +333 -0
- fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py +552 -0
- fbgemm_gpu/experimental/gen_ai/bench/__init__.py +13 -0
- fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py +257 -0
- fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py +348 -0
- fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py +707 -0
- fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py +3483 -0
- fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
- fbgemm_gpu/experimental/gen_ai/moe/README.md +15 -0
- fbgemm_gpu/experimental/gen_ai/moe/__init__.py +66 -0
- fbgemm_gpu/experimental/gen_ai/moe/activation.py +292 -0
- fbgemm_gpu/experimental/gen_ai/moe/gather_scatter.py +740 -0
- fbgemm_gpu/experimental/gen_ai/moe/layers.py +1272 -0
- fbgemm_gpu/experimental/gen_ai/moe/shuffling.py +421 -0
- fbgemm_gpu/experimental/gen_ai/quantize.py +307 -0
- fbgemm_gpu/fbgemm.so +0 -0
- fbgemm_gpu/metrics.py +160 -0
- fbgemm_gpu/permute_pooled_embedding_modules.py +142 -0
- fbgemm_gpu/permute_pooled_embedding_modules_split.py +85 -0
- fbgemm_gpu/quantize/__init__.py +43 -0
- fbgemm_gpu/quantize/quantize_ops.py +64 -0
- fbgemm_gpu/quantize_comm.py +315 -0
- fbgemm_gpu/quantize_utils.py +246 -0
- fbgemm_gpu/runtime_monitor.py +237 -0
- fbgemm_gpu/sll/__init__.py +189 -0
- fbgemm_gpu/sll/cpu/__init__.py +80 -0
- fbgemm_gpu/sll/cpu/cpu_sll.py +1001 -0
- fbgemm_gpu/sll/meta/__init__.py +35 -0
- fbgemm_gpu/sll/meta/meta_sll.py +337 -0
- fbgemm_gpu/sll/triton/__init__.py +127 -0
- fbgemm_gpu/sll/triton/common.py +38 -0
- fbgemm_gpu/sll/triton/triton_dense_jagged_cat_jagged_out.py +72 -0
- fbgemm_gpu/sll/triton/triton_jagged2_to_padded_dense.py +221 -0
- fbgemm_gpu/sll/triton/triton_jagged_bmm.py +418 -0
- fbgemm_gpu/sll/triton/triton_jagged_bmm_jagged_out.py +553 -0
- fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_add.py +52 -0
- fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_mul_jagged_out.py +175 -0
- fbgemm_gpu/sll/triton/triton_jagged_dense_flash_attention.py +861 -0
- fbgemm_gpu/sll/triton/triton_jagged_flash_attention_basic.py +667 -0
- fbgemm_gpu/sll/triton/triton_jagged_self_substraction_jagged_out.py +73 -0
- fbgemm_gpu/sll/triton/triton_jagged_softmax.py +463 -0
- fbgemm_gpu/sll/triton/triton_multi_head_jagged_flash_attention.py +751 -0
- fbgemm_gpu/sparse_ops.py +1455 -0
- fbgemm_gpu/split_embedding_configs.py +452 -0
- fbgemm_gpu/split_embedding_inference_converter.py +175 -0
- fbgemm_gpu/split_embedding_optimizer_ops.py +21 -0
- fbgemm_gpu/split_embedding_utils.py +29 -0
- fbgemm_gpu/split_table_batched_embeddings_ops.py +73 -0
- fbgemm_gpu/split_table_batched_embeddings_ops_common.py +484 -0
- fbgemm_gpu/split_table_batched_embeddings_ops_inference.py +2042 -0
- fbgemm_gpu/split_table_batched_embeddings_ops_training.py +4600 -0
- fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py +146 -0
- fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py +26 -0
- fbgemm_gpu/tbe/__init__.py +6 -0
- fbgemm_gpu/tbe/bench/__init__.py +55 -0
- fbgemm_gpu/tbe/bench/bench_config.py +156 -0
- fbgemm_gpu/tbe/bench/bench_runs.py +709 -0
- fbgemm_gpu/tbe/bench/benchmark_click_interface.py +187 -0
- fbgemm_gpu/tbe/bench/eeg_cli.py +137 -0
- fbgemm_gpu/tbe/bench/embedding_ops_common_config.py +149 -0
- fbgemm_gpu/tbe/bench/eval_compression.py +119 -0
- fbgemm_gpu/tbe/bench/reporter.py +35 -0
- fbgemm_gpu/tbe/bench/tbe_data_config.py +137 -0
- fbgemm_gpu/tbe/bench/tbe_data_config_bench_helper.py +323 -0
- fbgemm_gpu/tbe/bench/tbe_data_config_loader.py +289 -0
- fbgemm_gpu/tbe/bench/tbe_data_config_param_models.py +170 -0
- fbgemm_gpu/tbe/bench/utils.py +48 -0
- fbgemm_gpu/tbe/cache/__init__.py +11 -0
- fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py +385 -0
- fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py +48 -0
- fbgemm_gpu/tbe/ssd/__init__.py +15 -0
- fbgemm_gpu/tbe/ssd/common.py +46 -0
- fbgemm_gpu/tbe/ssd/inference.py +586 -0
- fbgemm_gpu/tbe/ssd/training.py +4908 -0
- fbgemm_gpu/tbe/ssd/utils/__init__.py +7 -0
- fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py +273 -0
- fbgemm_gpu/tbe/stats/__init__.py +10 -0
- fbgemm_gpu/tbe/stats/bench_params_reporter.py +339 -0
- fbgemm_gpu/tbe/utils/__init__.py +13 -0
- fbgemm_gpu/tbe/utils/common.py +42 -0
- fbgemm_gpu/tbe/utils/offsets.py +65 -0
- fbgemm_gpu/tbe/utils/quantize.py +251 -0
- fbgemm_gpu/tbe/utils/requests.py +556 -0
- fbgemm_gpu/tbe_input_multiplexer.py +108 -0
- fbgemm_gpu/triton/__init__.py +22 -0
- fbgemm_gpu/triton/common.py +77 -0
- fbgemm_gpu/triton/jagged/__init__.py +8 -0
- fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py +824 -0
- fbgemm_gpu/triton/quantize.py +647 -0
- fbgemm_gpu/triton/quantize_ref.py +286 -0
- fbgemm_gpu/utils/__init__.py +11 -0
- fbgemm_gpu/utils/filestore.py +211 -0
- fbgemm_gpu/utils/loader.py +36 -0
- fbgemm_gpu/utils/torch_library.py +132 -0
- fbgemm_gpu/uvm.py +40 -0
- fbgemm_gpu_genai_nightly-2025.12.19.dist-info/METADATA +62 -0
- fbgemm_gpu_genai_nightly-2025.12.19.dist-info/RECORD +127 -0
- fbgemm_gpu_genai_nightly-2025.12.19.dist-info/WHEEL +5 -0
- fbgemm_gpu_genai_nightly-2025.12.19.dist-info/top_level.txt +2 -0
- list_versions/__init__.py +12 -0
- list_versions/cli_run.py +163 -0
fbgemm_gpu/__init__.py
ADDED
@@ -0,0 +1,186 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import logging
import os
import re

import torch

# Based on the FBGEMM-PyTorch compatibility table at
# https://docs.pytorch.org/FBGEMM/general/Releases.html#fbgemm-releases-compatibility
_fbgemm_torch_compat_table = {
    "1.5": "2.10",
    "1.4": "2.9",
    "1.3": "2.8",
    "1.2": "2.7",
    "1.1": "2.6",
    "1.0": "2.5",
    "0.8": "2.4",
    "0.7": "2.3",
    "0.6": "2.2",
    "0.5": "2.1",
    "0.4": "2.0",
}


def _load_target_info(target: str) -> dict[str, str]:
    try:
        filepath = os.path.join(
            os.path.dirname(__file__), "docs", f"target.{target}.json.py"
        )
        with open(filepath, "r") as file:
            data = json.load(file)
    except Exception:
        data = {}

    return data


def _load_library(filename: str, version: str, no_throw: bool = False) -> None:
    """Load a shared library from the given filename."""

    # Check if the version of PyTorch is compatible with the version of FBGEMM
    # that we are trying to load, and print a loud warning if not. This is
    # useful for the OSS build, where we have a single FBGEMM library that is
    # compatible with multiple versions of PyTorch.
    #
    # Based on: https://github.com/pytorch/ao/blob/main/torchao/__init__.py#L30

    keys = [
        key
        for key in _fbgemm_torch_compat_table.keys()
        if version.startswith(f"{key}.")
    ]

    if version == "INTERNAL" or "+git" in version:
        # if FBGEMM version has "+git", assume it's locally built and we don't know
        # anything about the PyTorch version used to build it
        logging.info(
            "FBGEMM version is INTERNAL or local, ignoring version compatibility check with PyTorch"
        )

    elif re.match(r"^\d{4}\.\d{1,2}\.\d{1,2}.*$", version):
        # if FBGEMM version is a date, assume it's a nightly build and that we
        # know what we're doing
        logging.info(
            "FBGEMM version is a nightly version, ignoring version compatibility check with PyTorch"
        )

    elif not keys:
        logging.warning(
            f"""
            \033[33m
            _fbgemm_torch_compat_table has no entry for {version} of FBGEMM;
            cannot determine compatibility with PyTorch {torch.__version__}
            \033[0m
            """
        )

    elif not str(torch.__version__).startswith(_fbgemm_torch_compat_table[keys[0]]):
        logging.warning(
            f"""
            \033[31m
            FBGEMM_GPU version is {version}, which is not guaranteed to be
            compatible with PyTorch {torch.__version__}; library loading might
            crash!

            Please refer to
            https://docs.pytorch.org/FBGEMM/general/Releases.html#fbgemm-releases-compatibility
            for the FBGEMM-PyTorch compatibility table.
            \033[0m
            """
        )

    try:
        torch.ops.load_library(os.path.join(os.path.dirname(__file__), filename))
        logging.info(f"Successfully loaded: '{filename}'")

    except Exception as error:
        logging.error(f"Could not load the library '{filename}'!\n\n\n{error}\n\n\n")
        if not no_throw:
            raise error


# Since __init__.py is only used in OSS context, we define `open_source` here
# and use its existence to determine whether or not we are in OSS context
open_source: bool = True

# Trigger the manual addition of docstrings to pybind11-generated operators
import fbgemm_gpu.docs  # noqa: F401, E402


__targets_infos__ = {
    target: _load_target_info(target) for target in ["default", "genai", "hstu"]
}
__targets_infos__ = {k: v for (k, v) in __targets_infos__.items() if v}

try:
    __target__, __info__ = next(iter(__targets_infos__.items()))
    __variant__ = __info__["variant"]
    __version__ = __info__["version"]
except Exception:
    __variant__: str = "INTERNAL"
    __version__: str = "INTERNAL"
    __target__: str = "INTERNAL"

fbgemm_gpu_libraries = [
    "fbgemm_gpu_config",
    "fbgemm_gpu_tbe_utils",
    "fbgemm_gpu_tbe_index_select",
    "fbgemm_gpu_tbe_cache",
    "fbgemm_gpu_tbe_optimizers",
    "fbgemm_gpu_tbe_inference",
    "fbgemm_gpu_tbe_training_forward",
    "fbgemm_gpu_tbe_training_backward",
    "fbgemm_gpu_tbe_training_backward_pt2",
    "fbgemm_gpu_tbe_training_backward_dense",
    "fbgemm_gpu_tbe_training_backward_split_host",
    "fbgemm_gpu_tbe_training_backward_gwd",
    "fbgemm_gpu_tbe_training_backward_vbe",
    "fbgemm_gpu_py",
]

fbgemm_genai_libraries = [
    "experimental/gen_ai/fbgemm_gpu_experimental_gen_ai",
]

# NOTE: While FBGEMM_GPU GenAI is not available for ROCm yet, we would like to
# be able to install the existing CUDA variant of the package onto ROCm systems,
# so that we can at least use the Triton GEMM libraries from experimental/gemm.
# But loading fbgemm_gpu package will trigger load-checking the .SO file for the
# GenAI libraries, which will fail. This workaround ignores check-loading the
# .SO file for the ROCm case, so that clients can import
# fbgemm_gpu.experimental.gemm without triggering an error.
if torch.cuda.is_available() and torch.version.hip:
    fbgemm_genai_libraries = []

libraries_to_load = {
    "default": fbgemm_gpu_libraries,
    "genai": fbgemm_genai_libraries,
}

for target, info in __targets_infos__.items():
    for library in libraries_to_load.get(target, []):
        # NOTE: In all cases, we want to throw an error if we cannot load the
        # library. However, this appears to break the OSS documentation build,
        # where the Python documentation doesn't show up in the generated docs.
        #
        # To work around this problem, we introduce a fake build variant called
        # `docs` and we only throw a library load error when the variant is not
        # `docs`. For more information, see:
        #
        # https://github.com/pytorch/FBGEMM/pull/3477
        # https://github.com/pytorch/FBGEMM/pull/3717
        _load_library(f"{library}.so", info["version"], info["variant"] == "docs")

try:
    # Trigger meta operator registrations
    from . import sparse_ops  # noqa: F401, E402
except Exception:
    pass
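The `__init__.py` above resolves the build target, variant, and version from the bundled `target.*.json.py` metadata and then loads the matching shared libraries. Below is a minimal sketch of inspecting that metadata after import; it assumes the wheel and a compatible PyTorch build are installed, and the behavior described in the comments is taken from the code above.

```python
# Minimal sketch: inspect the metadata that fbgemm_gpu/__init__.py resolves at
# import time. Importing the package triggers the .so loading shown above, so
# this assumes the wheel and a compatible PyTorch build are installed.
import fbgemm_gpu

# __target__ / __variant__ / __version__ fall back to "INTERNAL" when no
# target.*.json.py metadata file could be read.
print(fbgemm_gpu.__target__, fbgemm_gpu.__variant__, fbgemm_gpu.__version__)

# For this nightly, the version string is date-shaped (e.g. "2025.12.19"),
# which is why _load_library() skips the FBGEMM/PyTorch compatibility check.
```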
fbgemm_gpu/asmjit.so
ADDED
Binary file
fbgemm_gpu/batched_unary_embeddings_ops.py
ADDED
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict


from math import sqrt

import torch

from fbgemm_gpu.utils.loader import load_torch_module

try:
    # pyre-ignore[21]
    from fbgemm_gpu import open_source  # noqa: F401
except Exception:
    load_torch_module("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")


def wrap_weight_to_parameter(weights: list[torch.Tensor]) -> list[torch.Tensor]:
    for i, v in enumerate(weights):
        if not isinstance(v, torch.nn.Parameter):
            weights[i] = torch.nn.Parameter(v)
    return weights


class BatchedUnaryEmbeddingBag(torch.nn.Module):
    # pyre-fixme[3]: Return type must be annotated.
    def __init__(self, num_tasks: int, hash_sizes: list[int], long_index: bool = False):
        super().__init__()
        self.num_tasks = num_tasks
        self.hash_sizes = hash_sizes
        # [N][sum(E)][1]
        embedding_data = torch.randn(size=(num_tasks, sum(self.hash_sizes), 1))
        self.weight = torch.nn.Parameter(embedding_data)
        index_dtype = torch.int64 if long_index else torch.int32
        table_offsets_tensor = torch.cat(
            [
                torch.tensor([0], dtype=index_dtype),
                torch.cumsum(
                    torch.tensor(hash_sizes),
                    dim=0,
                    dtype=index_dtype,
                ),
            ]
        )
        self.register_buffer("table_offsets_tensor", table_offsets_tensor)
        self.init_parameters()

    # pyre-fixme[3]: Return type must be annotated.
    def forward(self, offsets: torch.Tensor, input: torch.Tensor):
        # output is [N][B][T]
        return torch.ops.fbgemm.batched_unary_embeddings(
            self.weight,
            self.table_offsets_tensor,
            offsets,
            input,
        )

    @torch.jit.export
    # pyre-fixme[3]: Return type must be annotated.
    def split_embedding_weights(self):
        embedding_weights = []
        for n in range(self.num_tasks):
            for t in range(len(self.hash_sizes)):
                embedding_weights.append(
                    self.weight.detach()[
                        n,
                        self.table_offsets_tensor[t] : self.table_offsets_tensor[t + 1],
                        :,
                    ]
                )
        return embedding_weights

    @torch.jit.export
    # pyre-fixme[3]: Return type must be annotated.
    def init_parameters(self):
        for num_emb, param in zip(
            self.hash_sizes * self.num_tasks,
            wrap_weight_to_parameter(self.split_embedding_weights()),
        ):
            assert param.shape == (num_emb, 1)
            param.data.uniform_(-sqrt(1 / num_emb), sqrt(1 / num_emb))
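As a rough illustration of the module above, the sketch below constructs a `BatchedUnaryEmbeddingBag` and checks the weight layout it documents. It exercises only the pure-Python paths shown in the diff (`__init__` and `split_embedding_weights`) and assumes the fbgemm ops load cleanly when the package is imported.

```python
# Hypothetical usage sketch (not shipped in the package): exercises only the
# constructor and split_embedding_weights(); forward() additionally needs the
# torch.ops.fbgemm.batched_unary_embeddings operator.
import torch
from fbgemm_gpu.batched_unary_embeddings_ops import BatchedUnaryEmbeddingBag

num_tasks, hash_sizes = 2, [10, 20]
emb = BatchedUnaryEmbeddingBag(num_tasks=num_tasks, hash_sizes=hash_sizes)

# The single weight tensor is laid out as [num_tasks][sum(hash_sizes)][1] ...
assert emb.weight.shape == (num_tasks, sum(hash_sizes), 1)

# ... and split_embedding_weights() views it as one (hash_size, 1) slice per
# (task, table) pair, each initialized uniformly in +/- sqrt(1 / hash_size).
slices = emb.split_embedding_weights()
assert len(slices) == num_tasks * len(hash_sizes)
assert slices[0].shape == (hash_sizes[0], 1)
```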
fbgemm_gpu/config/__init__.py
ADDED
@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from .feature_list import FeatureGate, FeatureGateName  # noqa F401
fbgemm_gpu/config/feature_list.py
ADDED
@@ -0,0 +1,88 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from enum import auto, Enum

import torch

try:
    torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:config_cpp_torch_op")
except Exception:
    import fbgemm_gpu  # noqa F401


# Note: ENUM name must match EXACTLY with the JK knob name in the UI
class FeatureGateName(Enum):
    """
    FBGEMM_GPU feature gates enum (Python).

    **Code Example:**

    .. code-block:: python

        from fbgemm_gpu.config import FeatureGateName

        def foo():
            if FeatureGateName.TBE_V2.is_enabled():

                # Do something if feature is enabled
                ...
            else:
                # Do something different if feature is disabled
                ...

    Note:
        While not required, it is best to mirror the enum values in C++,
        in `fbgemm_gpu/config/feature_gates.h`.

        For fbcode: The ENUM name must match EXACTLY with the JK knob name in the UI
        For OSS: The environment variable will be evaluated as f"FBGEMM_{ENUM}"

    """

    # Enable TBE V2 APIs
    TBE_V2 = auto()

    # Enable Ensemble Rowwise Adagrad (D60189486 stack)
    TBE_ENSEMBLE_ROWWISE_ADAGRAD = auto()

    # Enable ROCm packed bags optimization in TBE inference
    TBE_ROCM_INFERENCE_PACKED_BAGS = auto()

    # Enable HIP-based backward kernels in TBE training
    TBE_ROCM_HIP_BACKWARD_KERNEL = auto()

    # Enable bounds_check_indices_v2
    BOUNDS_CHECK_INDICES_V2 = auto()

    # Enable TBE input parameters extraction
    TBE_REPORT_INPUT_PARAMS = auto()

    def is_enabled(self) -> bool:
        return FeatureGate.is_enabled(self)


class FeatureGate:
    """
    FBGEMM_GPU feature gate.

    This class exists because methods defined on enums cannot be invoked when
    the enum is packaged into a model (the mechanism is unclear).

    **Code Example:**

    .. code-block:: python

        from deeplearning.fbgemm.fbgemm_gpu.config import FeatureGate, FeatureGateName

        FeatureGate.is_enabled(FeatureGateName.TBE_V2)
    """

    @classmethod
    def is_enabled(cls, feature: FeatureGateName) -> bool:
        return torch.ops.fbgemm.check_feature_gate_key(feature.name)
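Per the docstring above, the OSS build resolves each gate from an environment variable named `f"FBGEMM_{ENUM}"`. Below is a hedged sketch of toggling a gate that way; the value `"1"` and its truthiness rules are assumptions here, since the exact semantics live in the C++ feature-gate code behind `check_feature_gate_key`.

```python
# Hedged sketch of gating a code path in an OSS build. The docstring above says
# the OSS check reads an environment variable named f"FBGEMM_{ENUM}", e.g.
# FBGEMM_TBE_V2 for FeatureGateName.TBE_V2; the value "1" is an assumption.
import os

os.environ["FBGEMM_TBE_V2"] = "1"  # assumed to enable the gate in OSS builds

from fbgemm_gpu.config import FeatureGateName

if FeatureGateName.TBE_V2.is_enabled():
    ...  # take the TBE V2 code path
else:
    ...  # fall back to the default path
```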
fbgemm_gpu/docs/__init__.py
ADDED
@@ -0,0 +1,18 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Trigger the manual addition of docstrings to pybind11-generated operators
try:
    from . import (  # noqa: F401
        jagged_tensor_ops,
        merge_pooled_embedding_ops,
        permute_pooled_embedding_ops,
        quantize_ops,
        sparse_ops,
    )
except Exception:
    pass
fbgemm_gpu/docs/common.py
ADDED
@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


def add_docs(method, docstr: str):
    method.__doc__ = docstr
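`add_docs` simply assigns `__doc__` on a callable; the sibling modules imported by `docs/__init__.py` use it to attach reStructuredText docstrings to the pybind11-generated `torch.ops.fbgemm.*` operators. Below is a self-contained sketch with a placeholder function standing in for such an operator; the operator name is hypothetical.

```python
# Sketch of how docs/common.py's add_docs is used: it just sets __doc__ on the
# given callable. A plain placeholder function stands in for a pybind11-generated
# torch.ops.fbgemm.* operator; importing fbgemm_gpu.docs.common assumes the
# package itself imports cleanly.
from fbgemm_gpu.docs.common import add_docs


def some_operator(x):
    return x


add_docs(
    some_operator,
    """
some_operator(x) -> Any

Example docstring attached after the fact, the same way jagged_tensor_ops,
sparse_ops, etc. document the pybind11-generated operators.
""",
)

assert some_operator.__doc__ is not None
```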
fbgemm_gpu/docs/examples.py
ADDED
@@ -0,0 +1,73 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from ctypes import c_size_t


# [fbgemm-gpu.python.docs.examples.docstring.start]
def example_method(alignment: c_size_t, param: float) -> int:
    """
    This class is an example of how you can write docstrings.
    You can add multiple lines of those descriptions. Make sure to include
    useful information about your method.

    **Code Example:**

    .. code-block:: cpp

        // Here is a C++ code block
        std::vector<int32_t> foo(const std::vector<int32_t> &lst) {
            std::vector<int32_t> ret;
            for (const auto x : lst) {
                ret.emplace_back(x * 2);
            }
            return ret;
        }

    And here is a verbatim-text diagram example:

    .. code-block:: text

        .------+---------------------------------.-----------------------------
        |  Block A (first)                       |  Block B (second)
        +------+------+--------------------------+------+------+---------------
        | Next | Prev |   usable space           | Next | Prev | usable space..
        +------+------+--------------------------+------+--+---+---------------
        ^  |                                     ^         |
        |  '-------------------------------------'         |
        |                                                   |
        '----------- Block B's prev points to Block A -----'

    Todo:
        * This is a TODO item.
        * And a second TODO item.

    Args:
        alignment (c_size_t): Description of the `alignment` value.
        param (float): Description of `param1`.

    Returns:
        Description of the method's return value.

    Raises:
        AttributeError: If there is an error with the attributes.
        ValueError: If `param` is equal to 3.14.

    Example:
        This is how you can use this function

        >>> print("Code blocks are supported")

    Note:
        For more info on reStructuredText docstrings, see
        `here <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`__
        and
        `here <https://peps.python.org/pep-0287/>`__.
    """
    return 42


# [fbgemm-gpu.python.docs.examples.docstring.end]