fbgemm-gpu-genai-nightly 2025.12.19-cp310-cp310-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of fbgemm-gpu-genai-nightly might be problematic.
Files changed (127)
  1. fbgemm_gpu/__init__.py +186 -0
  2. fbgemm_gpu/asmjit.so +0 -0
  3. fbgemm_gpu/batched_unary_embeddings_ops.py +87 -0
  4. fbgemm_gpu/config/__init__.py +9 -0
  5. fbgemm_gpu/config/feature_list.py +88 -0
  6. fbgemm_gpu/docs/__init__.py +18 -0
  7. fbgemm_gpu/docs/common.py +9 -0
  8. fbgemm_gpu/docs/examples.py +73 -0
  9. fbgemm_gpu/docs/jagged_tensor_ops.py +259 -0
  10. fbgemm_gpu/docs/merge_pooled_embedding_ops.py +36 -0
  11. fbgemm_gpu/docs/permute_pooled_embedding_ops.py +108 -0
  12. fbgemm_gpu/docs/quantize_ops.py +41 -0
  13. fbgemm_gpu/docs/sparse_ops.py +616 -0
  14. fbgemm_gpu/docs/target.genai.json.py +6 -0
  15. fbgemm_gpu/enums.py +24 -0
  16. fbgemm_gpu/experimental/example/__init__.py +29 -0
  17. fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
  18. fbgemm_gpu/experimental/example/utils.py +20 -0
  19. fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py +15 -0
  20. fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py +5654 -0
  21. fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py +4422 -0
  22. fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py +1192 -0
  23. fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py +232 -0
  24. fbgemm_gpu/experimental/gemm/triton_gemm/utils.py +130 -0
  25. fbgemm_gpu/experimental/gen_ai/__init__.py +56 -0
  26. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py +46 -0
  27. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py +333 -0
  28. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py +552 -0
  29. fbgemm_gpu/experimental/gen_ai/bench/__init__.py +13 -0
  30. fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py +257 -0
  31. fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py +348 -0
  32. fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py +707 -0
  33. fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py +3483 -0
  34. fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
  35. fbgemm_gpu/experimental/gen_ai/moe/README.md +15 -0
  36. fbgemm_gpu/experimental/gen_ai/moe/__init__.py +66 -0
  37. fbgemm_gpu/experimental/gen_ai/moe/activation.py +292 -0
  38. fbgemm_gpu/experimental/gen_ai/moe/gather_scatter.py +740 -0
  39. fbgemm_gpu/experimental/gen_ai/moe/layers.py +1272 -0
  40. fbgemm_gpu/experimental/gen_ai/moe/shuffling.py +421 -0
  41. fbgemm_gpu/experimental/gen_ai/quantize.py +307 -0
  42. fbgemm_gpu/fbgemm.so +0 -0
  43. fbgemm_gpu/metrics.py +160 -0
  44. fbgemm_gpu/permute_pooled_embedding_modules.py +142 -0
  45. fbgemm_gpu/permute_pooled_embedding_modules_split.py +85 -0
  46. fbgemm_gpu/quantize/__init__.py +43 -0
  47. fbgemm_gpu/quantize/quantize_ops.py +64 -0
  48. fbgemm_gpu/quantize_comm.py +315 -0
  49. fbgemm_gpu/quantize_utils.py +246 -0
  50. fbgemm_gpu/runtime_monitor.py +237 -0
  51. fbgemm_gpu/sll/__init__.py +189 -0
  52. fbgemm_gpu/sll/cpu/__init__.py +80 -0
  53. fbgemm_gpu/sll/cpu/cpu_sll.py +1001 -0
  54. fbgemm_gpu/sll/meta/__init__.py +35 -0
  55. fbgemm_gpu/sll/meta/meta_sll.py +337 -0
  56. fbgemm_gpu/sll/triton/__init__.py +127 -0
  57. fbgemm_gpu/sll/triton/common.py +38 -0
  58. fbgemm_gpu/sll/triton/triton_dense_jagged_cat_jagged_out.py +72 -0
  59. fbgemm_gpu/sll/triton/triton_jagged2_to_padded_dense.py +221 -0
  60. fbgemm_gpu/sll/triton/triton_jagged_bmm.py +418 -0
  61. fbgemm_gpu/sll/triton/triton_jagged_bmm_jagged_out.py +553 -0
  62. fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_add.py +52 -0
  63. fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_mul_jagged_out.py +175 -0
  64. fbgemm_gpu/sll/triton/triton_jagged_dense_flash_attention.py +861 -0
  65. fbgemm_gpu/sll/triton/triton_jagged_flash_attention_basic.py +667 -0
  66. fbgemm_gpu/sll/triton/triton_jagged_self_substraction_jagged_out.py +73 -0
  67. fbgemm_gpu/sll/triton/triton_jagged_softmax.py +463 -0
  68. fbgemm_gpu/sll/triton/triton_multi_head_jagged_flash_attention.py +751 -0
  69. fbgemm_gpu/sparse_ops.py +1455 -0
  70. fbgemm_gpu/split_embedding_configs.py +452 -0
  71. fbgemm_gpu/split_embedding_inference_converter.py +175 -0
  72. fbgemm_gpu/split_embedding_optimizer_ops.py +21 -0
  73. fbgemm_gpu/split_embedding_utils.py +29 -0
  74. fbgemm_gpu/split_table_batched_embeddings_ops.py +73 -0
  75. fbgemm_gpu/split_table_batched_embeddings_ops_common.py +484 -0
  76. fbgemm_gpu/split_table_batched_embeddings_ops_inference.py +2042 -0
  77. fbgemm_gpu/split_table_batched_embeddings_ops_training.py +4600 -0
  78. fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py +146 -0
  79. fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py +26 -0
  80. fbgemm_gpu/tbe/__init__.py +6 -0
  81. fbgemm_gpu/tbe/bench/__init__.py +55 -0
  82. fbgemm_gpu/tbe/bench/bench_config.py +156 -0
  83. fbgemm_gpu/tbe/bench/bench_runs.py +709 -0
  84. fbgemm_gpu/tbe/bench/benchmark_click_interface.py +187 -0
  85. fbgemm_gpu/tbe/bench/eeg_cli.py +137 -0
  86. fbgemm_gpu/tbe/bench/embedding_ops_common_config.py +149 -0
  87. fbgemm_gpu/tbe/bench/eval_compression.py +119 -0
  88. fbgemm_gpu/tbe/bench/reporter.py +35 -0
  89. fbgemm_gpu/tbe/bench/tbe_data_config.py +137 -0
  90. fbgemm_gpu/tbe/bench/tbe_data_config_bench_helper.py +323 -0
  91. fbgemm_gpu/tbe/bench/tbe_data_config_loader.py +289 -0
  92. fbgemm_gpu/tbe/bench/tbe_data_config_param_models.py +170 -0
  93. fbgemm_gpu/tbe/bench/utils.py +48 -0
  94. fbgemm_gpu/tbe/cache/__init__.py +11 -0
  95. fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py +385 -0
  96. fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py +48 -0
  97. fbgemm_gpu/tbe/ssd/__init__.py +15 -0
  98. fbgemm_gpu/tbe/ssd/common.py +46 -0
  99. fbgemm_gpu/tbe/ssd/inference.py +586 -0
  100. fbgemm_gpu/tbe/ssd/training.py +4908 -0
  101. fbgemm_gpu/tbe/ssd/utils/__init__.py +7 -0
  102. fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py +273 -0
  103. fbgemm_gpu/tbe/stats/__init__.py +10 -0
  104. fbgemm_gpu/tbe/stats/bench_params_reporter.py +339 -0
  105. fbgemm_gpu/tbe/utils/__init__.py +13 -0
  106. fbgemm_gpu/tbe/utils/common.py +42 -0
  107. fbgemm_gpu/tbe/utils/offsets.py +65 -0
  108. fbgemm_gpu/tbe/utils/quantize.py +251 -0
  109. fbgemm_gpu/tbe/utils/requests.py +556 -0
  110. fbgemm_gpu/tbe_input_multiplexer.py +108 -0
  111. fbgemm_gpu/triton/__init__.py +22 -0
  112. fbgemm_gpu/triton/common.py +77 -0
  113. fbgemm_gpu/triton/jagged/__init__.py +8 -0
  114. fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py +824 -0
  115. fbgemm_gpu/triton/quantize.py +647 -0
  116. fbgemm_gpu/triton/quantize_ref.py +286 -0
  117. fbgemm_gpu/utils/__init__.py +11 -0
  118. fbgemm_gpu/utils/filestore.py +211 -0
  119. fbgemm_gpu/utils/loader.py +36 -0
  120. fbgemm_gpu/utils/torch_library.py +132 -0
  121. fbgemm_gpu/uvm.py +40 -0
  122. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/METADATA +62 -0
  123. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/RECORD +127 -0
  124. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/WHEEL +5 -0
  125. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/top_level.txt +2 -0
  126. list_versions/__init__.py +12 -0
  127. list_versions/cli_run.py +163 -0
fbgemm_gpu/__init__.py ADDED
@@ -0,0 +1,186 @@
+ #!/usr/bin/env python3
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import json
+ import logging
+ import os
+ import re
+
+ import torch
+
+ # Based on the FBGEMM-PyTorch compatibility table at
+ # https://docs.pytorch.org/FBGEMM/general/Releases.html#fbgemm-releases-compatibility
+ _fbgemm_torch_compat_table = {
+     "1.5": "2.10",
+     "1.4": "2.9",
+     "1.3": "2.8",
+     "1.2": "2.7",
+     "1.1": "2.6",
+     "1.0": "2.5",
+     "0.8": "2.4",
+     "0.7": "2.3",
+     "0.6": "2.2",
+     "0.5": "2.1",
+     "0.4": "2.0",
+ }
+
+
+ def _load_target_info(target: str) -> dict[str, str]:
+     try:
+         filepath = os.path.join(
+             os.path.dirname(__file__), "docs", f"target.{target}.json.py"
+         )
+         with open(filepath, "r") as file:
+             data = json.load(file)
+     except Exception:
+         data = {}
+
+     return data
+
+
+ def _load_library(filename: str, version: str, no_throw: bool = False) -> None:
+     """Load a shared library from the given filename."""
+
+     # Check if the version of PyTorch is compatible with the version of FBGEMM
+     # that we are trying to load, and print a loud warning if not. This is
+     # useful for the OSS build, where we have a single FBGEMM library that is
+     # compatible with multiple versions of PyTorch.
+     #
+     # Based on: https://github.com/pytorch/ao/blob/main/torchao/__init__.py#L30
+
+     keys = [
+         key
+         for key in _fbgemm_torch_compat_table.keys()
+         if version.startswith(f"{key}.")
+     ]
+
+     if version == "INTERNAL" or "+git" in version:
+         # if FBGEMM version has "+git", assume it's locally built and we don't know
+         # anything about the PyTorch version used to build it
+         logging.info(
+             "FBGEMM version is INTERNAL or local, ignoring version compatibility check with PyTorch"
+         )
+
+     elif re.match(r"^\d{4}\.\d{1,2}\.\d{1,2}.*$", version):
+         # if FBGEMM version is a date, assume it's a nightly build and that we
+         # know what we're doing
+         logging.info(
+             "FBGEMM version is a nightly version, ignoring version compatibility check with PyTorch"
+         )
+
+     elif not keys:
+         logging.warning(
+             f"""
+             \033[33m
+             _fbgemm_torch_compat_table has no entry for {version} of FBGEMM;
+             cannot determine compatibility with PyTorch {torch.__version__}
+             \033[0m
+             """
+         )
+
+     elif not str(torch.__version__).startswith(_fbgemm_torch_compat_table[keys[0]]):
+         logging.warning(
+             f"""
+             \033[31m
+             FBGEMM_GPU version is {version}, which is not guaranteed to be
+             compatible with PyTorch {torch.__version__}; library loading might
+             crash!
+
+             Please refer to
+             https://docs.pytorch.org/FBGEMM/general/Releases.html#fbgemm-releases-compatibility
+             for the FBGEMM-PyTorch compatibility table.
+             \033[0m
+             """
+         )
+
+     try:
+         torch.ops.load_library(os.path.join(os.path.dirname(__file__), filename))
+         logging.info(f"Successfully loaded: '{filename}'")
+
+     except Exception as error:
+         logging.error(f"Could not load the library '{filename}'!\n\n\n{error}\n\n\n")
+         if not no_throw:
+             raise error
+
+
+ # Since __init__.py is only used in OSS context, we define `open_source` here
+ # and use its existence to determine whether or not we are in OSS context
+ open_source: bool = True
+
+ # Trigger the manual addition of docstrings to pybind11-generated operators
+ import fbgemm_gpu.docs  # noqa: F401, E402
+
+
+ __targets_infos__ = {
+     target: _load_target_info(target) for target in ["default", "genai", "hstu"]
+ }
+ __targets_infos__ = {k: v for (k, v) in __targets_infos__.items() if v}
+
+ try:
+     __target__, __info__ = next(iter(__targets_infos__.items()))
+     __variant__ = __info__["variant"]
+     __version__ = __info__["version"]
+ except Exception:
+     __variant__: str = "INTERNAL"
+     __version__: str = "INTERNAL"
+     __target__: str = "INTERNAL"
+
+ fbgemm_gpu_libraries = [
+     "fbgemm_gpu_config",
+     "fbgemm_gpu_tbe_utils",
+     "fbgemm_gpu_tbe_index_select",
+     "fbgemm_gpu_tbe_cache",
+     "fbgemm_gpu_tbe_optimizers",
+     "fbgemm_gpu_tbe_inference",
+     "fbgemm_gpu_tbe_training_forward",
+     "fbgemm_gpu_tbe_training_backward",
+     "fbgemm_gpu_tbe_training_backward_pt2",
+     "fbgemm_gpu_tbe_training_backward_dense",
+     "fbgemm_gpu_tbe_training_backward_split_host",
+     "fbgemm_gpu_tbe_training_backward_gwd",
+     "fbgemm_gpu_tbe_training_backward_vbe",
+     "fbgemm_gpu_py",
+ ]
+
+ fbgemm_genai_libraries = [
+     "experimental/gen_ai/fbgemm_gpu_experimental_gen_ai",
+ ]
+
+ # NOTE: While FBGEMM_GPU GenAI is not available for ROCm yet, we would like to
+ # be able to install the existing CUDA variant of the package onto ROCm systems,
+ # so that we can at least use the Triton GEMM libraries from experimental/gemm.
+ # But loading fbgemm_gpu package will trigger load-checking the .SO file for the
+ # GenAI libraries, which will fail. This workaround ignores check-loading the
+ # .SO file for the ROCm case, so that clients can import
+ # fbgemm_gpu.experimental.gemm without triggering an error.
+ if torch.cuda.is_available() and torch.version.hip:
+     fbgemm_genai_libraries = []
+
+ libraries_to_load = {
+     "default": fbgemm_gpu_libraries,
+     "genai": fbgemm_genai_libraries,
+ }
+
+ for target, info in __targets_infos__.items():
+     for library in libraries_to_load.get(target, []):
+         # NOTE: In all cases, we want to throw an error if we cannot load the
+         # library. However, this appears to break the OSS documentation build,
+         # where the Python documentation doesn't show up in the generated docs.
+         #
+         # To work around this problem, we introduce a fake build variant called
+         # `docs` and we only throw a library load error when the variant is not
+         # `docs`. For more information, see:
+         #
+         # https://github.com/pytorch/FBGEMM/pull/3477
+         # https://github.com/pytorch/FBGEMM/pull/3717
+         _load_library(f"{library}.so", info["version"], info["variant"] == "docs")
+
+ try:
+     # Trigger meta operator registrations
+     from . import sparse_ops  # noqa: F401, E402
+ except Exception:
+     pass
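
The interesting part of this file is the version-compatibility gate in `_load_library`: internal and locally-built (`+git`) versions skip the check, date-based nightly versions such as 2025.12.19 skip it as well, and release versions are matched against `_fbgemm_torch_compat_table` before each `.so` is loaded. The following standalone sketch (illustrative only; `classify` and the truncated table are not part of the package) mirrors that branching so it is easy to see which path this nightly wheel takes:

import re

# Excerpt of the table from fbgemm_gpu/__init__.py above (truncated for brevity).
_fbgemm_torch_compat_table = {"1.5": "2.10", "1.4": "2.9", "1.3": "2.8"}


def classify(version: str, torch_version: str) -> str:
    # Same ordering of checks as _load_library above.
    keys = [k for k in _fbgemm_torch_compat_table if version.startswith(f"{k}.")]
    if version == "INTERNAL" or "+git" in version:
        return "skip check: internal or locally-built FBGEMM"
    if re.match(r"^\d{4}\.\d{1,2}\.\d{1,2}.*$", version):
        return "skip check: nightly (date-based) version"
    if not keys:
        return "warn: version missing from compatibility table"
    if not torch_version.startswith(_fbgemm_torch_compat_table[keys[0]]):
        return "warn: FBGEMM/PyTorch versions may be incompatible"
    return "ok: versions are compatible"


print(classify("2025.12.19", "2.10.0"))  # skip check: nightly (date-based) version
print(classify("1.4.0", "2.8.1"))        # warn: FBGEMM/PyTorch versions may be incompatible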
fbgemm_gpu/asmjit.so ADDED
Binary file
fbgemm_gpu/batched_unary_embeddings_ops.py ADDED
@@ -0,0 +1,87 @@
+ #!/usr/bin/env python3
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-strict
+
+
+ from math import sqrt
+
+ import torch
+
+ from fbgemm_gpu.utils.loader import load_torch_module
+
+ try:
+     # pyre-ignore[21]
+     from fbgemm_gpu import open_source  # noqa: F401
+ except Exception:
+     load_torch_module("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
+
+
+ def wrap_weight_to_parameter(weights: list[torch.Tensor]) -> list[torch.Tensor]:
+     for i, v in enumerate(weights):
+         if not isinstance(v, torch.nn.Parameter):
+             weights[i] = torch.nn.Parameter(v)
+     return weights
+
+
+ class BatchedUnaryEmbeddingBag(torch.nn.Module):
+     # pyre-fixme[3]: Return type must be annotated.
+     def __init__(self, num_tasks: int, hash_sizes: list[int], long_index: bool = False):
+         super().__init__()
+         self.num_tasks = num_tasks
+         self.hash_sizes = hash_sizes
+         # [N][sum(E)][1]
+         embedding_data = torch.randn(size=(num_tasks, sum(self.hash_sizes), 1))
+         self.weight = torch.nn.Parameter(embedding_data)
+         index_dtype = torch.int64 if long_index else torch.int32
+         table_offsets_tensor = torch.cat(
+             [
+                 torch.tensor([0], dtype=index_dtype),
+                 torch.cumsum(
+                     torch.tensor(hash_sizes),
+                     dim=0,
+                     dtype=index_dtype,
+                 ),
+             ]
+         )
+         self.register_buffer("table_offsets_tensor", table_offsets_tensor)
+         self.init_parameters()
+
+     # pyre-fixme[3]: Return type must be annotated.
+     def forward(self, offsets: torch.Tensor, input: torch.Tensor):
+         # output is [N][B][T]
+         return torch.ops.fbgemm.batched_unary_embeddings(
+             self.weight,
+             self.table_offsets_tensor,
+             offsets,
+             input,
+         )
+
+     @torch.jit.export
+     # pyre-fixme[3]: Return type must be annotated.
+     def split_embedding_weights(self):
+         embedding_weights = []
+         for n in range(self.num_tasks):
+             for t in range(len(self.hash_sizes)):
+                 embedding_weights.append(
+                     self.weight.detach()[
+                         n,
+                         self.table_offsets_tensor[t] : self.table_offsets_tensor[t + 1],
+                         :,
+                     ]
+                 )
+         return embedding_weights
+
+     @torch.jit.export
+     # pyre-fixme[3]: Return type must be annotated.
+     def init_parameters(self):
+         for num_emb, param in zip(
+             self.hash_sizes * self.num_tasks,
+             wrap_weight_to_parameter(self.split_embedding_weights()),
+         ):
+             assert param.shape == (num_emb, 1)
+             param.data.uniform_(-sqrt(1 / num_emb), sqrt(1 / num_emb))
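
A minimal usage sketch of the module above (not part of the wheel; it assumes the fbgemm_gpu package and its ops are importable). It only exercises the shapes that are visible in the code: the [N][sum(E)][1] weight and the per-(task, table) views returned by split_embedding_weights:

from fbgemm_gpu.batched_unary_embeddings_ops import BatchedUnaryEmbeddingBag

hash_sizes = [10, 20, 30]  # three tables, T = 3
module = BatchedUnaryEmbeddingBag(num_tasks=2, hash_sizes=hash_sizes)

# weight is [N][sum(E)][1] = [2][60][1]
print(module.weight.shape)  # torch.Size([2, 60, 1])

# split_embedding_weights() returns N * T views, one per (task, table) pair,
# each of shape (hash_size, 1)
views = module.split_embedding_weights()
print(len(views), views[0].shape)  # 6 torch.Size([10, 1])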
fbgemm_gpu/config/__init__.py ADDED
@@ -0,0 +1,9 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-strict
+
+ from .feature_list import FeatureGate, FeatureGateName  # noqa F401
fbgemm_gpu/config/feature_list.py ADDED
@@ -0,0 +1,88 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-strict
+
+ from enum import auto, Enum
+
+ import torch
+
+ try:
+     torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:config_cpp_torch_op")
+ except Exception:
+     import fbgemm_gpu  # noqa F401
+
+
+ # Note: ENUM name must match EXACTLY with the JK knob name in the UI
+ class FeatureGateName(Enum):
+     """
+     FBGEMM_GPU feature gates enum (Python).
+
+     **Code Example:**
+
+     .. code-block:: python
+
+         from fbgemm_gpu.config import FeatureGateName
+
+         def foo():
+             if FeatureGateName.TBE_V2.is_enabled():
+
+                 # Do something if feature is enabled
+                 ...
+             else:
+                 # Do something different if feature is disabled
+                 ...
+
+     Note:
+         While not required, it is best to mirror the enum values in C++,
+         in `fbgemm_gpu/config/feature_gates.h`.
+
+         For fbcode: The ENUM name must match EXACTLY with the JK knob name in the UI
+         For OSS: The environment variable will be evaluated as f"FBGEMM_{ENUM}"
+
+     """
+
+     # Enable TBE V2 APIs
+     TBE_V2 = auto()
+
+     # Enable Ensemble Rowwise Adagrad (D60189486 stack)
+     TBE_ENSEMBLE_ROWWISE_ADAGRAD = auto()
+
+     # Enable ROCm packed bags optimization in TBE inference
+     TBE_ROCM_INFERENCE_PACKED_BAGS = auto()
+
+     # Enable HIP-based backward kernels in TBE training
+     TBE_ROCM_HIP_BACKWARD_KERNEL = auto()
+
+     # Enable bounds_check_indices_v2
+     BOUNDS_CHECK_INDICES_V2 = auto()
+
+     # Enable TBE input parameters extraction
+     TBE_REPORT_INPUT_PARAMS = auto()
+
+     def is_enabled(self) -> bool:
+         return FeatureGate.is_enabled(self)
+
+
+ class FeatureGate:
+     """
+     FBGEMM_GPU feature gate.
+
+     This class exists because methods defined on enums cannot be invoked when
+     the enum is packaged into a model (the mechanism is unclear).
+
+     **Code Example:**
+
+     .. code-block:: python
+
+         from deeplearning.fbgemm.fbgemm_gpu.config import FeatureGate, FeatureGateName
+
+         FeatureGate.is_enabled(FeatureGateName.TBE_V2)
+     """
+
+     @classmethod
+     def is_enabled(cls, feature: FeatureGateName) -> bool:
+         return torch.ops.fbgemm.check_feature_gate_key(feature.name)
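
A hedged sketch of how a feature gate might be checked in an OSS build, based only on the docstring note above that the environment variable is evaluated as f"FBGEMM_{ENUM}"; the accepted value ("1") is an assumption, since the C++ side behind check_feature_gate_key is not shown in this diff:

import os

# Assumed truthy value for the OSS environment variable; the exact parsing lives
# in the C++ feature-gate code, which is not part of this diff.
os.environ["FBGEMM_TBE_V2"] = "1"

from fbgemm_gpu.config import FeatureGate, FeatureGateName

if FeatureGateName.TBE_V2.is_enabled():
    print("TBE V2 code path enabled")

# Equivalent check through the FeatureGate helper class
print(FeatureGate.is_enabled(FeatureGateName.TBE_V2))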
fbgemm_gpu/docs/__init__.py ADDED
@@ -0,0 +1,18 @@
+ #!/usr/bin/env python3
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # Trigger the manual addition of docstrings to pybind11-generated operators
+ try:
+     from . import (  # noqa: F401
+         jagged_tensor_ops,
+         merge_pooled_embedding_ops,
+         permute_pooled_embedding_ops,
+         quantize_ops,
+         sparse_ops,
+     )
+ except Exception:
+     pass
fbgemm_gpu/docs/common.py ADDED
@@ -0,0 +1,9 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+
+ def add_docs(method, docstr: str):
+     method.__doc__ = docstr
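
add_docs is the hook that the other fbgemm_gpu/docs/*.py modules in this wheel (e.g. jagged_tensor_ops.py) presumably use to attach docstrings to pybind11-generated operators. A hedged sketch of that pattern follows; the chosen operator and the docstring text are illustrative, not taken from this diff, and the sketch assumes the fbgemm_gpu package is importable:

import torch

from fbgemm_gpu.docs.common import add_docs

# Attach a docstring to an operator exposed under torch.ops.fbgemm; the text
# here is placeholder content for illustration.
add_docs(
    torch.ops.fbgemm.jagged_2d_to_dense,
    """
    jagged_2d_to_dense(values, offsets, max_sequence_length) -> Tensor

    Illustrative docstring text attached at import time.
    """,
)

print(torch.ops.fbgemm.jagged_2d_to_dense.__doc__)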
fbgemm_gpu/docs/examples.py ADDED
@@ -0,0 +1,73 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from ctypes import c_size_t
+
+
+ # [fbgemm-gpu.python.docs.examples.docstring.start]
+ def example_method(alignment: c_size_t, param: float) -> int:
+     """
+     This class is an example of how you can write docstrings.
+     You can add multiple lines of those descriptions. Make sure to include
+     useful information about your method.
+
+     **Code Example:**
+
+     .. code-block:: cpp
+
+         // Here is a C++ code block
+         std::vector<int32_t> foo(const std::vector<int32_t> &lst) {
+             std::vector<int32_t> ret;
+             for (const auto x : lst) {
+                 ret.emplace_back(x * 2);
+             }
+             return ret;
+         }
+
+     And here is a verbatim-text diagram example:
+
+     .. code-block:: text
+
+         .------+---------------------------------.-----------------------------
+         |          Block A (first)               |       Block B (second)
+         +------+------+--------------------------+------+------+---------------
+         | Next | Prev |      usable space        | Next | Prev | usable space..
+         +------+------+--------------------------+------+--+---+---------------
+         ^  |                                     ^         |
+         |  '-------------------------------------'         |
+         |                                                   |
+         '----------- Block B's prev points to Block A -----'
+
+     Todo:
+         * This is a TODO item.
+         * And a second TODO item.
+
+     Args:
+         alignment (c_size_t): Description of the `alignment` value.
+         param (float): Description of `param1`.
+
+     Returns:
+         Description of the method's return value.
+
+     Raises:
+         AttributeError: If there is an error with the attributes.
+         ValueError: If `param` is equal to 3.14.
+
+     Example:
+         This is how you can use this function
+
+         >>> print("Code blocks are supported")
+
+     Note:
+         For more info on reStructuredText docstrings, see
+         `here <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`__
+         and
+         `here <https://peps.python.org/pep-0287/>`__.
+     """
+     return 42
+
+
+ # [fbgemm-gpu.python.docs.examples.docstring.end]