fbgemm-gpu-genai-nightly 2025.9.20__cp313-cp313-manylinux_2_28_x86_64.whl → 2025.11.4__cp313-cp313-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. fbgemm_gpu/__init__.py +36 -18
  2. fbgemm_gpu/asmjit.so +0 -0
  3. fbgemm_gpu/batched_unary_embeddings_ops.py +2 -3
  4. fbgemm_gpu/config/feature_list.py +1 -1
  5. fbgemm_gpu/docs/target.genai.json.py +6 -0
  6. fbgemm_gpu/enums.py +3 -4
  7. fbgemm_gpu/experimental/example/__init__.py +0 -4
  8. fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
  9. fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py +0 -4
  10. fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py +166 -45
  11. fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py +94 -276
  12. fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py +71 -2
  13. fbgemm_gpu/experimental/gen_ai/__init__.py +12 -4
  14. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py +0 -4
  15. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py +5 -5
  16. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py +34 -8
  17. fbgemm_gpu/experimental/gen_ai/bench/__init__.py +0 -4
  18. fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py +1 -2
  19. fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py +7 -7
  20. fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py +51 -19
  21. fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py +387 -9
  22. fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
  23. fbgemm_gpu/experimental/gen_ai/moe/__init__.py +0 -4
  24. fbgemm_gpu/experimental/gen_ai/moe/activation.py +2 -2
  25. fbgemm_gpu/experimental/gen_ai/moe/gather_scatter.py +2 -2
  26. fbgemm_gpu/experimental/gen_ai/moe/layers.py +6 -9
  27. fbgemm_gpu/experimental/gen_ai/moe/shuffling.py +3 -3
  28. fbgemm_gpu/experimental/gen_ai/quantize.py +6 -7
  29. fbgemm_gpu/fbgemm.so +0 -0
  30. fbgemm_gpu/permute_pooled_embedding_modules.py +4 -4
  31. fbgemm_gpu/permute_pooled_embedding_modules_split.py +4 -4
  32. fbgemm_gpu/quantize_comm.py +4 -4
  33. fbgemm_gpu/runtime_monitor.py +3 -3
  34. fbgemm_gpu/sll/cpu/cpu_sll.py +6 -6
  35. fbgemm_gpu/sll/triton/triton_jagged2_to_padded_dense.py +1 -2
  36. fbgemm_gpu/sll/triton/triton_jagged_dense_flash_attention.py +3 -4
  37. fbgemm_gpu/sll/triton/triton_jagged_flash_attention_basic.py +1 -2
  38. fbgemm_gpu/sll/triton/triton_multi_head_jagged_flash_attention.py +1 -2
  39. fbgemm_gpu/sparse_ops.py +55 -54
  40. fbgemm_gpu/split_embedding_configs.py +18 -18
  41. fbgemm_gpu/split_embedding_inference_converter.py +4 -4
  42. fbgemm_gpu/split_table_batched_embeddings_ops_common.py +50 -24
  43. fbgemm_gpu/split_table_batched_embeddings_ops_inference.py +37 -37
  44. fbgemm_gpu/split_table_batched_embeddings_ops_training.py +349 -101
  45. fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py +2 -2
  46. fbgemm_gpu/tbe/bench/bench_config.py +3 -3
  47. fbgemm_gpu/tbe/bench/bench_runs.py +13 -13
  48. fbgemm_gpu/tbe/bench/eeg_cli.py +2 -3
  49. fbgemm_gpu/tbe/bench/embedding_ops_common_config.py +2 -2
  50. fbgemm_gpu/tbe/bench/eval_compression.py +3 -3
  51. fbgemm_gpu/tbe/bench/tbe_data_config.py +6 -6
  52. fbgemm_gpu/tbe/bench/tbe_data_config_bench_helper.py +13 -13
  53. fbgemm_gpu/tbe/bench/tbe_data_config_param_models.py +8 -8
  54. fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py +21 -16
  55. fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py +4 -4
  56. fbgemm_gpu/tbe/ssd/inference.py +13 -13
  57. fbgemm_gpu/tbe/ssd/training.py +311 -89
  58. fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py +2 -2
  59. fbgemm_gpu/tbe/stats/bench_params_reporter.py +3 -3
  60. fbgemm_gpu/tbe/utils/offsets.py +3 -3
  61. fbgemm_gpu/tbe/utils/quantize.py +2 -2
  62. fbgemm_gpu/tbe/utils/requests.py +14 -14
  63. fbgemm_gpu/tbe_input_multiplexer.py +10 -10
  64. fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py +11 -11
  65. fbgemm_gpu/utils/torch_library.py +2 -2
  66. {fbgemm_gpu_genai_nightly-2025.9.20.dist-info → fbgemm_gpu_genai_nightly-2025.11.4.dist-info}/METADATA +1 -1
  67. {fbgemm_gpu_genai_nightly-2025.9.20.dist-info → fbgemm_gpu_genai_nightly-2025.11.4.dist-info}/RECORD +70 -70
  68. list_versions/cli_run.py +5 -6
  69. fbgemm_gpu/docs/version.py +0 -11
  70. {fbgemm_gpu_genai_nightly-2025.9.20.dist-info → fbgemm_gpu_genai_nightly-2025.11.4.dist-info}/WHEEL +0 -0
  71. {fbgemm_gpu_genai_nightly-2025.9.20.dist-info → fbgemm_gpu_genai_nightly-2025.11.4.dist-info}/top_level.txt +0 -0
fbgemm_gpu/__init__.py CHANGED
@@ -5,6 +5,7 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ import json
8
9
  import logging
9
10
  import os
10
11
  import re
@@ -26,6 +27,19 @@ _fbgemm_torch_compat_table = {
26
27
  }
27
28
 
28
29
 
30
+ def _load_target_info(target: str) -> dict[str, str]:
31
+ try:
32
+ filepath = os.path.join(
33
+ os.path.dirname(__file__), "docs", f"target.{target}.json.py"
34
+ )
35
+ with open(filepath, "r") as file:
36
+ data = json.load(file)
37
+ except Exception:
38
+ data = {}
39
+
40
+ return data
41
+
42
+
29
43
  def _load_library(filename: str, version: str, no_throw: bool = False) -> None:
30
44
  """Load a shared library from the given filename."""
31
45
 
@@ -98,13 +112,16 @@ open_source: bool = True
98
112
  # Trigger the manual addition of docstrings to pybind11-generated operators
99
113
  import fbgemm_gpu.docs # noqa: F401, E402
100
114
 
115
+
116
+ __targets_infos__ = {
117
+ target: _load_target_info(target) for target in ["default", "genai", "hstu"]
118
+ }
119
+ __targets_infos__ = {k: v for (k, v) in __targets_infos__.items() if v}
120
+
101
121
  try:
102
- # Export the version string from the version file auto-generated by setup.py
103
- from fbgemm_gpu.docs.version import ( # noqa: F401, E402
104
- __target__,
105
- __variant__,
106
- __version__,
107
- )
122
+ __target__, __info__ = next(iter(__targets_infos__.items()))
123
+ __variant__ = __info__["variant"]
124
+ __version__ = __info__["version"]
108
125
  except Exception:
109
126
  __variant__: str = "INTERNAL"
110
127
  __version__: str = "INTERNAL"
@@ -145,18 +162,19 @@ libraries_to_load = {
145
162
  "genai": fbgemm_genai_libraries,
146
163
  }
147
164
 
148
- for library in libraries_to_load.get(__target__, []):
149
- # NOTE: In all cases, we want to throw an error if we cannot load the
150
- # library. However, this appears to break the OSS documentation build,
151
- # where the Python documentation doesn't show up in the generated docs.
152
- #
153
- # To work around this problem, we introduce a fake build variant called
154
- # `docs` and we only throw a library load error when the variant is not
155
- # `docs`. For more information, see:
156
- #
157
- # https://github.com/pytorch/FBGEMM/pull/3477
158
- # https://github.com/pytorch/FBGEMM/pull/3717
159
- _load_library(f"{library}.so", __version__, __variant__ == "docs")
165
+ for target, info in __targets_infos__.items():
166
+ for library in libraries_to_load.get(target, []):
167
+ # NOTE: In all cases, we want to throw an error if we cannot load the
168
+ # library. However, this appears to break the OSS documentation build,
169
+ # where the Python documentation doesn't show up in the generated docs.
170
+ #
171
+ # To work around this problem, we introduce a fake build variant called
172
+ # `docs` and we only throw a library load error when the variant is not
173
+ # `docs`. For more information, see:
174
+ #
175
+ # https://github.com/pytorch/FBGEMM/pull/3477
176
+ # https://github.com/pytorch/FBGEMM/pull/3717
177
+ _load_library(f"{library}.so", info["version"], info["variant"] == "docs")
160
178
 
161
179
  try:
162
180
  # Trigger meta operator registrations
fbgemm_gpu/asmjit.so CHANGED
Binary file
fbgemm_gpu/batched_unary_embeddings_ops.py CHANGED
@@ -9,7 +9,6 @@
9
9
 
10
10
 
11
11
  from math import sqrt
12
- from typing import List
13
12
 
14
13
  import torch
15
14
 
@@ -22,7 +21,7 @@ except Exception:
22
21
  load_torch_module("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
23
22
 
24
23
 
25
- def wrap_weight_to_parameter(weights: List[torch.Tensor]) -> List[torch.Tensor]:
24
+ def wrap_weight_to_parameter(weights: list[torch.Tensor]) -> list[torch.Tensor]:
26
25
  for i, v in enumerate(weights):
27
26
  if not isinstance(v, torch.nn.Parameter):
28
27
  weights[i] = torch.nn.Parameter(v)
@@ -31,7 +30,7 @@ def wrap_weight_to_parameter(weights: List[torch.Tensor]) -> List[torch.Tensor]:
31
30
 
32
31
  class BatchedUnaryEmbeddingBag(torch.nn.Module):
33
32
  # pyre-fixme[3]: Return type must be annotated.
34
- def __init__(self, num_tasks: int, hash_sizes: List[int], long_index: bool = False):
33
+ def __init__(self, num_tasks: int, hash_sizes: list[int], long_index: bool = False):
35
34
  super().__init__()
36
35
  self.num_tasks = num_tasks
37
36
  self.hash_sizes = hash_sizes
fbgemm_gpu/config/feature_list.py CHANGED
@@ -11,7 +11,7 @@ from enum import auto, Enum
11
11
  import torch
12
12
 
13
13
  try:
14
- torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:config_cpp")
14
+ torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:config_cpp_torch_op")
15
15
  except Exception:
16
16
  import fbgemm_gpu # noqa F401
17
17
 
fbgemm_gpu/docs/target.genai.json.py ADDED
@@ -0,0 +1,6 @@
1
+
2
+ {
3
+ "version": "2025.11.4",
4
+ "target": "genai",
5
+ "variant": "cuda"
6
+ }
fbgemm_gpu/enums.py CHANGED
@@ -8,14 +8,13 @@
8
8
  # pyre-strict
9
9
 
10
10
  import enum
11
- import typing
12
- from typing import Any, Callable, List, Tuple
11
+ from typing import Any, Callable
13
12
 
14
13
 
15
14
  # Create enums in given namespace with information from query_op
16
15
  def create_enums(
17
- namespace: typing.Dict[str, Any],
18
- query_op: Callable[[], List[Tuple[str, List[Tuple[str, int]]]]],
16
+ namespace: dict[str, Any],
17
+ query_op: Callable[[], list[tuple[str, list[tuple[str, int]]]]],
19
18
  ) -> None:
20
19
  for enum_name, items in query_op():
21
20
  # Create matching python enumeration
fbgemm_gpu/experimental/example/__init__.py CHANGED
@@ -15,10 +15,6 @@ try:
15
15
  # pyre-ignore[21]
16
16
  # @manual=//deeplearning/fbgemm/fbgemm_gpu:test_utils
17
17
  from fbgemm_gpu import open_source
18
-
19
- # pyre-ignore[21]
20
- # @manual=//deeplearning/fbgemm/fbgemm_gpu:test_utils
21
- from fbgemm_gpu.docs.version import __version__ # noqa: F401
22
18
  except Exception:
23
19
  open_source: bool = False
24
20
 
fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py CHANGED
@@ -11,9 +11,5 @@ try:
11
11
  # pyre-ignore[21]
12
12
  # @manual=//deeplearning/fbgemm/fbgemm_gpu:test_utils
13
13
  from fbgemm_gpu import open_source
14
-
15
- # pyre-ignore[21]
16
- # @manual=//deeplearning/fbgemm/fbgemm_gpu:test_utils
17
- from fbgemm_gpu.docs.version import __version__ # noqa: F401
18
14
  except Exception:
19
15
  open_source: bool = False