fbgemm-gpu-genai-nightly 2025.12.19__cp310-cp310-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fbgemm-gpu-genai-nightly might be problematic. Click here for more details.

Files changed (127) hide show
  1. fbgemm_gpu/__init__.py +186 -0
  2. fbgemm_gpu/asmjit.so +0 -0
  3. fbgemm_gpu/batched_unary_embeddings_ops.py +87 -0
  4. fbgemm_gpu/config/__init__.py +9 -0
  5. fbgemm_gpu/config/feature_list.py +88 -0
  6. fbgemm_gpu/docs/__init__.py +18 -0
  7. fbgemm_gpu/docs/common.py +9 -0
  8. fbgemm_gpu/docs/examples.py +73 -0
  9. fbgemm_gpu/docs/jagged_tensor_ops.py +259 -0
  10. fbgemm_gpu/docs/merge_pooled_embedding_ops.py +36 -0
  11. fbgemm_gpu/docs/permute_pooled_embedding_ops.py +108 -0
  12. fbgemm_gpu/docs/quantize_ops.py +41 -0
  13. fbgemm_gpu/docs/sparse_ops.py +616 -0
  14. fbgemm_gpu/docs/target.genai.json.py +6 -0
  15. fbgemm_gpu/enums.py +24 -0
  16. fbgemm_gpu/experimental/example/__init__.py +29 -0
  17. fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
  18. fbgemm_gpu/experimental/example/utils.py +20 -0
  19. fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py +15 -0
  20. fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py +5654 -0
  21. fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py +4422 -0
  22. fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py +1192 -0
  23. fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py +232 -0
  24. fbgemm_gpu/experimental/gemm/triton_gemm/utils.py +130 -0
  25. fbgemm_gpu/experimental/gen_ai/__init__.py +56 -0
  26. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py +46 -0
  27. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py +333 -0
  28. fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py +552 -0
  29. fbgemm_gpu/experimental/gen_ai/bench/__init__.py +13 -0
  30. fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py +257 -0
  31. fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py +348 -0
  32. fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py +707 -0
  33. fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py +3483 -0
  34. fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
  35. fbgemm_gpu/experimental/gen_ai/moe/README.md +15 -0
  36. fbgemm_gpu/experimental/gen_ai/moe/__init__.py +66 -0
  37. fbgemm_gpu/experimental/gen_ai/moe/activation.py +292 -0
  38. fbgemm_gpu/experimental/gen_ai/moe/gather_scatter.py +740 -0
  39. fbgemm_gpu/experimental/gen_ai/moe/layers.py +1272 -0
  40. fbgemm_gpu/experimental/gen_ai/moe/shuffling.py +421 -0
  41. fbgemm_gpu/experimental/gen_ai/quantize.py +307 -0
  42. fbgemm_gpu/fbgemm.so +0 -0
  43. fbgemm_gpu/metrics.py +160 -0
  44. fbgemm_gpu/permute_pooled_embedding_modules.py +142 -0
  45. fbgemm_gpu/permute_pooled_embedding_modules_split.py +85 -0
  46. fbgemm_gpu/quantize/__init__.py +43 -0
  47. fbgemm_gpu/quantize/quantize_ops.py +64 -0
  48. fbgemm_gpu/quantize_comm.py +315 -0
  49. fbgemm_gpu/quantize_utils.py +246 -0
  50. fbgemm_gpu/runtime_monitor.py +237 -0
  51. fbgemm_gpu/sll/__init__.py +189 -0
  52. fbgemm_gpu/sll/cpu/__init__.py +80 -0
  53. fbgemm_gpu/sll/cpu/cpu_sll.py +1001 -0
  54. fbgemm_gpu/sll/meta/__init__.py +35 -0
  55. fbgemm_gpu/sll/meta/meta_sll.py +337 -0
  56. fbgemm_gpu/sll/triton/__init__.py +127 -0
  57. fbgemm_gpu/sll/triton/common.py +38 -0
  58. fbgemm_gpu/sll/triton/triton_dense_jagged_cat_jagged_out.py +72 -0
  59. fbgemm_gpu/sll/triton/triton_jagged2_to_padded_dense.py +221 -0
  60. fbgemm_gpu/sll/triton/triton_jagged_bmm.py +418 -0
  61. fbgemm_gpu/sll/triton/triton_jagged_bmm_jagged_out.py +553 -0
  62. fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_add.py +52 -0
  63. fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_mul_jagged_out.py +175 -0
  64. fbgemm_gpu/sll/triton/triton_jagged_dense_flash_attention.py +861 -0
  65. fbgemm_gpu/sll/triton/triton_jagged_flash_attention_basic.py +667 -0
  66. fbgemm_gpu/sll/triton/triton_jagged_self_substraction_jagged_out.py +73 -0
  67. fbgemm_gpu/sll/triton/triton_jagged_softmax.py +463 -0
  68. fbgemm_gpu/sll/triton/triton_multi_head_jagged_flash_attention.py +751 -0
  69. fbgemm_gpu/sparse_ops.py +1455 -0
  70. fbgemm_gpu/split_embedding_configs.py +452 -0
  71. fbgemm_gpu/split_embedding_inference_converter.py +175 -0
  72. fbgemm_gpu/split_embedding_optimizer_ops.py +21 -0
  73. fbgemm_gpu/split_embedding_utils.py +29 -0
  74. fbgemm_gpu/split_table_batched_embeddings_ops.py +73 -0
  75. fbgemm_gpu/split_table_batched_embeddings_ops_common.py +484 -0
  76. fbgemm_gpu/split_table_batched_embeddings_ops_inference.py +2042 -0
  77. fbgemm_gpu/split_table_batched_embeddings_ops_training.py +4600 -0
  78. fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py +146 -0
  79. fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py +26 -0
  80. fbgemm_gpu/tbe/__init__.py +6 -0
  81. fbgemm_gpu/tbe/bench/__init__.py +55 -0
  82. fbgemm_gpu/tbe/bench/bench_config.py +156 -0
  83. fbgemm_gpu/tbe/bench/bench_runs.py +709 -0
  84. fbgemm_gpu/tbe/bench/benchmark_click_interface.py +187 -0
  85. fbgemm_gpu/tbe/bench/eeg_cli.py +137 -0
  86. fbgemm_gpu/tbe/bench/embedding_ops_common_config.py +149 -0
  87. fbgemm_gpu/tbe/bench/eval_compression.py +119 -0
  88. fbgemm_gpu/tbe/bench/reporter.py +35 -0
  89. fbgemm_gpu/tbe/bench/tbe_data_config.py +137 -0
  90. fbgemm_gpu/tbe/bench/tbe_data_config_bench_helper.py +323 -0
  91. fbgemm_gpu/tbe/bench/tbe_data_config_loader.py +289 -0
  92. fbgemm_gpu/tbe/bench/tbe_data_config_param_models.py +170 -0
  93. fbgemm_gpu/tbe/bench/utils.py +48 -0
  94. fbgemm_gpu/tbe/cache/__init__.py +11 -0
  95. fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py +385 -0
  96. fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py +48 -0
  97. fbgemm_gpu/tbe/ssd/__init__.py +15 -0
  98. fbgemm_gpu/tbe/ssd/common.py +46 -0
  99. fbgemm_gpu/tbe/ssd/inference.py +586 -0
  100. fbgemm_gpu/tbe/ssd/training.py +4908 -0
  101. fbgemm_gpu/tbe/ssd/utils/__init__.py +7 -0
  102. fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py +273 -0
  103. fbgemm_gpu/tbe/stats/__init__.py +10 -0
  104. fbgemm_gpu/tbe/stats/bench_params_reporter.py +339 -0
  105. fbgemm_gpu/tbe/utils/__init__.py +13 -0
  106. fbgemm_gpu/tbe/utils/common.py +42 -0
  107. fbgemm_gpu/tbe/utils/offsets.py +65 -0
  108. fbgemm_gpu/tbe/utils/quantize.py +251 -0
  109. fbgemm_gpu/tbe/utils/requests.py +556 -0
  110. fbgemm_gpu/tbe_input_multiplexer.py +108 -0
  111. fbgemm_gpu/triton/__init__.py +22 -0
  112. fbgemm_gpu/triton/common.py +77 -0
  113. fbgemm_gpu/triton/jagged/__init__.py +8 -0
  114. fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py +824 -0
  115. fbgemm_gpu/triton/quantize.py +647 -0
  116. fbgemm_gpu/triton/quantize_ref.py +286 -0
  117. fbgemm_gpu/utils/__init__.py +11 -0
  118. fbgemm_gpu/utils/filestore.py +211 -0
  119. fbgemm_gpu/utils/loader.py +36 -0
  120. fbgemm_gpu/utils/torch_library.py +132 -0
  121. fbgemm_gpu/uvm.py +40 -0
  122. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/METADATA +62 -0
  123. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/RECORD +127 -0
  124. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/WHEEL +5 -0
  125. fbgemm_gpu_genai_nightly-2025.12.19.dist-info/top_level.txt +2 -0
  126. list_versions/__init__.py +12 -0
  127. list_versions/cli_run.py +163 -0
fbgemm_gpu/uvm.py ADDED
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ # All rights reserved.
4
+ #
5
+ # This source code is licensed under the BSD-style license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
+ # pyre-strict
9
+
10
+ from enum import Enum
11
+ from typing import Optional
12
+
13
+ import torch
14
+
15
+ from fbgemm_gpu.enums import create_enums
16
+
17
# If importing the `open_source` marker from fbgemm_gpu fails for any reason,
# fall back to loading the cumem_utils ops library explicitly.
# NOTE(review): presumably the import only succeeds in the open-source build,
# where the ops are already registered by the package - confirm.
try:
    # pyre-ignore[21]
    from fbgemm_gpu import open_source  # noqa: F401
except Exception:
    torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:cumem_utils")

# Import all uvm enums from c++ library
# `create_enums` is handed this module's globals() together with the
# `fbgemm_gpu_uvm_enum_query` op; presumably it defines the queried enums as
# module-level names here - see fbgemm_gpu.enums to confirm.
# pyre-fixme[6]: For 2nd argument expected `() -> List[Tuple[str, List[Tuple[str,
#  int]]]]` but got `OpOverloadPacket`.
create_enums(globals(), torch.ops.fbgemm.fbgemm_gpu_uvm_enum_query)
27
+
28
+
29
def cudaMemAdvise(
    t: torch.Tensor,
    advice: Enum,
) -> None:
    """Forward a memory-advice request for ``t`` to the fbgemm op.

    Args:
        t: Tensor passed through unchanged to
            ``torch.ops.fbgemm.cuda_mem_advise``.
        advice: Enum member whose ``.value`` is passed as the advice
            argument (presumably one of the enums this module imports from
            the C++ library - confirm against the op schema).
    """
    # Resolve the op first so lookup failures surface before `advice.value`
    # is read, matching a direct call expression.
    advise_op = torch.ops.fbgemm.cuda_mem_advise
    advise_op(t, advice.value)
34
+
35
+
36
def cudaMemPrefetchAsync(
    t: torch.Tensor,
    device_t: Optional[torch.Tensor] = None,
) -> None:
    """Forward a prefetch request for ``t`` to the fbgemm op.

    Args:
        t: Tensor passed through unchanged to
            ``torch.ops.fbgemm.cuda_mem_prefetch_async``.
        device_t: Optional tensor passed as the second argument; ``None`` is
            forwarded as-is (presumably it selects the destination device -
            confirm against the op schema).
    """
    prefetch_op = torch.ops.fbgemm.cuda_mem_prefetch_async
    prefetch_op(t, device_t)
@@ -0,0 +1,62 @@
1
+ Metadata-Version: 2.4
2
+ Name: fbgemm_gpu_genai_nightly
3
+ Version: 2025.12.19
4
+ Home-page: https://github.com/pytorch/fbgemm
5
+ Author: FBGEMM Team
6
+ Author-email: packages@pytorch.org
7
+ License: BSD-3
8
+ Keywords: PyTorch,Recommendation Models,High Performance Computing,GPU,CUDA
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: BSD License
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Programming Language :: Python :: 3.14
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: numpy
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: keywords
29
+ Dynamic: license
30
+ Dynamic: requires-dist
31
+
32
+ # FBGEMM_GPU
33
+
34
+ [![FBGEMM_GPU-CPU CI](https://github.com/pytorch/FBGEMM/actions/workflows/fbgemm_gpu_ci_cpu.yml/badge.svg)](https://github.com/pytorch/FBGEMM/actions/workflows/fbgemm_gpu_ci_cpu.yml)
35
+ [![FBGEMM_GPU-CUDA CI](https://github.com/pytorch/FBGEMM/actions/workflows/fbgemm_gpu_ci_cuda.yml/badge.svg)](https://github.com/pytorch/FBGEMM/actions/workflows/fbgemm_gpu_ci_cuda.yml)
36
+ [![FBGEMM_GPU-ROCm CI](https://github.com/pytorch/FBGEMM/actions/workflows/fbgemm_gpu_ci_rocm.yml/badge.svg)](https://github.com/pytorch/FBGEMM/actions/workflows/fbgemm_gpu_ci_rocm.yml)
37
+
38
+ FBGEMM_GPU (FBGEMM GPU Kernels Library) is a collection of high-performance
39
+ PyTorch GPU operator libraries for training and inference. The library provides
40
+ efficient table batched embedding bag, data layout transformation, and
41
+ quantization support.
42
+
43
+ See the full [Documentation](https://pytorch.org/FBGEMM) for more information
44
+ on building, installing, and developing with FBGEMM_GPU, as well as the most
45
+ up-to-date support matrix for this library.
46
+
47
+
48
+ ## Join the FBGEMM_GPU Community
49
+
50
+ For questions, support, news updates, or feature requests, please feel free to:
51
+
52
+ * File a ticket in [GitHub Issues](https://github.com/pytorch/FBGEMM/issues)
53
+ * Post a discussion in [GitHub Discussions](https://github.com/pytorch/FBGEMM/discussions)
54
+ * Reach out to us on the `#fbgemm` channel in [PyTorch Slack](https://bit.ly/ptslack)
55
+
56
+ For contributions, please see the [`CONTRIBUTING`](../CONTRIBUTING.md) file for
57
+ ways to help out.
58
+
59
+
60
+ ## License
61
+
62
+ FBGEMM_GPU is BSD licensed, as found in the [`LICENSE`](../LICENSE) file.
@@ -0,0 +1,127 @@
1
+ fbgemm_gpu/__init__.py,sha256=bL2dL7uYeXb1GvdjIDUTcLXLRGNfmnI4MQoE3-Gg5m8,6361
2
+ fbgemm_gpu/asmjit.so,sha256=s2majpI_vsFgc4Vh8szCK4xFYXyT5A0K6HwepGBwrqQ,501728
3
+ fbgemm_gpu/batched_unary_embeddings_ops.py,sha256=GYeJ9pg-Wc9FokXVci_npDsL6UV18-pJXID2xzrJ9O8,2904
4
+ fbgemm_gpu/enums.py,sha256=37ewGSfO1x7sO31ZkRiqV1yKuklfHXT5qZIxzeeGogo,755
5
+ fbgemm_gpu/fbgemm.so,sha256=U864UANx-CVyFYk5ADawCd0uWRfntHaVcyl6AVty_3Q,5642616
6
+ fbgemm_gpu/metrics.py,sha256=TsurFLJf0nJvPDN7urWb4LMQlf5RgdWPTTTDO7S4wtI,5663
7
+ fbgemm_gpu/permute_pooled_embedding_modules.py,sha256=vOXMYclaGnwSt0St_SOAlAe18kz6WjMyTeHnC9jLhcE,5130
8
+ fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=f3VJvH_kw9Ltd_DXtaf_PJPHmlmEWrQgzQ7MDkhh5Nw,2746
9
+ fbgemm_gpu/quantize_comm.py,sha256=ZfXtRHfqpVpV6k2PDL6oTUkKYzopqAV2M6vavp_RLSM,12022
10
+ fbgemm_gpu/quantize_utils.py,sha256=q8Aokk6nlHbXF6HcDBbhBCAGSZV4klM8uPF-MUFFtAw,8324
11
+ fbgemm_gpu/runtime_monitor.py,sha256=YXRUv6nXCsoTgh5_RzailTGvCYzwoYDb-eR4rlGwtaw,7619
12
+ fbgemm_gpu/sparse_ops.py,sha256=_EJC1pAbNnAnVQQ5JBg4DAV2TboIj-4XQkiKMmg1vXI,50417
13
+ fbgemm_gpu/split_embedding_configs.py,sha256=fv29efZGD_cvh5KwdvTFD6GZtqJLYjWXW_0vMeyT_6k,15483
14
+ fbgemm_gpu/split_embedding_inference_converter.py,sha256=AghGW22MgMsdHzdwdPMPYDjgas5AE_estckY8rMgXVU,7056
15
+ fbgemm_gpu/split_embedding_optimizer_ops.py,sha256=wXuGazClBMk62yL_r9udUIKaPgQP7SlkSb5ugB75wrQ,711
16
+ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43mnzdR_I,851
17
+ fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
18
+ fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=eFxb_bDfBV8G76pmd-SxDXXXnqgbuGYOS4pSU8JS5dg,19295
19
+ fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=dGC85xjQiRUrequBibSf9oMAVHT5Q49zsVo2zW4n_88,81679
20
+ fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=D72laY5iFC3_6f_qHnPMizDDxwI0QW7-21RyY0ZikK4,187705
21
+ fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=e3O9ElaWBGvG7TdT3Ok_8cB06jhskXuyCQ0t40dzsEY,5449
22
+ fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
23
+ fbgemm_gpu/tbe_input_multiplexer.py,sha256=TQjwkJ2JkOaQsMYuRdk9RbNa9759EPEtx8bYclChtZY,3063
24
+ fbgemm_gpu/uvm.py,sha256=guNK8ZzR80jmv-CyRgEhxhVYhjz3R9d6tB8Hu1uWDUo,1047
25
+ fbgemm_gpu/config/__init__.py,sha256=yN0KAneCICgF2BTfOYGsd0qU1PvZX_6msC6YHHZKLMg,292
26
+ fbgemm_gpu/config/feature_list.py,sha256=iDOGr9nwTqUhWsqOefRIqIo1jwLSeSII4jGnLeU01kg,2359
27
+ fbgemm_gpu/docs/__init__.py,sha256=DR6hMSQrsZALfH2AnuJQ4Zq2CfBUUhMN8YjD6APjiAE,523
28
+ fbgemm_gpu/docs/common.py,sha256=8ipXTwVb222X-aZ71O6n8fhxHCHPNhJEHMFiO7epcIs,273
29
+ fbgemm_gpu/docs/examples.py,sha256=ZMN_6sL74LH_hrp2bF_hmg8gi29GhcgvwV3kCMjxkoE,2377
30
+ fbgemm_gpu/docs/jagged_tensor_ops.py,sha256=Bsx-ZxvvdMv5CaldSvuw9GPR-HRcLbRR2IEXCOCm9r0,7381
31
+ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65_3E8xSJaWSj_Jbo8,1102
32
+ fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
33
+ fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
34
+ fbgemm_gpu/docs/sparse_ops.py,sha256=gSLUFdnu8lle_6gLewFkM20wL3ek2jKLvDGMKR6POaY,27292
35
+ fbgemm_gpu/docs/target.genai.json.py,sha256=JuJdpCsTce8VdugbwGqbb-C4cE7fulf58ZhJxZjxjaI,79
36
+ fbgemm_gpu/experimental/example/__init__.py,sha256=OvJHZgWnycL1gWKyCXFJCTKuys3KAqx4iadjx3R-tBQ,723
37
+ fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=9kTOBz3_72tWokyxEixPdgiNRuhEBcQ2LMbKzfAaq-o,190656
38
+ fbgemm_gpu/experimental/example/utils.py,sha256=Je__VkMlBMLOhh7NXOocOdvaa2gz9kl9Dkqeu25tpFA,562
39
+ fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py,sha256=1CqUfzlYyXTvU-BNaUq4RZpLV-2lKAVCAHeJzSIZFWw,419
40
+ fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py,sha256=R4VNZdPSgmRmwDfTt2CShED2SGUF6dCXSUW2C4LISgE,215713
41
+ fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py,sha256=KrI-wZeIf4AqcjXo5XoxAUWzOeM5MHTvhKBKzbQ-Hc0,153178
42
+ fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py,sha256=5ClZ-GDrx6q0uaqWOOmKGVANBQfAd1KFBt0LneFeZDY,42364
43
+ fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py,sha256=SltbY_dsit5e7B8lDIB_VYPrEq0t9kckthj9mQaVNfA,7571
44
+ fbgemm_gpu/experimental/gemm/triton_gemm/utils.py,sha256=rULXIpVaaRS3GKUZ1RHcWUrUyy0xMVREwS1SFShGgcw,4302
45
+ fbgemm_gpu/experimental/gen_ai/__init__.py,sha256=r3NlNCXuIh0pfKwKU5v14y6AZkpoIkKWbtzxSprgeKA,1713
46
+ fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=OcjMKexbtQFgnhsPFuAGe6m_gi1nJf4a2UZ4zAd7QFo,65238760
47
+ fbgemm_gpu/experimental/gen_ai/quantize.py,sha256=KAljWSdN-1_c5DWfT-3MDxWLMULK49Yu36t6TmQI9Tw,12599
48
+ fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py,sha256=-R_LxyHpdXMILU9TNuYoRisBCkfK0_VLyixefaeZf4g,1463
49
+ fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=gbhNU3mDTKJb3yt3inIDbiUjX_SG1oZfzgDygtHvMpk,10101
50
+ fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py,sha256=fD39_WH7TfNCiP5Vl46ToX6PsLMLUFLhizT26Qe7TWg,17282
51
+ fbgemm_gpu/experimental/gen_ai/bench/__init__.py,sha256=XpAK_eyqDSKeFC5J9KpnKtbZG07mrDh9d2j1LFKzr-8,404
52
+ fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py,sha256=ApEyJOf_rdIo8V_EgvhZXBGNov8ITC_dnB95v8szulI,8515
53
+ fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py,sha256=K9Nib6D7xJbw1QwEVuCJrVyI1qs988moo3cieVKYuFY,12057
54
+ fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py,sha256=BWl6t-4acbuRSEX2aVNDlFrSWZkqMWK2sI3VONaMd3Q,24047
55
+ fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=Kq4zSfxrzmSL75RWWdhPSTWq3AxClu_RO3onn5vzx8s,104983
56
+ fbgemm_gpu/experimental/gen_ai/moe/README.md,sha256=z9ybHmv4KFJ1drj5OByuFaOY0tRQwwiIW3Q22TB_2-k,904
57
+ fbgemm_gpu/experimental/gen_ai/moe/__init__.py,sha256=lwSvff07yEav024B1XyfgW8r8hwNe--aEDywcO7rnbM,1905
58
+ fbgemm_gpu/experimental/gen_ai/moe/activation.py,sha256=NiXhWyCNagI3P9N3N89iSX7xKuShdkq9DxEUAzoV6y0,7892
59
+ fbgemm_gpu/experimental/gen_ai/moe/gather_scatter.py,sha256=8inrE4dkpfO9NFkrmXyXOCM262LMcTA3SQldxPoosT8,21044
60
+ fbgemm_gpu/experimental/gen_ai/moe/layers.py,sha256=QLwoKjyYUHT5vXAvp_maRSxyruwGXaNURgtW8ataVyg,42693
61
+ fbgemm_gpu/experimental/gen_ai/moe/shuffling.py,sha256=VDGEUdLZyj6mblJkAIReLICxU5BGnvmUjgZDP0VVqt8,11077
62
+ fbgemm_gpu/quantize/__init__.py,sha256=pftciXHE7csekDFkl7Ui1AWglVMMnSrOO04mREnUdb0,921
63
+ fbgemm_gpu/quantize/quantize_ops.py,sha256=25AIOv9n2UoxamMUaI6EK1Ur4gSHxbZIReHBtgOjjCs,2228
64
+ fbgemm_gpu/sll/__init__.py,sha256=rgXh35-OFUE54E9gGBq3NGxouGvgMv2ccY2bWUTxONY,4191
65
+ fbgemm_gpu/sll/cpu/__init__.py,sha256=glsukNpXtf47VRIdBktILD-4CmVcf4621SGB55lT_ho,2692
66
+ fbgemm_gpu/sll/cpu/cpu_sll.py,sha256=3zRsDZKCFPly1EZWl4LNB3ABJVy4JM4RVwmDuUeJZzc,27870
67
+ fbgemm_gpu/sll/meta/__init__.py,sha256=2sMcD67XGsweBZ-UV2AEJmM4ELPsHeRAYED6kqfgAd4,1077
68
+ fbgemm_gpu/sll/meta/meta_sll.py,sha256=Jk14EOW9VPFwawD7Bwky0R0A5rmbcLWMo52oH8J6Koc,8305
69
+ fbgemm_gpu/sll/triton/__init__.py,sha256=dW_cEW0R8635sKLozsL88SP0Cch5QnBGvfnAmoqWMic,4109
70
+ fbgemm_gpu/sll/triton/common.py,sha256=hISlX4Y-7FtGof-Xx4_B8-2vlF27F9t4p2qyLMUnJ8A,798
71
+ fbgemm_gpu/sll/triton/triton_dense_jagged_cat_jagged_out.py,sha256=J9qOqjNJ72LUBqs-pGI9wrFzzzBpsZ5fzYjgfKc2YhY,1885
72
+ fbgemm_gpu/sll/triton/triton_jagged2_to_padded_dense.py,sha256=M_AMJfW9D67xa4ezhmBViKsrt_n9EiX-Ki_drI5K3Bo,5925
73
+ fbgemm_gpu/sll/triton/triton_jagged_bmm.py,sha256=QFhaIQc8g-TRHr7wjm-Wd-atNJS1fDDkImHXXB3v-gU,11789
74
+ fbgemm_gpu/sll/triton/triton_jagged_bmm_jagged_out.py,sha256=hccLxsKoSZKiWid5P_yl-IVdBSXw1Rt0WeiRsjLD2Iw,13864
75
+ fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_add.py,sha256=_0hke_aaAdKQJpGUYX20NLss1_cXDIKxqblX4QQb7Io,1592
76
+ fbgemm_gpu/sll/triton/triton_jagged_dense_elementwise_mul_jagged_out.py,sha256=9R7BOOe8SJiko1PgbiuHlFyPKtGaaCFSlZ1RaEQyICE,4198
77
+ fbgemm_gpu/sll/triton/triton_jagged_dense_flash_attention.py,sha256=nebxJ7-1muDn-1oEuE46NbYbr6BcsPcuTOsQ49nCchI,22783
78
+ fbgemm_gpu/sll/triton/triton_jagged_flash_attention_basic.py,sha256=po9Nx4uAGVu_YIZ9CWvrmzSwxDsnDuNAtnk9VR7-Ems,17750
79
+ fbgemm_gpu/sll/triton/triton_jagged_self_substraction_jagged_out.py,sha256=VaOIxQn4Obvna2Co5VNDGILCDfKuYwkhVxK2oCi5mPI,1754
80
+ fbgemm_gpu/sll/triton/triton_jagged_softmax.py,sha256=odN66XGPc5VWmMZ34FRBsodpUtbpEILDpOgPtpCNrY4,14225
81
+ fbgemm_gpu/sll/triton/triton_multi_head_jagged_flash_attention.py,sha256=nEo5I-bba1XlG59qoACGB18OrA1LISs-e7Lasgys1s8,19572
82
+ fbgemm_gpu/tbe/__init__.py,sha256=fE0IHi1JJpxsNVBNzWNee2thrNXFFRhY94c80RxNSIE,231
83
+ fbgemm_gpu/tbe/bench/__init__.py,sha256=wgPBmxtQMmbA39cbQ2nO4PGAk5lXjFGjG8-9FoAXg34,1589
84
+ fbgemm_gpu/tbe/bench/bench_config.py,sha256=xgtlGLCeZVW6jBYwkKsiQeCslCrWDgJbV2NLLwCRSn4,5452
85
+ fbgemm_gpu/tbe/bench/bench_runs.py,sha256=vCblxjwvpzZ5oBxd6Z9fYy2KYmI--ySYlqRw_PLPX3k,23507
86
+ fbgemm_gpu/tbe/bench/benchmark_click_interface.py,sha256=Ey-3Rx4jfzam4QnYs-pNIe-UJvgmoeeM0zZ4C5j5ZuU,6891
87
+ fbgemm_gpu/tbe/bench/eeg_cli.py,sha256=DuF0pjy1wjrGaqsf1Bo9IP_q5nNx237cv9j80pG5aCk,3569
88
+ fbgemm_gpu/tbe/bench/embedding_ops_common_config.py,sha256=CXwupJIhtDQiOedqSYhJyXbiMOikML5torrXb5hqt2Y,4967
89
+ fbgemm_gpu/tbe/bench/eval_compression.py,sha256=ulFMaNZF2g_vfkXLWZSh02ibotg1zpTz3swVU484mzU,3486
90
+ fbgemm_gpu/tbe/bench/reporter.py,sha256=ZK5RFolUmZEcsEaife270_iOdXAQD5EjTUkuxctnAbY,804
91
+ fbgemm_gpu/tbe/bench/tbe_data_config.py,sha256=M0lK6m3S7Kl34prQcC3z8POr93FgX1oEUZ6MdVXZq5M,4794
92
+ fbgemm_gpu/tbe/bench/tbe_data_config_bench_helper.py,sha256=tgNB_3qWqWpjR86BhgRSU74bdW_ilRjtG61Cxmy1_Vk,10923
93
+ fbgemm_gpu/tbe/bench/tbe_data_config_loader.py,sha256=MNddYzoRlu0mNhnsVVG57JN7pBAepfaRL7UCEzS2KoI,10007
94
+ fbgemm_gpu/tbe/bench/tbe_data_config_param_models.py,sha256=sptdqcNE9JlgyIJ17neZaMxagKG469_ynX0mVx_JKBY,6090
95
+ fbgemm_gpu/tbe/bench/utils.py,sha256=cq_6FJHlgZ5femAK6XKpj7nJ9jc03qXI16N1ht1CcLg,1721
96
+ fbgemm_gpu/tbe/cache/__init__.py,sha256=lrYwhvqX2eWN0vAPe89HYgMW_O1vccoOcoFHJ9cyM-s,398
97
+ fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py,sha256=VmG9EennGcq2By8Tj8VkFsJG0oOCGw8EhlPo8-t--Fk,14604
98
+ fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py,sha256=vZHj7KIe1DoJDy5eft29XtGg6I-tRx60tjKOcTHRAYI,1321
99
+ fbgemm_gpu/tbe/ssd/__init__.py,sha256=wzfMT10cp_dqK2lrebC449hOdexBnizcf_98lA1NyHs,483
100
+ fbgemm_gpu/tbe/ssd/common.py,sha256=1J8K7sTQswgCYWaVwF-ZdCJj7mNN6O9GI70AaZWzJGE,1044
101
+ fbgemm_gpu/tbe/ssd/inference.py,sha256=B_uX66ajGA9YKGlFa5TmGWs7b-b1RFigzwxmENZ9Oio,22816
102
+ fbgemm_gpu/tbe/ssd/training.py,sha256=ElFvQHF5wQBzrqU34F6ZR2IEBVzKO3j3symntP15S3E,211380
103
+ fbgemm_gpu/tbe/ssd/utils/__init__.py,sha256=5DgmR2HA6NtmYh2ddkUgpDsZ6a7hF0DPedA1gMpdh18,250
104
+ fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py,sha256=SFg2-29b-i49LWm-FlaWUkTz2XzXbicYi_AzVj4jKNE,7601
105
+ fbgemm_gpu/tbe/stats/__init__.py,sha256=on29iDtq7cVNh90JR9aeFNG-K9DDoYq0JryzoplL49I,322
106
+ fbgemm_gpu/tbe/stats/bench_params_reporter.py,sha256=PMcaf27LpnflA7LMsuj1OpqTN3mPqddDoSeUnzKxLCs,13040
107
+ fbgemm_gpu/tbe/utils/__init__.py,sha256=rlXFm-kTByFZO4SS5C5zMzANRiQmM1NT__eWBayncYg,549
108
+ fbgemm_gpu/tbe/utils/common.py,sha256=KBCyBT-7ShhTRRd1Rs5sEU4g8JggEM7Es6wQ0qhWY-o,1313
109
+ fbgemm_gpu/tbe/utils/offsets.py,sha256=_4Z6TaRygBCeU_BZem_c4VPi5jxLButpy3pv_7OqrMc,1910
110
+ fbgemm_gpu/tbe/utils/quantize.py,sha256=icN2MXnl5rNqtKhGKkjpelx5pYBMYUv-6CrghxeVmD4,9178
111
+ fbgemm_gpu/tbe/utils/requests.py,sha256=rQkEoaUUWEYCQM-1K_Lxg1wPcyIVw8sbdaGFTpsaE5I,18040
112
+ fbgemm_gpu/triton/__init__.py,sha256=kPn_Ye6J9DAzWtqi76KYGwfKSqw0IhqG3Bir5aUpkWM,658
113
+ fbgemm_gpu/triton/common.py,sha256=wnkLd2a8fKpefymLL-LjNKEL4hDVSxFiF5g3aF8mzsw,2131
114
+ fbgemm_gpu/triton/quantize.py,sha256=z3y74-DCbGcQDsO70b2jK_HQDIYC0UJ7IEG2vvMu0_Y,26816
115
+ fbgemm_gpu/triton/quantize_ref.py,sha256=q4RBmFaqPVPELU52lbSgB0n26Aun7apeK7bRF2MWS80,11553
116
+ fbgemm_gpu/triton/jagged/__init__.py,sha256=om0yhjuzKuE1UQakFMWHsXN4WNb8mvNkZtYofQ8hdn4,246
117
+ fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py,sha256=F2eQWjkWMR5RWQ48oIr-8OU_CRZyLazDpT7DFrDWS6g,29871
118
+ fbgemm_gpu/utils/__init__.py,sha256=JQQNdcTTaEU6ptK-OW-ZQBwTFxEZZpWOtBXWwEZm39o,354
119
+ fbgemm_gpu/utils/filestore.py,sha256=oVtbKGaPQki1JgbJCkrkElukOFVyxntQpSC0lYBKgho,6455
120
+ fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,990
121
+ fbgemm_gpu/utils/torch_library.py,sha256=ywsAHjbuwesj50LjEu99WkAH17FlaVgePZ9OmFg6YE4,4193
122
+ list_versions/__init__.py,sha256=UmTeqCk-UJWFtlZQWvZao3xvui2w9E3X_JdOXVjRaNw,315
123
+ list_versions/cli_run.py,sha256=CChZoXQ-tiKaWboXAYlPVJ5w8K5zAKiKcncA087I1sc,4508
124
+ fbgemm_gpu_genai_nightly-2025.12.19.dist-info/METADATA,sha256=88E4grcXfAZguiTo9sUFti3RP963zBFTAoI98uvxnBY,2657
125
+ fbgemm_gpu_genai_nightly-2025.12.19.dist-info/WHEEL,sha256=k9CVMKlTmOLLXq_OyiiJFbPd6UKfogV4yIUezgPmplE,108
126
+ fbgemm_gpu_genai_nightly-2025.12.19.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
127
+ fbgemm_gpu_genai_nightly-2025.12.19.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: skbuild 0.18.1
3
+ Root-Is-Purelib: false
4
+ Tag: cp310-cp310-manylinux_2_28_x86_64
5
+
@@ -0,0 +1,2 @@
1
+ fbgemm_gpu
2
+ list_versions
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ # All rights reserved.
4
+ #
5
+ # This source code is licensed under the BSD-style license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
+ # pyre-strict
9
+
10
+ from .cli_run import CLI, CLIOutput
11
+
12
+ __all__ = ["CLI", "CLIOutput"]
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ # All rights reserved.
4
+ #
5
+ # This source code is licensed under the BSD-style license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
+ # pyre-strict
9
+
10
+ import logging
11
+ import subprocess
12
+ from datetime import datetime
13
+ from typing import Union
14
+
15
+ import click
16
+
17
+ import pandas as pd
18
+
19
+ import torch
20
+
21
+
22
class CLIOutput:
    """Record of a single command invocation and its outcome.

    The values are stored on underscore-prefixed attributes (``_cli``,
    ``_stdout``, ``_stderr``, ``_returncode``, ``_timestamp``, ``_visible``)
    and exposed as a plain dict through :meth:`to_dict`.
    """

    def __init__(
        self,
        cli: str = "",
        stdout: str = "",
        stderr: str = "",
        returncode: int = 0,
        timestamp: str = "2025-01-01T20:00:00.00000",
        visible: bool = True,
    ) -> None:
        """Store the fields of one command record.

        Args:
            cli: The command line, as a single string.
            stdout: Captured standard output.
            stderr: Captured standard error.
            returncode: Exit status of the command.
            timestamp: Timestamp string for the record; the default is a
                fixed placeholder value, not the current time.
            visible: Flag carried along with the record.
        """
        self._cli = cli
        self._stdout = stdout
        self._stderr = stderr
        self._returncode = returncode
        self._timestamp = timestamp
        self._visible = visible

    def __repr__(self) -> str:
        """Return a ``CLIOutput(field=value, ...)`` string for debugging."""
        fields = ", ".join(
            f"{key}={value!r}" for key, value in self.to_dict().items()
        )
        return f"{type(self).__name__}({fields})"

    def to_dict(self) -> dict[str, Union[bool, int, str]]:
        """Return the record as a dict keyed by field name.

        Keys are ``cli``, ``stdout``, ``stderr``, ``returncode``,
        ``timestamp`` and ``visible``, in that order.
        """
        return {
            "cli": self._cli,
            "stdout": self._stdout,
            "stderr": self._stderr,
            "returncode": self._returncode,
            "timestamp": self._timestamp,
            "visible": self._visible,
        }
48
+
49
+
50
class CLI:
    """Runs external commands and collects their results as ``CLIOutput`` records.

    Records flagged as visible are accumulated in ``self._cli_outputs`` and
    can be turned into a pandas DataFrame or saved as CSV / JSON lines.
    """

    def __init__(self) -> None:
        """Set the pandas display option and seed the record list.

        The first record reports the installed ``torch.__version__``; it is
        built directly from the imported module rather than by running the
        command it names.
        """
        # Process-wide pandas option: do not truncate long cell contents when
        # the results DataFrame is printed.
        pd.set_option("display.max_colwidth", None)
        self._cli_outputs: list[CLIOutput] = [
            CLIOutput(
                # Plain ASCII dash and quotes so the recorded command can be
                # copied and run as-is.
                cli='python -c "import torch; print(torch.__version__)"',
                stdout=f"{torch.__version__}",
                stderr="",
                returncode=0,
                timestamp=datetime.now().isoformat(),
                visible=True,
            )
        ]

    def run(
        self,
        cli: Union[str, list[str]],
        visible: bool = True,
        input: str = "",
        capture_output: bool = True,
    ) -> CLIOutput:
        """Run one command without a shell and return its record.

        Args:
            cli: Either an argument list, or a string that is split on
                whitespace (no quoting or escaping is interpreted).
            visible: When true, the record is also appended to the list of
                collected outputs.
            input: Text passed to the command's standard input.
            capture_output: Forwarded to ``subprocess.run``. When false,
                stdout / stderr are not captured and are recorded as empty
                strings.

        Returns:
            The ``CLIOutput`` for this invocation. If launching the command
            raises, the error is logged and the record has ``returncode``
            ``-1`` with the exception text in ``stderr``.
        """
        argv = cli.split() if isinstance(cli, str) else cli
        command_line = " ".join(argv)
        try:
            completed = subprocess.run(
                argv,
                text=True,
                check=False,
                capture_output=capture_output,
                input=input,
            )
        except Exception as e:
            # Best effort: a missing or unlaunchable tool must not abort the
            # whole collection run, so record the failure instead of raising.
            logging.error("For cli %s we got exception %s", command_line, e)
            result = CLIOutput(
                cli=command_line,
                stdout="",
                stderr=str(e),
                returncode=-1,
                timestamp=datetime.now().isoformat(),
                visible=visible,
            )
        else:
            result = CLIOutput(
                cli=command_line,
                # subprocess reports None for streams it did not capture;
                # keep the record's fields as strings.
                stdout=completed.stdout or "",
                stderr=completed.stderr or "",
                returncode=completed.returncode,
                timestamp=datetime.now().isoformat(),
                visible=visible,
            )
        if visible:
            self._cli_outputs.append(result)
        return result

    def run_piped(self, clis: list[str]) -> None:
        """Run commands in sequence, feeding each one's stdout to the next.

        Only the final command is run as visible, so it is the only one
        added to the collected outputs. An empty ``clis`` is a no-op.
        A failing stage does not stop the chain; its (possibly empty) stdout
        is still passed on.
        """
        if not clis:
            return
        *upstream, last = clis
        piped_text = ""
        for stage in upstream:
            stage_result = self.run(
                cli=stage, visible=False, input=piped_text, capture_output=True
            )
            piped_text = stage_result._stdout
        self.run(cli=last, visible=True, input=piped_text, capture_output=True)

    def to_dataframe(self) -> pd.DataFrame:
        """Return the collected records as a DataFrame, one row per record."""
        return pd.DataFrame([output.to_dict() for output in self._cli_outputs])

    def save(self, filename: str, format: str = "csv") -> None:
        """Write the collected records to ``filename``.

        Args:
            filename: Destination path.
            format: ``"csv"`` (written without the index column) or
                ``"json"`` (one JSON record per line).

        Raises:
            ValueError: If ``format`` is neither ``"csv"`` nor ``"json"``.
        """
        # Validate before building the DataFrame so a bad format fails fast.
        if format not in ("csv", "json"):
            raise ValueError(f"Invalid format {format} : must be one of 'csv', 'json'")
        df = self.to_dataframe()
        if format == "csv":
            df.to_csv(filename, index=False)
        else:
            df.to_json(filename, orient="records", lines=True)
123
+
124
+
125
@click.command()
@click.option("--json", default="", help="Write the results as JSON lines to this file.")
@click.option("--csv", default="", help="Write the results as CSV to this file.")
def cli_run(
    json: str,
    csv: str,
) -> None:
    """Collect driver, kernel and firmware version information and print it.

    Runs a fixed set of commands (rpm/grep/sed pipeline, uname, fw-util,
    amd-smi), optionally saves the results, and prints them as a table.
    """
    cli = CLI()

    # Pipeline: list installed RPMs, keep amdgpu-dkms / amdgpu-kmod entries,
    # drop firmware packages, then trim the result with two sed expressions.
    # NOTE(review): the sed patterns match single characters between the
    # dashes / dots ("[^-]-", ".[^.]"); they may have been intended as
    # "[^-]*-" and "\.[^.]*" - confirm against real `rpm -qa` output.
    version_pipeline = [
        "rpm -qa",
        "grep -E ^amdgpu-(dkms|kmod)",
        "grep -v firmware",
        "sed -E s/^[^-]-[^-]-//",
        "sed -E s/.[^.].[^.]$//",
    ]
    cli.run_piped(version_pipeline)

    cli.run("uname -r")

    cli.run("fw-util all --version")

    cli.run("amd-smi firmware")
    cli.run("amd-smi version")
    cli.run("amd-smi static")

    # An empty option value (the default) means "do not write this file".
    if csv:
        cli.save(csv)

    if json:
        cli.save(json, format="json")

    print(cli.to_dataframe())
156
+
157
+
158
def main() -> None:
    """Script entry point: invoke the ``cli_run`` click command."""
    cli_run()


# Run the command only when this file is executed as a script.
if __name__ == "__main__":
    main()