fbgemm-gpu-genai-nightly 2025.10.1-cp312-cp312-manylinux_2_28_x86_64.whl → 2025.10.2-cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fbgemm-gpu-genai-nightly might be problematic. See the package registry page for more details.

@@ -6,6 +6,6 @@
6
6
  # This source code is licensed under the BSD-style license found in the
7
7
  # LICENSE file in the root directory of this source tree.
8
8
 
9
- __version__: str = "2025.10.1"
9
+ __version__: str = "2025.10.2"
10
10
  __target__: str = "genai"
11
11
  __variant__: str = "cuda"
@@ -13,13 +13,13 @@ from torch.library import register_fake
13
13
  torch.library.define(
14
14
  "blackwell_fmha::fmha_fwd",
15
15
  "(Tensor q, Tensor k, Tensor v, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seq_len_q, int? max_seq_len_k, float? softmax_scale, bool? causal, Tensor? seqlen_kv) -> (Tensor, Tensor)",
16
- tags=[torch.Tag.pt2_compliant_tag],
16
+ tags=torch.Tag.pt2_compliant_tag,
17
17
  )
18
18
 
19
19
  torch.library.define(
20
20
  "blackwell_fmha::fmha_bwd",
21
21
  "(Tensor dout, Tensor q, Tensor k, Tensor v, Tensor out, Tensor softmax_lse, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seq_len_q, int? max_seq_len_k, bool? causal) -> (Tensor, Tensor, Tensor)",
22
- tags=[torch.Tag.pt2_compliant_tag],
22
+ tags=torch.Tag.pt2_compliant_tag,
23
23
  )
24
24
 
25
25
 
@@ -3971,8 +3971,8 @@ class SSDTableBatchedEmbeddingBags(nn.Module):
3971
3971
  self.step, stats_reporter.report_interval # pyre-ignore
3972
3972
  )
3973
3973
 
3974
- if len(dram_kv_perf_stats) != 23:
3975
- logging.error("dram cache perf stats should have 23 elements")
3974
+ if len(dram_kv_perf_stats) != 24:
3975
+ logging.error("dram cache perf stats should have 24 elements")
3976
3976
  return
3977
3977
 
3978
3978
  dram_read_duration = dram_kv_perf_stats[0]
@@ -4001,6 +4001,7 @@ class SSDTableBatchedEmbeddingBags(nn.Module):
4001
4001
  dram_kv_allocated_bytes = dram_kv_perf_stats[20]
4002
4002
  dram_kv_actual_used_chunk_bytes = dram_kv_perf_stats[21]
4003
4003
  dram_kv_num_rows = dram_kv_perf_stats[22]
4004
+ dram_kv_read_counts = dram_kv_perf_stats[23]
4004
4005
 
4005
4006
  stats_reporter.report_duration(
4006
4007
  iteration_step=self.step,
@@ -4142,6 +4143,13 @@ class SSDTableBatchedEmbeddingBags(nn.Module):
4142
4143
  enable_tb_metrics=True,
4143
4144
  )
4144
4145
 
4146
+ stats_reporter.report_data_amount(
4147
+ iteration_step=self.step,
4148
+ event_name="dram_kv.perf.get.dram_kv_read_counts",
4149
+ data_bytes=dram_kv_read_counts,
4150
+ enable_tb_metrics=True,
4151
+ )
4152
+
4145
4153
  stats_reporter.report_data_amount(
4146
4154
  iteration_step=self.step,
4147
4155
  event_name=self.dram_kv_allocated_bytes_stats_name,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fbgemm_gpu_genai_nightly
3
- Version: 2025.10.1
3
+ Version: 2025.10.2
4
4
  Home-page: https://github.com/pytorch/fbgemm
5
5
  Author: FBGEMM Team
6
6
  Author-email: packages@pytorch.org
@@ -32,7 +32,7 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
32
32
  fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
33
33
  fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
34
34
  fbgemm_gpu/docs/sparse_ops.py,sha256=gSLUFdnu8lle_6gLewFkM20wL3ek2jKLvDGMKR6POaY,27292
35
- fbgemm_gpu/docs/version.py,sha256=ilCARLAgozgEEPt89gQKF29FHl5O-EJpj_4tVp8g-1c,316
35
+ fbgemm_gpu/docs/version.py,sha256=U9HFTyqt_827sXJZ7N9Dik7e18vj0x7B38Go9HoScG4,316
36
36
  fbgemm_gpu/experimental/example/__init__.py,sha256=V_XrGMq2oNVMpzwe1srlaTaHeIcZJw5oAGbo3seM_Ks,870
37
37
  fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=ulMAWPIPFEKuwUOPT07silt7HSS5jcrHRf_NV7xcC84,232488
38
38
  fbgemm_gpu/experimental/example/utils.py,sha256=Je__VkMlBMLOhh7NXOocOdvaa2gz9kl9Dkqeu25tpFA,562
@@ -43,10 +43,10 @@ fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py,sha256=rbjxTMefjQWgJrWK
43
43
  fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py,sha256=SltbY_dsit5e7B8lDIB_VYPrEq0t9kckthj9mQaVNfA,7571
44
44
  fbgemm_gpu/experimental/gemm/triton_gemm/utils.py,sha256=rULXIpVaaRS3GKUZ1RHcWUrUyy0xMVREwS1SFShGgcw,4302
45
45
  fbgemm_gpu/experimental/gen_ai/__init__.py,sha256=qwfuF5E5K4oDiH7RJkpC7zth3kAsG7wv_glCl2A_G2A,1860
46
- fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=MgBtk8UgtFvUc3s_9B3peq_srmcnb8AZ1xMa-mx86aQ,78050824
46
+ fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=2O8U3DLjgoCq5PdhG44PWjCscn5QYFHxW2yVY22b-7k,78050824
47
47
  fbgemm_gpu/experimental/gen_ai/quantize.py,sha256=KAljWSdN-1_c5DWfT-3MDxWLMULK49Yu36t6TmQI9Tw,12599
48
48
  fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py,sha256=oExepXpjMOwM43gARZARY0UtR-EX2zqRnSrOaQPy448,1044
49
- fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=D90VgPOxnx1NpnDnajIv_L7AHq4rrmFIch0iV2elAVU,7825
49
+ fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=FADVTYzS2u8fA-3iChS5CbtWd0mWF8F3lnXcwr_7vDw,7821
50
50
  fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py,sha256=sWk5888_e7Qhfik0X1uZ1VsEUmY5uidxHcqtpoH42Is,9406
51
51
  fbgemm_gpu/experimental/gen_ai/bench/__init__.py,sha256=GvCUF6o7wCR3XSWingWKxn_Y3_F2GhZtOIRAB3pfqK0,551
52
52
  fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py,sha256=ApEyJOf_rdIo8V_EgvhZXBGNov8ITC_dnB95v8szulI,8515
@@ -99,7 +99,7 @@ fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py,sha256=vZHj7KIe1DoJDy5eft29Xt
99
99
  fbgemm_gpu/tbe/ssd/__init__.py,sha256=wzfMT10cp_dqK2lrebC449hOdexBnizcf_98lA1NyHs,483
100
100
  fbgemm_gpu/tbe/ssd/common.py,sha256=1J8K7sTQswgCYWaVwF-ZdCJj7mNN6O9GI70AaZWzJGE,1044
101
101
  fbgemm_gpu/tbe/ssd/inference.py,sha256=B_uX66ajGA9YKGlFa5TmGWs7b-b1RFigzwxmENZ9Oio,22816
102
- fbgemm_gpu/tbe/ssd/training.py,sha256=dnUHnl9FzPneXrWUCA_HKvLI5QHQnL0vd9neMrjZJ-Q,194623
102
+ fbgemm_gpu/tbe/ssd/training.py,sha256=iepmavrK6cSXrqWg3TqVXL5gVmSftfHJycesp7I0Nw4,194911
103
103
  fbgemm_gpu/tbe/ssd/utils/__init__.py,sha256=5DgmR2HA6NtmYh2ddkUgpDsZ6a7hF0DPedA1gMpdh18,250
104
104
  fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py,sha256=SFg2-29b-i49LWm-FlaWUkTz2XzXbicYi_AzVj4jKNE,7601
105
105
  fbgemm_gpu/tbe/stats/__init__.py,sha256=on29iDtq7cVNh90JR9aeFNG-K9DDoYq0JryzoplL49I,322
@@ -121,7 +121,7 @@ fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,99
121
121
  fbgemm_gpu/utils/torch_library.py,sha256=ywsAHjbuwesj50LjEu99WkAH17FlaVgePZ9OmFg6YE4,4193
122
122
  list_versions/__init__.py,sha256=UmTeqCk-UJWFtlZQWvZao3xvui2w9E3X_JdOXVjRaNw,315
123
123
  list_versions/cli_run.py,sha256=CChZoXQ-tiKaWboXAYlPVJ5w8K5zAKiKcncA087I1sc,4508
124
- fbgemm_gpu_genai_nightly-2025.10.1.dist-info/METADATA,sha256=ybTRQ-qhh1eUvC5tgzoUyyhhQW3pk-gwvumEFDqdlGk,2655
125
- fbgemm_gpu_genai_nightly-2025.10.1.dist-info/WHEEL,sha256=vUT1hK8fT5m5CAs5kDyQ_ABrvCmtd0TCp5-4vN9tR5A,108
126
- fbgemm_gpu_genai_nightly-2025.10.1.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
127
- fbgemm_gpu_genai_nightly-2025.10.1.dist-info/RECORD,,
124
+ fbgemm_gpu_genai_nightly-2025.10.2.dist-info/METADATA,sha256=KiCiRdg53J2HiyUZMdm_uIZHb-E8u0QQj9uRSc9oRIM,2655
125
+ fbgemm_gpu_genai_nightly-2025.10.2.dist-info/WHEEL,sha256=vUT1hK8fT5m5CAs5kDyQ_ABrvCmtd0TCp5-4vN9tR5A,108
126
+ fbgemm_gpu_genai_nightly-2025.10.2.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
127
+ fbgemm_gpu_genai_nightly-2025.10.2.dist-info/RECORD,,