fbgemm-gpu-genai-nightly 2025.10.17__cp311-cp311-manylinux_2_28_x86_64.whl → 2025.10.19__cp311-cp311-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,6 +1,6 @@
 
 {
-  "version": "2025.10.17",
+  "version": "2025.10.19",
   "target": "genai",
   "variant": "cuda"
 }
@@ -175,7 +175,7 @@ class QuantizeOpBase(metaclass=abc.ABCMeta):
             if use_cuda_graph:
                 with torch.cuda.stream(torch.cuda.Stream()):
                     t = triton.testing.do_bench_cudagraph(
-                        lambda: self.quantize_and_compute(*args, **kwargs)
+                        lambda: self.quantize_and_compute(*args, **kwargs), rep=200
                     )
             else:
                 t = triton.testing.do_bench(
@@ -188,7 +188,7 @@ class QuantizeOpBase(metaclass=abc.ABCMeta):
             if use_cuda_graph:
                 with torch.cuda.stream(torch.cuda.Stream()):
                     t = triton.testing.do_bench_cudagraph(
-                        lambda: self.compute(*args, **kwargs)
+                        lambda: self.compute(*args, **kwargs), rep=200
                     )
             else:
                 t = triton.testing.do_bench(lambda: self.compute(*args, **kwargs))
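The two hunks above add `rep=200` to the `triton.testing.do_bench_cudagraph` calls; `rep` is the target repetition time in milliseconds, so the benchmark now averages CUDA-graph replays over a longer window than the default, which reduces run-to-run noise. A minimal, self-contained sketch of the same pattern follows; the matmul workload and the `bench_matmul` name are illustrative stand-ins, not code from the package:

```python
import torch
import triton


def bench_matmul(m: int = 1024, n: int = 1024, k: int = 1024) -> float:
    a = torch.randn(m, k, device="cuda", dtype=torch.bfloat16)
    b = torch.randn(k, n, device="cuda", dtype=torch.bfloat16)
    # Replay the op under a CUDA graph on a side stream, mirroring the
    # QuantizeOpBase pattern, and average over ~200 ms of repetitions.
    with torch.cuda.stream(torch.cuda.Stream()):
        t_ms = triton.testing.do_bench_cudagraph(lambda: a @ b, rep=200)
    return t_ms


if __name__ == "__main__":
    print(f"matmul: {bench_matmul():.3f} ms")
```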
@@ -1714,10 +1714,16 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
             tbe_id=self.uuid,
         )
 
+    def _get_tensor_memory(self, tensor_name: str) -> int:
+        """Get memory usage of a tensor in bytes."""
+        if not hasattr(self, tensor_name):
+            self.log(f"Tensor '{tensor_name}' not found, using 0 bytes")
+            return 0
+        tensor = getattr(self, tensor_name)
+        return tensor.numel() * tensor.element_size()
+
     @torch.jit.ignore
-    def _report_tbe_mem_usage(
-        self,
-    ) -> None:
+    def _report_tbe_mem_usage(self) -> None:
         if self.stats_reporter is None:
             return
 
@@ -1726,18 +1732,17 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
             return
 
         total_mem_usage = sum(
-            param.numel() * param.element_size() for param in self.parameters()
-        ) + sum(buffer.numel() * buffer.element_size() for buffer in self.buffers())
+            p.numel() * p.element_size() for p in self.parameters()
+        ) + sum(b.numel() * b.element_size() for b in self.buffers())
+
         if self.use_cpu:
             total_hbm_usage = 0
             total_uvm_usage = total_mem_usage
         else:
-            # hbm usage is total usage minus uvm usage
             total_uvm_usage = sum(
-                getattr(self, tensor_name).numel()
-                * getattr(self, tensor_name).element_size()
-                for tensor_name in self._uvm_tensors_log
-                if hasattr(self, tensor_name)
+                self._get_tensor_memory(name)
+                for name in self._uvm_tensors_log
+                if hasattr(self, name)
             )
             total_hbm_usage = total_mem_usage - total_uvm_usage
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fbgemm_gpu_genai_nightly
-Version: 2025.10.17
+Version: 2025.10.19
 Home-page: https://github.com/pytorch/fbgemm
 Author: FBGEMM Team
 Author-email: packages@pytorch.org
@@ -17,7 +17,7 @@ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43m
 fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
 fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=76ME0692CC691xpjiOsY3Xxy-LD_XKs8w9vq1gcm9tM,16440
 fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=dGC85xjQiRUrequBibSf9oMAVHT5Q49zsVo2zW4n_88,81679
-fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=lF9eP6GDTyqbEJgl-SO6gNYUk2dv2YE2bMEtzGkY21c,173757
+fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=nbrdPt2WYeVB1BDyToa4vfl_XiOza5dEGjSB8jCV_mY,173930
 fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=e3O9ElaWBGvG7TdT3Ok_8cB06jhskXuyCQ0t40dzsEY,5449
 fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
 fbgemm_gpu/tbe_input_multiplexer.py,sha256=TQjwkJ2JkOaQsMYuRdk9RbNa9759EPEtx8bYclChtZY,3063
@@ -32,9 +32,9 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
 fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
 fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
 fbgemm_gpu/docs/sparse_ops.py,sha256=gSLUFdnu8lle_6gLewFkM20wL3ek2jKLvDGMKR6POaY,27292
-fbgemm_gpu/docs/target.genai.json.py,sha256=xd3cFHZnulRjX-EiXgAyYhf3VwvdojE-oOnc-sgbRqw,79
+fbgemm_gpu/docs/target.genai.json.py,sha256=jEkWy-JyNnwHPxd3flNvyzdyJLdp7YlavRyw46AFi2E,79
 fbgemm_gpu/experimental/example/__init__.py,sha256=OvJHZgWnycL1gWKyCXFJCTKuys3KAqx4iadjx3R-tBQ,723
-fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=IpKIkdaVVen5D3GfHn6WOCZN7BBlrHWOlNre4rwKpMA,232480
+fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=7Weofm7WdsSt8h6bK6UJcZH9qYgc0jUpno4kaxAMveM,232488
 fbgemm_gpu/experimental/example/utils.py,sha256=Je__VkMlBMLOhh7NXOocOdvaa2gz9kl9Dkqeu25tpFA,562
 fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py,sha256=1CqUfzlYyXTvU-BNaUq4RZpLV-2lKAVCAHeJzSIZFWw,419
 fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py,sha256=2RjIDSzUXtoFoC2ryp-C-j5H83mbSjPwvsvTrThfrqE,215658
@@ -43,7 +43,7 @@ fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py,sha256=rbjxTMefjQWgJrWK
 fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py,sha256=SltbY_dsit5e7B8lDIB_VYPrEq0t9kckthj9mQaVNfA,7571
 fbgemm_gpu/experimental/gemm/triton_gemm/utils.py,sha256=rULXIpVaaRS3GKUZ1RHcWUrUyy0xMVREwS1SFShGgcw,4302
 fbgemm_gpu/experimental/gen_ai/__init__.py,sha256=r3NlNCXuIh0pfKwKU5v14y6AZkpoIkKWbtzxSprgeKA,1713
-fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=QdKzI_F7AFVX0RwN2IjmYHFDgYxzV4_0HaBXQbB1DHQ,72331560
+fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=93bsfi3fOhgvRQwC2xEojm2psBZlitx6Xm6qBRM7mQI,72614088
 fbgemm_gpu/experimental/gen_ai/quantize.py,sha256=KAljWSdN-1_c5DWfT-3MDxWLMULK49Yu36t6TmQI9Tw,12599
 fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py,sha256=ntFgFs0foi6NQx8eqs5I3fCjzKSI0spXfEWiMhlcT00,897
 fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=FADVTYzS2u8fA-3iChS5CbtWd0mWF8F3lnXcwr_7vDw,7821
@@ -52,7 +52,7 @@ fbgemm_gpu/experimental/gen_ai/bench/__init__.py,sha256=XpAK_eyqDSKeFC5J9KpnKtbZ
 fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py,sha256=ApEyJOf_rdIo8V_EgvhZXBGNov8ITC_dnB95v8szulI,8515
 fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py,sha256=K9Nib6D7xJbw1QwEVuCJrVyI1qs988moo3cieVKYuFY,12057
 fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py,sha256=BWl6t-4acbuRSEX2aVNDlFrSWZkqMWK2sI3VONaMd3Q,24047
-fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=H6AchejyZs76_snM_ae5vV0cPr_Q0h35OQ8qED0r1N4,104915
+fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=cDZS2rCb1W2IEQYxsnGjauhlUhg2PFZ-9LqJ_SEdbiQ,104933
 fbgemm_gpu/experimental/gen_ai/moe/README.md,sha256=z9ybHmv4KFJ1drj5OByuFaOY0tRQwwiIW3Q22TB_2-k,904
 fbgemm_gpu/experimental/gen_ai/moe/__init__.py,sha256=lwSvff07yEav024B1XyfgW8r8hwNe--aEDywcO7rnbM,1905
 fbgemm_gpu/experimental/gen_ai/moe/activation.py,sha256=NiXhWyCNagI3P9N3N89iSX7xKuShdkq9DxEUAzoV6y0,7892
@@ -121,7 +121,7 @@ fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,99
 fbgemm_gpu/utils/torch_library.py,sha256=ywsAHjbuwesj50LjEu99WkAH17FlaVgePZ9OmFg6YE4,4193
 list_versions/__init__.py,sha256=UmTeqCk-UJWFtlZQWvZao3xvui2w9E3X_JdOXVjRaNw,315
 list_versions/cli_run.py,sha256=CChZoXQ-tiKaWboXAYlPVJ5w8K5zAKiKcncA087I1sc,4508
-fbgemm_gpu_genai_nightly-2025.10.17.dist-info/METADATA,sha256=rqssfEibbl9IMyEp_swoBTJCA06TZCSJUU-jWi_-onw,2656
-fbgemm_gpu_genai_nightly-2025.10.17.dist-info/WHEEL,sha256=V2Q6mQKbouIadCxoRjt9FQ9oKfi45-uZUcoc77zzs0M,108
-fbgemm_gpu_genai_nightly-2025.10.17.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
-fbgemm_gpu_genai_nightly-2025.10.17.dist-info/RECORD,,
+fbgemm_gpu_genai_nightly-2025.10.19.dist-info/METADATA,sha256=lB9fb2fZ26k7aR-2jJN6pPU1nuW0KX12jcQ72MOviR4,2656
+fbgemm_gpu_genai_nightly-2025.10.19.dist-info/WHEEL,sha256=V2Q6mQKbouIadCxoRjt9FQ9oKfi45-uZUcoc77zzs0M,108
+fbgemm_gpu_genai_nightly-2025.10.19.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
+fbgemm_gpu_genai_nightly-2025.10.19.dist-info/RECORD,,