fbgemm-gpu-genai-nightly 2025.10.17__cp311-cp311-manylinux_2_28_x86_64.whl → 2025.10.19__cp311-cp311-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,6 +1,6 @@
 
 {
-  "version": "2025.10.17",
+  "version": "2025.10.19",
   "target": "genai",
   "variant": "cuda"
 }
@@ -175,7 +175,7 @@ class QuantizeOpBase(metaclass=abc.ABCMeta):
             if use_cuda_graph:
                 with torch.cuda.stream(torch.cuda.Stream()):
                     t = triton.testing.do_bench_cudagraph(
-                        lambda: self.quantize_and_compute(*args, **kwargs)
+                        lambda: self.quantize_and_compute(*args, **kwargs), rep=200
                     )
             else:
                 t = triton.testing.do_bench(
@@ -188,7 +188,7 @@ class QuantizeOpBase(metaclass=abc.ABCMeta):
             if use_cuda_graph:
                 with torch.cuda.stream(torch.cuda.Stream()):
                     t = triton.testing.do_bench_cudagraph(
-                        lambda: self.compute(*args, **kwargs)
+                        lambda: self.compute(*args, **kwargs), rep=200
                     )
             else:
                 t = triton.testing.do_bench(lambda: self.compute(*args, **kwargs))
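The two hunks above add `rep=200` to the `triton.testing.do_bench_cudagraph` calls; `rep` is the target repetition time in milliseconds, so the benchmark now averages CUDA-graph replays over a longer window than the default, which reduces run-to-run noise. A minimal, self-contained sketch of the same pattern follows; the matmul workload and the `bench_matmul` name are illustrative stand-ins, not code from the package:

```python
import torch
import triton


def bench_matmul(m: int = 1024, n: int = 1024, k: int = 1024) -> float:
    a = torch.randn(m, k, device="cuda", dtype=torch.bfloat16)
    b = torch.randn(k, n, device="cuda", dtype=torch.bfloat16)
    # Replay the op under a CUDA graph on a side stream, mirroring the
    # QuantizeOpBase pattern, and average over ~200 ms of repetitions.
    with torch.cuda.stream(torch.cuda.Stream()):
        t_ms = triton.testing.do_bench_cudagraph(lambda: a @ b, rep=200)
    return t_ms


if __name__ == "__main__":
    print(f"matmul: {bench_matmul():.3f} ms")
```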
@@ -1714,10 +1714,16 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
             tbe_id=self.uuid,
         )
 
+    def _get_tensor_memory(self, tensor_name: str) -> int:
+        """Get memory usage of a tensor in bytes."""
+        if not hasattr(self, tensor_name):
+            self.log(f"Tensor '{tensor_name}' not found, using 0 bytes")
+            return 0
+        tensor = getattr(self, tensor_name)
+        return tensor.numel() * tensor.element_size()
+
     @torch.jit.ignore
-    def _report_tbe_mem_usage(
-        self,
-    ) -> None:
+    def _report_tbe_mem_usage(self) -> None:
         if self.stats_reporter is None:
             return
 
@@ -1726,18 +1732,17 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
             return
 
         total_mem_usage = sum(
-            param.numel() * param.element_size() for param in self.parameters()
-        ) + sum(buffer.numel() * buffer.element_size() for buffer in self.buffers())
+            p.numel() * p.element_size() for p in self.parameters()
+        ) + sum(b.numel() * b.element_size() for b in self.buffers())
+
         if self.use_cpu:
             total_hbm_usage = 0
             total_uvm_usage = total_mem_usage
         else:
-            # hbm usage is total usage minus uvm usage
             total_uvm_usage = sum(
-                getattr(self, tensor_name).numel()
-                * getattr(self, tensor_name).element_size()
-                for tensor_name in self._uvm_tensors_log
-                if hasattr(self, tensor_name)
+                self._get_tensor_memory(name)
+                for name in self._uvm_tensors_log
+                if hasattr(self, name)
             )
             total_hbm_usage = total_mem_usage - total_uvm_usage
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fbgemm_gpu_genai_nightly
-Version: 2025.10.17
+Version: 2025.10.19
 Home-page: https://github.com/pytorch/fbgemm
 Author: FBGEMM Team
 Author-email: packages@pytorch.org
@@ -17,7 +17,7 @@ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43m
 fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
 fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=76ME0692CC691xpjiOsY3Xxy-LD_XKs8w9vq1gcm9tM,16440
 fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=dGC85xjQiRUrequBibSf9oMAVHT5Q49zsVo2zW4n_88,81679
-fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=lF9eP6GDTyqbEJgl-SO6gNYUk2dv2YE2bMEtzGkY21c,173757
+fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=nbrdPt2WYeVB1BDyToa4vfl_XiOza5dEGjSB8jCV_mY,173930
 fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=e3O9ElaWBGvG7TdT3Ok_8cB06jhskXuyCQ0t40dzsEY,5449
 fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
 fbgemm_gpu/tbe_input_multiplexer.py,sha256=TQjwkJ2JkOaQsMYuRdk9RbNa9759EPEtx8bYclChtZY,3063
@@ -32,9 +32,9 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
 fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
 fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
 fbgemm_gpu/docs/sparse_ops.py,sha256=gSLUFdnu8lle_6gLewFkM20wL3ek2jKLvDGMKR6POaY,27292
-fbgemm_gpu/docs/target.genai.json.py,sha256=xd3cFHZnulRjX-EiXgAyYhf3VwvdojE-oOnc-sgbRqw,79
+fbgemm_gpu/docs/target.genai.json.py,sha256=jEkWy-JyNnwHPxd3flNvyzdyJLdp7YlavRyw46AFi2E,79
 fbgemm_gpu/experimental/example/__init__.py,sha256=OvJHZgWnycL1gWKyCXFJCTKuys3KAqx4iadjx3R-tBQ,723
-fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=IpKIkdaVVen5D3GfHn6WOCZN7BBlrHWOlNre4rwKpMA,232480
+fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=7Weofm7WdsSt8h6bK6UJcZH9qYgc0jUpno4kaxAMveM,232488
 fbgemm_gpu/experimental/example/utils.py,sha256=Je__VkMlBMLOhh7NXOocOdvaa2gz9kl9Dkqeu25tpFA,562
 fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py,sha256=1CqUfzlYyXTvU-BNaUq4RZpLV-2lKAVCAHeJzSIZFWw,419
 fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py,sha256=2RjIDSzUXtoFoC2ryp-C-j5H83mbSjPwvsvTrThfrqE,215658
@@ -43,7 +43,7 @@ fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py,sha256=rbjxTMefjQWgJrWK
 fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py,sha256=SltbY_dsit5e7B8lDIB_VYPrEq0t9kckthj9mQaVNfA,7571
 fbgemm_gpu/experimental/gemm/triton_gemm/utils.py,sha256=rULXIpVaaRS3GKUZ1RHcWUrUyy0xMVREwS1SFShGgcw,4302
 fbgemm_gpu/experimental/gen_ai/__init__.py,sha256=r3NlNCXuIh0pfKwKU5v14y6AZkpoIkKWbtzxSprgeKA,1713
-fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=QdKzI_F7AFVX0RwN2IjmYHFDgYxzV4_0HaBXQbB1DHQ,72331560
+fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=93bsfi3fOhgvRQwC2xEojm2psBZlitx6Xm6qBRM7mQI,72614088
 fbgemm_gpu/experimental/gen_ai/quantize.py,sha256=KAljWSdN-1_c5DWfT-3MDxWLMULK49Yu36t6TmQI9Tw,12599
 fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py,sha256=ntFgFs0foi6NQx8eqs5I3fCjzKSI0spXfEWiMhlcT00,897
 fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=FADVTYzS2u8fA-3iChS5CbtWd0mWF8F3lnXcwr_7vDw,7821
@@ -52,7 +52,7 @@ fbgemm_gpu/experimental/gen_ai/bench/__init__.py,sha256=XpAK_eyqDSKeFC5J9KpnKtbZ
 fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py,sha256=ApEyJOf_rdIo8V_EgvhZXBGNov8ITC_dnB95v8szulI,8515
 fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py,sha256=K9Nib6D7xJbw1QwEVuCJrVyI1qs988moo3cieVKYuFY,12057
 fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py,sha256=BWl6t-4acbuRSEX2aVNDlFrSWZkqMWK2sI3VONaMd3Q,24047
-fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=H6AchejyZs76_snM_ae5vV0cPr_Q0h35OQ8qED0r1N4,104915
+fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=cDZS2rCb1W2IEQYxsnGjauhlUhg2PFZ-9LqJ_SEdbiQ,104933
 fbgemm_gpu/experimental/gen_ai/moe/README.md,sha256=z9ybHmv4KFJ1drj5OByuFaOY0tRQwwiIW3Q22TB_2-k,904
 fbgemm_gpu/experimental/gen_ai/moe/__init__.py,sha256=lwSvff07yEav024B1XyfgW8r8hwNe--aEDywcO7rnbM,1905
 fbgemm_gpu/experimental/gen_ai/moe/activation.py,sha256=NiXhWyCNagI3P9N3N89iSX7xKuShdkq9DxEUAzoV6y0,7892
@@ -121,7 +121,7 @@ fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,99
 fbgemm_gpu/utils/torch_library.py,sha256=ywsAHjbuwesj50LjEu99WkAH17FlaVgePZ9OmFg6YE4,4193
 list_versions/__init__.py,sha256=UmTeqCk-UJWFtlZQWvZao3xvui2w9E3X_JdOXVjRaNw,315
 list_versions/cli_run.py,sha256=CChZoXQ-tiKaWboXAYlPVJ5w8K5zAKiKcncA087I1sc,4508
-fbgemm_gpu_genai_nightly-2025.10.17.dist-info/METADATA,sha256=rqssfEibbl9IMyEp_swoBTJCA06TZCSJUU-jWi_-onw,2656
-fbgemm_gpu_genai_nightly-2025.10.17.dist-info/WHEEL,sha256=V2Q6mQKbouIadCxoRjt9FQ9oKfi45-uZUcoc77zzs0M,108
-fbgemm_gpu_genai_nightly-2025.10.17.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
-fbgemm_gpu_genai_nightly-2025.10.17.dist-info/RECORD,,
+fbgemm_gpu_genai_nightly-2025.10.19.dist-info/METADATA,sha256=lB9fb2fZ26k7aR-2jJN6pPU1nuW0KX12jcQ72MOviR4,2656
+fbgemm_gpu_genai_nightly-2025.10.19.dist-info/WHEEL,sha256=V2Q6mQKbouIadCxoRjt9FQ9oKfi45-uZUcoc77zzs0M,108
+fbgemm_gpu_genai_nightly-2025.10.19.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
+fbgemm_gpu_genai_nightly-2025.10.19.dist-info/RECORD,,