fbgemm-gpu-genai-nightly 2025.10.16__cp310-cp310-manylinux_2_28_x86_64.whl → 2025.10.18__cp310-cp310-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fbgemm-gpu-genai-nightly might be problematic. Click here for more details.
- fbgemm_gpu/asmjit.so +0 -0
- fbgemm_gpu/docs/target.genai.json.py +1 -1
- fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
- fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py +2 -2
- fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
- fbgemm_gpu/fbgemm.so +0 -0
- fbgemm_gpu/split_table_batched_embeddings_ops_training.py +15 -10
- {fbgemm_gpu_genai_nightly-2025.10.16.dist-info → fbgemm_gpu_genai_nightly-2025.10.18.dist-info}/METADATA +1 -1
- {fbgemm_gpu_genai_nightly-2025.10.16.dist-info → fbgemm_gpu_genai_nightly-2025.10.18.dist-info}/RECORD +11 -11
- {fbgemm_gpu_genai_nightly-2025.10.16.dist-info → fbgemm_gpu_genai_nightly-2025.10.18.dist-info}/WHEEL +0 -0
- {fbgemm_gpu_genai_nightly-2025.10.16.dist-info → fbgemm_gpu_genai_nightly-2025.10.18.dist-info}/top_level.txt +0 -0
fbgemm_gpu/asmjit.so
CHANGED
|
Binary file
|
|
Binary file
|
|
@@ -175,7 +175,7 @@ class QuantizeOpBase(metaclass=abc.ABCMeta):
|
|
|
175
175
|
if use_cuda_graph:
|
|
176
176
|
with torch.cuda.stream(torch.cuda.Stream()):
|
|
177
177
|
t = triton.testing.do_bench_cudagraph(
|
|
178
|
-
lambda: self.quantize_and_compute(*args, **kwargs)
|
|
178
|
+
lambda: self.quantize_and_compute(*args, **kwargs), rep=200
|
|
179
179
|
)
|
|
180
180
|
else:
|
|
181
181
|
t = triton.testing.do_bench(
|
|
@@ -188,7 +188,7 @@ class QuantizeOpBase(metaclass=abc.ABCMeta):
|
|
|
188
188
|
if use_cuda_graph:
|
|
189
189
|
with torch.cuda.stream(torch.cuda.Stream()):
|
|
190
190
|
t = triton.testing.do_bench_cudagraph(
|
|
191
|
-
lambda: self.compute(*args, **kwargs)
|
|
191
|
+
lambda: self.compute(*args, **kwargs), rep=200
|
|
192
192
|
)
|
|
193
193
|
else:
|
|
194
194
|
t = triton.testing.do_bench(lambda: self.compute(*args, **kwargs))
|
|
Binary file
|
fbgemm_gpu/fbgemm.so
CHANGED
|
Binary file
|
|
@@ -1714,10 +1714,16 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
1714
1714
|
tbe_id=self.uuid,
|
|
1715
1715
|
)
|
|
1716
1716
|
|
|
1717
|
+
def _get_tensor_memory(self, tensor_name: str) -> int:
|
|
1718
|
+
"""Get memory usage of a tensor in bytes."""
|
|
1719
|
+
if not hasattr(self, tensor_name):
|
|
1720
|
+
self.log(f"Tensor '{tensor_name}' not found, using 0 bytes")
|
|
1721
|
+
return 0
|
|
1722
|
+
tensor = getattr(self, tensor_name)
|
|
1723
|
+
return tensor.numel() * tensor.element_size()
|
|
1724
|
+
|
|
1717
1725
|
@torch.jit.ignore
|
|
1718
|
-
def _report_tbe_mem_usage(
|
|
1719
|
-
self,
|
|
1720
|
-
) -> None:
|
|
1726
|
+
def _report_tbe_mem_usage(self) -> None:
|
|
1721
1727
|
if self.stats_reporter is None:
|
|
1722
1728
|
return
|
|
1723
1729
|
|
|
@@ -1726,18 +1732,17 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
1726
1732
|
return
|
|
1727
1733
|
|
|
1728
1734
|
total_mem_usage = sum(
|
|
1729
|
-
|
|
1730
|
-
) + sum(
|
|
1735
|
+
p.numel() * p.element_size() for p in self.parameters()
|
|
1736
|
+
) + sum(b.numel() * b.element_size() for b in self.buffers())
|
|
1737
|
+
|
|
1731
1738
|
if self.use_cpu:
|
|
1732
1739
|
total_hbm_usage = 0
|
|
1733
1740
|
total_uvm_usage = total_mem_usage
|
|
1734
1741
|
else:
|
|
1735
|
-
# hbm usage is total usage minus uvm usage
|
|
1736
1742
|
total_uvm_usage = sum(
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
if hasattr(self, tensor_name)
|
|
1743
|
+
self._get_tensor_memory(name)
|
|
1744
|
+
for name in self._uvm_tensors_log
|
|
1745
|
+
if hasattr(self, name)
|
|
1741
1746
|
)
|
|
1742
1747
|
total_hbm_usage = total_mem_usage - total_uvm_usage
|
|
1743
1748
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
fbgemm_gpu/__init__.py,sha256=A3DuseilQ-sEtBpeZsG0LOqN5Cl3e5DHI_YgCZEMhnE,6283
|
|
2
|
-
fbgemm_gpu/asmjit.so,sha256=
|
|
2
|
+
fbgemm_gpu/asmjit.so,sha256=s2majpI_vsFgc4Vh8szCK4xFYXyT5A0K6HwepGBwrqQ,501728
|
|
3
3
|
fbgemm_gpu/batched_unary_embeddings_ops.py,sha256=GYeJ9pg-Wc9FokXVci_npDsL6UV18-pJXID2xzrJ9O8,2904
|
|
4
4
|
fbgemm_gpu/enums.py,sha256=37ewGSfO1x7sO31ZkRiqV1yKuklfHXT5qZIxzeeGogo,755
|
|
5
|
-
fbgemm_gpu/fbgemm.so,sha256=
|
|
5
|
+
fbgemm_gpu/fbgemm.so,sha256=B9y6MDLC6Ou7Bw_pT07Xfw5g5Q4j2yn9Xsp96QVpgEU,5646712
|
|
6
6
|
fbgemm_gpu/metrics.py,sha256=TsurFLJf0nJvPDN7urWb4LMQlf5RgdWPTTTDO7S4wtI,5663
|
|
7
7
|
fbgemm_gpu/permute_pooled_embedding_modules.py,sha256=vOXMYclaGnwSt0St_SOAlAe18kz6WjMyTeHnC9jLhcE,5130
|
|
8
8
|
fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=f3VJvH_kw9Ltd_DXtaf_PJPHmlmEWrQgzQ7MDkhh5Nw,2746
|
|
@@ -17,7 +17,7 @@ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43m
|
|
|
17
17
|
fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
|
|
18
18
|
fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=76ME0692CC691xpjiOsY3Xxy-LD_XKs8w9vq1gcm9tM,16440
|
|
19
19
|
fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=dGC85xjQiRUrequBibSf9oMAVHT5Q49zsVo2zW4n_88,81679
|
|
20
|
-
fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=
|
|
20
|
+
fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=nbrdPt2WYeVB1BDyToa4vfl_XiOza5dEGjSB8jCV_mY,173930
|
|
21
21
|
fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=e3O9ElaWBGvG7TdT3Ok_8cB06jhskXuyCQ0t40dzsEY,5449
|
|
22
22
|
fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
|
|
23
23
|
fbgemm_gpu/tbe_input_multiplexer.py,sha256=TQjwkJ2JkOaQsMYuRdk9RbNa9759EPEtx8bYclChtZY,3063
|
|
@@ -32,9 +32,9 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
|
|
|
32
32
|
fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
|
|
33
33
|
fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
|
|
34
34
|
fbgemm_gpu/docs/sparse_ops.py,sha256=gSLUFdnu8lle_6gLewFkM20wL3ek2jKLvDGMKR6POaY,27292
|
|
35
|
-
fbgemm_gpu/docs/target.genai.json.py,sha256=
|
|
35
|
+
fbgemm_gpu/docs/target.genai.json.py,sha256=65V25E8odqOdgvGWjS63CwjMVYLsXRoJAK8-iaPYW7g,79
|
|
36
36
|
fbgemm_gpu/experimental/example/__init__.py,sha256=OvJHZgWnycL1gWKyCXFJCTKuys3KAqx4iadjx3R-tBQ,723
|
|
37
|
-
fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=
|
|
37
|
+
fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=PGtZj3tM9mq65PGD08gEiTlj5PsvGaqJ_VkCvveHIIk,243904
|
|
38
38
|
fbgemm_gpu/experimental/example/utils.py,sha256=Je__VkMlBMLOhh7NXOocOdvaa2gz9kl9Dkqeu25tpFA,562
|
|
39
39
|
fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py,sha256=1CqUfzlYyXTvU-BNaUq4RZpLV-2lKAVCAHeJzSIZFWw,419
|
|
40
40
|
fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py,sha256=2RjIDSzUXtoFoC2ryp-C-j5H83mbSjPwvsvTrThfrqE,215658
|
|
@@ -43,7 +43,7 @@ fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py,sha256=rbjxTMefjQWgJrWK
|
|
|
43
43
|
fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py,sha256=SltbY_dsit5e7B8lDIB_VYPrEq0t9kckthj9mQaVNfA,7571
|
|
44
44
|
fbgemm_gpu/experimental/gemm/triton_gemm/utils.py,sha256=rULXIpVaaRS3GKUZ1RHcWUrUyy0xMVREwS1SFShGgcw,4302
|
|
45
45
|
fbgemm_gpu/experimental/gen_ai/__init__.py,sha256=r3NlNCXuIh0pfKwKU5v14y6AZkpoIkKWbtzxSprgeKA,1713
|
|
46
|
-
fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=
|
|
46
|
+
fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=L6aV0GcM8BKbNcDdZJAIgm4BLBspjo4MQ2em452K_ag,73184824
|
|
47
47
|
fbgemm_gpu/experimental/gen_ai/quantize.py,sha256=KAljWSdN-1_c5DWfT-3MDxWLMULK49Yu36t6TmQI9Tw,12599
|
|
48
48
|
fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py,sha256=ntFgFs0foi6NQx8eqs5I3fCjzKSI0spXfEWiMhlcT00,897
|
|
49
49
|
fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=FADVTYzS2u8fA-3iChS5CbtWd0mWF8F3lnXcwr_7vDw,7821
|
|
@@ -52,7 +52,7 @@ fbgemm_gpu/experimental/gen_ai/bench/__init__.py,sha256=XpAK_eyqDSKeFC5J9KpnKtbZ
|
|
|
52
52
|
fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py,sha256=ApEyJOf_rdIo8V_EgvhZXBGNov8ITC_dnB95v8szulI,8515
|
|
53
53
|
fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py,sha256=K9Nib6D7xJbw1QwEVuCJrVyI1qs988moo3cieVKYuFY,12057
|
|
54
54
|
fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py,sha256=BWl6t-4acbuRSEX2aVNDlFrSWZkqMWK2sI3VONaMd3Q,24047
|
|
55
|
-
fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=
|
|
55
|
+
fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=cDZS2rCb1W2IEQYxsnGjauhlUhg2PFZ-9LqJ_SEdbiQ,104933
|
|
56
56
|
fbgemm_gpu/experimental/gen_ai/moe/README.md,sha256=z9ybHmv4KFJ1drj5OByuFaOY0tRQwwiIW3Q22TB_2-k,904
|
|
57
57
|
fbgemm_gpu/experimental/gen_ai/moe/__init__.py,sha256=lwSvff07yEav024B1XyfgW8r8hwNe--aEDywcO7rnbM,1905
|
|
58
58
|
fbgemm_gpu/experimental/gen_ai/moe/activation.py,sha256=NiXhWyCNagI3P9N3N89iSX7xKuShdkq9DxEUAzoV6y0,7892
|
|
@@ -121,7 +121,7 @@ fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,99
|
|
|
121
121
|
fbgemm_gpu/utils/torch_library.py,sha256=ywsAHjbuwesj50LjEu99WkAH17FlaVgePZ9OmFg6YE4,4193
|
|
122
122
|
list_versions/__init__.py,sha256=UmTeqCk-UJWFtlZQWvZao3xvui2w9E3X_JdOXVjRaNw,315
|
|
123
123
|
list_versions/cli_run.py,sha256=CChZoXQ-tiKaWboXAYlPVJ5w8K5zAKiKcncA087I1sc,4508
|
|
124
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
125
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
126
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
127
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
124
|
+
fbgemm_gpu_genai_nightly-2025.10.18.dist-info/METADATA,sha256=sUOrHh3PBHZdQr2NaPSJGmkTiw9K3N9oYAO6xvNYqyg,2656
|
|
125
|
+
fbgemm_gpu_genai_nightly-2025.10.18.dist-info/WHEEL,sha256=k9CVMKlTmOLLXq_OyiiJFbPd6UKfogV4yIUezgPmplE,108
|
|
126
|
+
fbgemm_gpu_genai_nightly-2025.10.18.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
|
|
127
|
+
fbgemm_gpu_genai_nightly-2025.10.18.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|