fbgemm-gpu-genai-nightly 2025.10.19__cp312-cp312-manylinux_2_28_x86_64.whl → 2025.10.20__cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fbgemm_gpu/__init__.py +1 -0
- fbgemm_gpu/asmjit.so +0 -0
- fbgemm_gpu/docs/target.genai.json.py +1 -1
- fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
- fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py +2 -2
- fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
- fbgemm_gpu/fbgemm.so +0 -0
- fbgemm_gpu/split_table_batched_embeddings_ops_training.py +10 -15
- {fbgemm_gpu_genai_nightly-2025.10.19.dist-info → fbgemm_gpu_genai_nightly-2025.10.20.dist-info}/METADATA +1 -1
- {fbgemm_gpu_genai_nightly-2025.10.19.dist-info → fbgemm_gpu_genai_nightly-2025.10.20.dist-info}/RECORD +12 -12
- {fbgemm_gpu_genai_nightly-2025.10.19.dist-info → fbgemm_gpu_genai_nightly-2025.10.20.dist-info}/WHEEL +0 -0
- {fbgemm_gpu_genai_nightly-2025.10.19.dist-info → fbgemm_gpu_genai_nightly-2025.10.20.dist-info}/top_level.txt +0 -0
fbgemm_gpu/__init__.py
CHANGED
fbgemm_gpu/asmjit.so
CHANGED
|
Binary file
|
|
Binary file
|
|
@@ -175,7 +175,7 @@ class QuantizeOpBase(metaclass=abc.ABCMeta):
|
|
|
175
175
|
if use_cuda_graph:
|
|
176
176
|
with torch.cuda.stream(torch.cuda.Stream()):
|
|
177
177
|
t = triton.testing.do_bench_cudagraph(
|
|
178
|
-
lambda: self.quantize_and_compute(*args, **kwargs)
|
|
178
|
+
lambda: self.quantize_and_compute(*args, **kwargs)
|
|
179
179
|
)
|
|
180
180
|
else:
|
|
181
181
|
t = triton.testing.do_bench(
|
|
@@ -188,7 +188,7 @@ class QuantizeOpBase(metaclass=abc.ABCMeta):
|
|
|
188
188
|
if use_cuda_graph:
|
|
189
189
|
with torch.cuda.stream(torch.cuda.Stream()):
|
|
190
190
|
t = triton.testing.do_bench_cudagraph(
|
|
191
|
-
lambda: self.compute(*args, **kwargs)
|
|
191
|
+
lambda: self.compute(*args, **kwargs)
|
|
192
192
|
)
|
|
193
193
|
else:
|
|
194
194
|
t = triton.testing.do_bench(lambda: self.compute(*args, **kwargs))
|
|
Binary file
|
fbgemm_gpu/fbgemm.so
CHANGED
|
Binary file
|
|
@@ -1714,16 +1714,10 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
1714
1714
|
tbe_id=self.uuid,
|
|
1715
1715
|
)
|
|
1716
1716
|
|
|
1717
|
-
def _get_tensor_memory(self, tensor_name: str) -> int:
|
|
1718
|
-
"""Get memory usage of a tensor in bytes."""
|
|
1719
|
-
if not hasattr(self, tensor_name):
|
|
1720
|
-
self.log(f"Tensor '{tensor_name}' not found, using 0 bytes")
|
|
1721
|
-
return 0
|
|
1722
|
-
tensor = getattr(self, tensor_name)
|
|
1723
|
-
return tensor.numel() * tensor.element_size()
|
|
1724
|
-
|
|
1725
1717
|
@torch.jit.ignore
|
|
1726
|
-
def _report_tbe_mem_usage(
|
|
1718
|
+
def _report_tbe_mem_usage(
|
|
1719
|
+
self,
|
|
1720
|
+
) -> None:
|
|
1727
1721
|
if self.stats_reporter is None:
|
|
1728
1722
|
return
|
|
1729
1723
|
|
|
@@ -1732,17 +1726,18 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
1732
1726
|
return
|
|
1733
1727
|
|
|
1734
1728
|
total_mem_usage = sum(
|
|
1735
|
-
|
|
1736
|
-
) + sum(
|
|
1737
|
-
|
|
1729
|
+
param.numel() * param.element_size() for param in self.parameters()
|
|
1730
|
+
) + sum(buffer.numel() * buffer.element_size() for buffer in self.buffers())
|
|
1738
1731
|
if self.use_cpu:
|
|
1739
1732
|
total_hbm_usage = 0
|
|
1740
1733
|
total_uvm_usage = total_mem_usage
|
|
1741
1734
|
else:
|
|
1735
|
+
# hbm usage is total usage minus uvm usage
|
|
1742
1736
|
total_uvm_usage = sum(
|
|
1743
|
-
self.
|
|
1744
|
-
|
|
1745
|
-
|
|
1737
|
+
getattr(self, tensor_name).numel()
|
|
1738
|
+
* getattr(self, tensor_name).element_size()
|
|
1739
|
+
for tensor_name in self._uvm_tensors_log
|
|
1740
|
+
if hasattr(self, tensor_name)
|
|
1746
1741
|
)
|
|
1747
1742
|
total_hbm_usage = total_mem_usage - total_uvm_usage
|
|
1748
1743
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
fbgemm_gpu/__init__.py,sha256=
|
|
2
|
-
fbgemm_gpu/asmjit.so,sha256=
|
|
1
|
+
fbgemm_gpu/__init__.py,sha256=BxqlqUln-q_ljJpo_Cg3R2RYZqxCbZ0UjvdBe6DzNZk,6301
|
|
2
|
+
fbgemm_gpu/asmjit.so,sha256=tp-5cN7HUYo7cjvR_kl_vfPBSEv78-IQxdvHN-nXFAM,501728
|
|
3
3
|
fbgemm_gpu/batched_unary_embeddings_ops.py,sha256=GYeJ9pg-Wc9FokXVci_npDsL6UV18-pJXID2xzrJ9O8,2904
|
|
4
4
|
fbgemm_gpu/enums.py,sha256=37ewGSfO1x7sO31ZkRiqV1yKuklfHXT5qZIxzeeGogo,755
|
|
5
|
-
fbgemm_gpu/fbgemm.so,sha256=
|
|
5
|
+
fbgemm_gpu/fbgemm.so,sha256=B9y6MDLC6Ou7Bw_pT07Xfw5g5Q4j2yn9Xsp96QVpgEU,5646712
|
|
6
6
|
fbgemm_gpu/metrics.py,sha256=TsurFLJf0nJvPDN7urWb4LMQlf5RgdWPTTTDO7S4wtI,5663
|
|
7
7
|
fbgemm_gpu/permute_pooled_embedding_modules.py,sha256=vOXMYclaGnwSt0St_SOAlAe18kz6WjMyTeHnC9jLhcE,5130
|
|
8
8
|
fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=f3VJvH_kw9Ltd_DXtaf_PJPHmlmEWrQgzQ7MDkhh5Nw,2746
|
|
@@ -17,7 +17,7 @@ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43m
|
|
|
17
17
|
fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
|
|
18
18
|
fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=76ME0692CC691xpjiOsY3Xxy-LD_XKs8w9vq1gcm9tM,16440
|
|
19
19
|
fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=dGC85xjQiRUrequBibSf9oMAVHT5Q49zsVo2zW4n_88,81679
|
|
20
|
-
fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=
|
|
20
|
+
fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=lF9eP6GDTyqbEJgl-SO6gNYUk2dv2YE2bMEtzGkY21c,173757
|
|
21
21
|
fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=e3O9ElaWBGvG7TdT3Ok_8cB06jhskXuyCQ0t40dzsEY,5449
|
|
22
22
|
fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
|
|
23
23
|
fbgemm_gpu/tbe_input_multiplexer.py,sha256=TQjwkJ2JkOaQsMYuRdk9RbNa9759EPEtx8bYclChtZY,3063
|
|
@@ -32,9 +32,9 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
|
|
|
32
32
|
fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
|
|
33
33
|
fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
|
|
34
34
|
fbgemm_gpu/docs/sparse_ops.py,sha256=gSLUFdnu8lle_6gLewFkM20wL3ek2jKLvDGMKR6POaY,27292
|
|
35
|
-
fbgemm_gpu/docs/target.genai.json.py,sha256=
|
|
35
|
+
fbgemm_gpu/docs/target.genai.json.py,sha256=EffeoYnTPp4BLew_sFOpBinWQgXup1DReXuDroDTnh8,79
|
|
36
36
|
fbgemm_gpu/experimental/example/__init__.py,sha256=OvJHZgWnycL1gWKyCXFJCTKuys3KAqx4iadjx3R-tBQ,723
|
|
37
|
-
fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=
|
|
37
|
+
fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=fOyUuW3hkDvgT6wxaUvCzZtj5G6pWOfQKLnjIJ5FUAg,407744
|
|
38
38
|
fbgemm_gpu/experimental/example/utils.py,sha256=Je__VkMlBMLOhh7NXOocOdvaa2gz9kl9Dkqeu25tpFA,562
|
|
39
39
|
fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py,sha256=1CqUfzlYyXTvU-BNaUq4RZpLV-2lKAVCAHeJzSIZFWw,419
|
|
40
40
|
fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py,sha256=2RjIDSzUXtoFoC2ryp-C-j5H83mbSjPwvsvTrThfrqE,215658
|
|
@@ -43,7 +43,7 @@ fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py,sha256=rbjxTMefjQWgJrWK
|
|
|
43
43
|
fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py,sha256=SltbY_dsit5e7B8lDIB_VYPrEq0t9kckthj9mQaVNfA,7571
|
|
44
44
|
fbgemm_gpu/experimental/gemm/triton_gemm/utils.py,sha256=rULXIpVaaRS3GKUZ1RHcWUrUyy0xMVREwS1SFShGgcw,4302
|
|
45
45
|
fbgemm_gpu/experimental/gen_ai/__init__.py,sha256=r3NlNCXuIh0pfKwKU5v14y6AZkpoIkKWbtzxSprgeKA,1713
|
|
46
|
-
fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=
|
|
46
|
+
fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=xsfHP5BNQ6IqiCxVYYEvWfF2wTD3vSt9lYciiqm_5Nk,287360856
|
|
47
47
|
fbgemm_gpu/experimental/gen_ai/quantize.py,sha256=KAljWSdN-1_c5DWfT-3MDxWLMULK49Yu36t6TmQI9Tw,12599
|
|
48
48
|
fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py,sha256=ntFgFs0foi6NQx8eqs5I3fCjzKSI0spXfEWiMhlcT00,897
|
|
49
49
|
fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=FADVTYzS2u8fA-3iChS5CbtWd0mWF8F3lnXcwr_7vDw,7821
|
|
@@ -52,7 +52,7 @@ fbgemm_gpu/experimental/gen_ai/bench/__init__.py,sha256=XpAK_eyqDSKeFC5J9KpnKtbZ
|
|
|
52
52
|
fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py,sha256=ApEyJOf_rdIo8V_EgvhZXBGNov8ITC_dnB95v8szulI,8515
|
|
53
53
|
fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py,sha256=K9Nib6D7xJbw1QwEVuCJrVyI1qs988moo3cieVKYuFY,12057
|
|
54
54
|
fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py,sha256=BWl6t-4acbuRSEX2aVNDlFrSWZkqMWK2sI3VONaMd3Q,24047
|
|
55
|
-
fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=
|
|
55
|
+
fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py,sha256=H6AchejyZs76_snM_ae5vV0cPr_Q0h35OQ8qED0r1N4,104915
|
|
56
56
|
fbgemm_gpu/experimental/gen_ai/moe/README.md,sha256=z9ybHmv4KFJ1drj5OByuFaOY0tRQwwiIW3Q22TB_2-k,904
|
|
57
57
|
fbgemm_gpu/experimental/gen_ai/moe/__init__.py,sha256=lwSvff07yEav024B1XyfgW8r8hwNe--aEDywcO7rnbM,1905
|
|
58
58
|
fbgemm_gpu/experimental/gen_ai/moe/activation.py,sha256=NiXhWyCNagI3P9N3N89iSX7xKuShdkq9DxEUAzoV6y0,7892
|
|
@@ -121,7 +121,7 @@ fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,99
|
|
|
121
121
|
fbgemm_gpu/utils/torch_library.py,sha256=ywsAHjbuwesj50LjEu99WkAH17FlaVgePZ9OmFg6YE4,4193
|
|
122
122
|
list_versions/__init__.py,sha256=UmTeqCk-UJWFtlZQWvZao3xvui2w9E3X_JdOXVjRaNw,315
|
|
123
123
|
list_versions/cli_run.py,sha256=CChZoXQ-tiKaWboXAYlPVJ5w8K5zAKiKcncA087I1sc,4508
|
|
124
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
125
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
126
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
127
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
124
|
+
fbgemm_gpu_genai_nightly-2025.10.20.dist-info/METADATA,sha256=YSlW54hUiRgcqmN2NJaqU8mF-KLXXKCF_MpwTI2USC0,2656
|
|
125
|
+
fbgemm_gpu_genai_nightly-2025.10.20.dist-info/WHEEL,sha256=vUT1hK8fT5m5CAs5kDyQ_ABrvCmtd0TCp5-4vN9tR5A,108
|
|
126
|
+
fbgemm_gpu_genai_nightly-2025.10.20.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
|
|
127
|
+
fbgemm_gpu_genai_nightly-2025.10.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|