fbgemm-gpu-genai-nightly 2025.10.11__cp39-cp39-manylinux_2_28_x86_64.whl → 2025.10.12__cp39-cp39-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fbgemm-gpu-genai-nightly might be problematic. Click here for more details.
- fbgemm_gpu/asmjit.so +0 -0
- fbgemm_gpu/docs/version.py +1 -1
- fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so +0 -0
- fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so +0 -0
- fbgemm_gpu/fbgemm.so +0 -0
- fbgemm_gpu/tbe/ssd/training.py +97 -2
- {fbgemm_gpu_genai_nightly-2025.10.11.dist-info → fbgemm_gpu_genai_nightly-2025.10.12.dist-info}/METADATA +1 -1
- {fbgemm_gpu_genai_nightly-2025.10.11.dist-info → fbgemm_gpu_genai_nightly-2025.10.12.dist-info}/RECORD +10 -10
- {fbgemm_gpu_genai_nightly-2025.10.11.dist-info → fbgemm_gpu_genai_nightly-2025.10.12.dist-info}/WHEEL +0 -0
- {fbgemm_gpu_genai_nightly-2025.10.11.dist-info → fbgemm_gpu_genai_nightly-2025.10.12.dist-info}/top_level.txt +0 -0
fbgemm_gpu/asmjit.so
CHANGED
|
Binary file
|
fbgemm_gpu/docs/version.py
CHANGED
|
Binary file
|
|
Binary file
|
fbgemm_gpu/fbgemm.so
CHANGED
|
Binary file
|
fbgemm_gpu/tbe/ssd/training.py
CHANGED
|
@@ -3971,8 +3971,8 @@ class SSDTableBatchedEmbeddingBags(nn.Module):
|
|
|
3971
3971
|
self.step, stats_reporter.report_interval # pyre-ignore
|
|
3972
3972
|
)
|
|
3973
3973
|
|
|
3974
|
-
if len(dram_kv_perf_stats) != 24:
|
|
3975
|
-
logging.error("dram cache perf stats should have 24 elements")
|
|
3974
|
+
if len(dram_kv_perf_stats) != 36:
|
|
3975
|
+
logging.error("dram cache perf stats should have 36 elements")
|
|
3976
3976
|
return
|
|
3977
3977
|
|
|
3978
3978
|
dram_read_duration = dram_kv_perf_stats[0]
|
|
@@ -4002,6 +4002,19 @@ class SSDTableBatchedEmbeddingBags(nn.Module):
|
|
|
4002
4002
|
dram_kv_actual_used_chunk_bytes = dram_kv_perf_stats[21]
|
|
4003
4003
|
dram_kv_num_rows = dram_kv_perf_stats[22]
|
|
4004
4004
|
dram_kv_read_counts = dram_kv_perf_stats[23]
|
|
4005
|
+
dram_metadata_write_sharding_total_duration = dram_kv_perf_stats[24]
|
|
4006
|
+
dram_metadata_write_total_duration = dram_kv_perf_stats[25]
|
|
4007
|
+
dram_metadata_write_allocate_avg_duration = dram_kv_perf_stats[26]
|
|
4008
|
+
dram_metadata_write_lookup_cache_avg_duration = dram_kv_perf_stats[27]
|
|
4009
|
+
dram_metadata_write_acquire_lock_avg_duration = dram_kv_perf_stats[28]
|
|
4010
|
+
dram_metadata_write_cache_miss_avg_count = dram_kv_perf_stats[29]
|
|
4011
|
+
|
|
4012
|
+
dram_read_metadata_total_duration = dram_kv_perf_stats[30]
|
|
4013
|
+
dram_read_metadata_sharding_total_duration = dram_kv_perf_stats[31]
|
|
4014
|
+
dram_read_metadata_cache_hit_copy_avg_duration = dram_kv_perf_stats[32]
|
|
4015
|
+
dram_read_metadata_lookup_cache_total_avg_duration = dram_kv_perf_stats[33]
|
|
4016
|
+
dram_read_metadata_acquire_lock_avg_duration = dram_kv_perf_stats[34]
|
|
4017
|
+
dram_read_read_metadata_load_size = dram_kv_perf_stats[35]
|
|
4005
4018
|
|
|
4006
4019
|
stats_reporter.report_duration(
|
|
4007
4020
|
iteration_step=self.step,
|
|
@@ -4168,6 +4181,88 @@ class SSDTableBatchedEmbeddingBags(nn.Module):
|
|
|
4168
4181
|
data_bytes=dram_kv_num_rows,
|
|
4169
4182
|
enable_tb_metrics=True,
|
|
4170
4183
|
)
|
|
4184
|
+
stats_reporter.report_duration(
|
|
4185
|
+
iteration_step=self.step,
|
|
4186
|
+
event_name="dram_kv.perf.set.dram_eviction_score_write_sharding_total_duration_us",
|
|
4187
|
+
duration_ms=dram_metadata_write_sharding_total_duration,
|
|
4188
|
+
enable_tb_metrics=True,
|
|
4189
|
+
time_unit="us",
|
|
4190
|
+
)
|
|
4191
|
+
stats_reporter.report_duration(
|
|
4192
|
+
iteration_step=self.step,
|
|
4193
|
+
event_name="dram_kv.perf.set.dram_eviction_score_write_total_duration_us",
|
|
4194
|
+
duration_ms=dram_metadata_write_total_duration,
|
|
4195
|
+
enable_tb_metrics=True,
|
|
4196
|
+
time_unit="us",
|
|
4197
|
+
)
|
|
4198
|
+
stats_reporter.report_duration(
|
|
4199
|
+
iteration_step=self.step,
|
|
4200
|
+
event_name="dram_kv.perf.set.dram_eviction_score_write_allocate_avg_duration_us",
|
|
4201
|
+
duration_ms=dram_metadata_write_allocate_avg_duration,
|
|
4202
|
+
enable_tb_metrics=True,
|
|
4203
|
+
time_unit="us",
|
|
4204
|
+
)
|
|
4205
|
+
stats_reporter.report_duration(
|
|
4206
|
+
iteration_step=self.step,
|
|
4207
|
+
event_name="dram_kv.perf.set.dram_eviction_score_write_lookup_cache_avg_duration_us",
|
|
4208
|
+
duration_ms=dram_metadata_write_lookup_cache_avg_duration,
|
|
4209
|
+
enable_tb_metrics=True,
|
|
4210
|
+
time_unit="us",
|
|
4211
|
+
)
|
|
4212
|
+
stats_reporter.report_duration(
|
|
4213
|
+
iteration_step=self.step,
|
|
4214
|
+
event_name="dram_kv.perf.set.dram_eviction_score_write_acquire_lock_avg_duration_us",
|
|
4215
|
+
duration_ms=dram_metadata_write_acquire_lock_avg_duration,
|
|
4216
|
+
enable_tb_metrics=True,
|
|
4217
|
+
time_unit="us",
|
|
4218
|
+
)
|
|
4219
|
+
stats_reporter.report_data_amount(
|
|
4220
|
+
iteration_step=self.step,
|
|
4221
|
+
event_name="dram_kv.perf.set.dram_eviction_score_write_cache_miss_avg_count",
|
|
4222
|
+
data_bytes=dram_metadata_write_cache_miss_avg_count,
|
|
4223
|
+
enable_tb_metrics=True,
|
|
4224
|
+
)
|
|
4225
|
+
stats_reporter.report_duration(
|
|
4226
|
+
iteration_step=self.step,
|
|
4227
|
+
event_name="dram_kv.perf.get.dram_eviction_score_read_total_duration_us",
|
|
4228
|
+
duration_ms=dram_read_metadata_total_duration,
|
|
4229
|
+
enable_tb_metrics=True,
|
|
4230
|
+
time_unit="us",
|
|
4231
|
+
)
|
|
4232
|
+
stats_reporter.report_duration(
|
|
4233
|
+
iteration_step=self.step,
|
|
4234
|
+
event_name="dram_kv.perf.get.dram_eviction_score_read_sharding_total_duration_us",
|
|
4235
|
+
duration_ms=dram_read_metadata_sharding_total_duration,
|
|
4236
|
+
enable_tb_metrics=True,
|
|
4237
|
+
time_unit="us",
|
|
4238
|
+
)
|
|
4239
|
+
stats_reporter.report_duration(
|
|
4240
|
+
iteration_step=self.step,
|
|
4241
|
+
event_name="dram_kv.perf.get.dram_eviction_score_read_cache_hit_copy_avg_duration_us",
|
|
4242
|
+
duration_ms=dram_read_metadata_cache_hit_copy_avg_duration,
|
|
4243
|
+
enable_tb_metrics=True,
|
|
4244
|
+
time_unit="us",
|
|
4245
|
+
)
|
|
4246
|
+
stats_reporter.report_duration(
|
|
4247
|
+
iteration_step=self.step,
|
|
4248
|
+
event_name="dram_kv.perf.get.dram_eviction_score_read_lookup_cache_total_avg_duration_us",
|
|
4249
|
+
duration_ms=dram_read_metadata_lookup_cache_total_avg_duration,
|
|
4250
|
+
enable_tb_metrics=True,
|
|
4251
|
+
time_unit="us",
|
|
4252
|
+
)
|
|
4253
|
+
stats_reporter.report_duration(
|
|
4254
|
+
iteration_step=self.step,
|
|
4255
|
+
event_name="dram_kv.perf.get.dram_eviction_score_read_acquire_lock_avg_duration_us",
|
|
4256
|
+
duration_ms=dram_read_metadata_acquire_lock_avg_duration,
|
|
4257
|
+
enable_tb_metrics=True,
|
|
4258
|
+
time_unit="us",
|
|
4259
|
+
)
|
|
4260
|
+
stats_reporter.report_data_amount(
|
|
4261
|
+
iteration_step=self.step,
|
|
4262
|
+
event_name="dram_kv.perf.get.dram_eviction_score_read_load_size",
|
|
4263
|
+
data_bytes=dram_read_read_metadata_load_size,
|
|
4264
|
+
enable_tb_metrics=True,
|
|
4265
|
+
)
|
|
4171
4266
|
|
|
4172
4267
|
def _recording_to_timer(
|
|
4173
4268
|
self, timer: Optional[AsyncSeriesTimer], **kwargs: Any
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
fbgemm_gpu/__init__.py,sha256=FdQCmpvETH80tlIPP6W8MrOmzLaX9eoGY-fuHtVPbj0,5747
|
|
2
|
-
fbgemm_gpu/asmjit.so,sha256=
|
|
2
|
+
fbgemm_gpu/asmjit.so,sha256=RMRQVgIB3mf-bUr2BOMzrgzIv3H9arLfYnJ8CTGZ8RQ,501728
|
|
3
3
|
fbgemm_gpu/batched_unary_embeddings_ops.py,sha256=GYeJ9pg-Wc9FokXVci_npDsL6UV18-pJXID2xzrJ9O8,2904
|
|
4
4
|
fbgemm_gpu/enums.py,sha256=37ewGSfO1x7sO31ZkRiqV1yKuklfHXT5qZIxzeeGogo,755
|
|
5
|
-
fbgemm_gpu/fbgemm.so,sha256=
|
|
5
|
+
fbgemm_gpu/fbgemm.so,sha256=B9y6MDLC6Ou7Bw_pT07Xfw5g5Q4j2yn9Xsp96QVpgEU,5646712
|
|
6
6
|
fbgemm_gpu/metrics.py,sha256=TsurFLJf0nJvPDN7urWb4LMQlf5RgdWPTTTDO7S4wtI,5663
|
|
7
7
|
fbgemm_gpu/permute_pooled_embedding_modules.py,sha256=vOXMYclaGnwSt0St_SOAlAe18kz6WjMyTeHnC9jLhcE,5130
|
|
8
8
|
fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=f3VJvH_kw9Ltd_DXtaf_PJPHmlmEWrQgzQ7MDkhh5Nw,2746
|
|
@@ -32,9 +32,9 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
|
|
|
32
32
|
fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
|
|
33
33
|
fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
|
|
34
34
|
fbgemm_gpu/docs/sparse_ops.py,sha256=gSLUFdnu8lle_6gLewFkM20wL3ek2jKLvDGMKR6POaY,27292
|
|
35
|
-
fbgemm_gpu/docs/version.py,sha256=
|
|
35
|
+
fbgemm_gpu/docs/version.py,sha256=tx4meXyr0j01--CHHgF4Lv7ZZK5ONOKhmiHp_Ggngnw,317
|
|
36
36
|
fbgemm_gpu/experimental/example/__init__.py,sha256=V_XrGMq2oNVMpzwe1srlaTaHeIcZJw5oAGbo3seM_Ks,870
|
|
37
|
-
fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=
|
|
37
|
+
fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=owFe-ilNrqChzBPf7R9BfcY3lpFA6kMuXZn_bKPb2l8,239808
|
|
38
38
|
fbgemm_gpu/experimental/example/utils.py,sha256=Je__VkMlBMLOhh7NXOocOdvaa2gz9kl9Dkqeu25tpFA,562
|
|
39
39
|
fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py,sha256=AqHefiOaN_SjP5ew7RYGuKFuSlhedOJL_6f97TtLv7c,566
|
|
40
40
|
fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py,sha256=2RjIDSzUXtoFoC2ryp-C-j5H83mbSjPwvsvTrThfrqE,215658
|
|
@@ -43,7 +43,7 @@ fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py,sha256=rbjxTMefjQWgJrWK
|
|
|
43
43
|
fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py,sha256=SltbY_dsit5e7B8lDIB_VYPrEq0t9kckthj9mQaVNfA,7571
|
|
44
44
|
fbgemm_gpu/experimental/gemm/triton_gemm/utils.py,sha256=rULXIpVaaRS3GKUZ1RHcWUrUyy0xMVREwS1SFShGgcw,4302
|
|
45
45
|
fbgemm_gpu/experimental/gen_ai/__init__.py,sha256=qwfuF5E5K4oDiH7RJkpC7zth3kAsG7wv_glCl2A_G2A,1860
|
|
46
|
-
fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=
|
|
46
|
+
fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=zsAN7i8yprDxNNsVPIGEouGZTjGR9Y4_hyYjynvkaEI,73016984
|
|
47
47
|
fbgemm_gpu/experimental/gen_ai/quantize.py,sha256=KAljWSdN-1_c5DWfT-3MDxWLMULK49Yu36t6TmQI9Tw,12599
|
|
48
48
|
fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py,sha256=oExepXpjMOwM43gARZARY0UtR-EX2zqRnSrOaQPy448,1044
|
|
49
49
|
fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=FADVTYzS2u8fA-3iChS5CbtWd0mWF8F3lnXcwr_7vDw,7821
|
|
@@ -99,7 +99,7 @@ fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py,sha256=vZHj7KIe1DoJDy5eft29Xt
|
|
|
99
99
|
fbgemm_gpu/tbe/ssd/__init__.py,sha256=wzfMT10cp_dqK2lrebC449hOdexBnizcf_98lA1NyHs,483
|
|
100
100
|
fbgemm_gpu/tbe/ssd/common.py,sha256=1J8K7sTQswgCYWaVwF-ZdCJj7mNN6O9GI70AaZWzJGE,1044
|
|
101
101
|
fbgemm_gpu/tbe/ssd/inference.py,sha256=B_uX66ajGA9YKGlFa5TmGWs7b-b1RFigzwxmENZ9Oio,22816
|
|
102
|
-
fbgemm_gpu/tbe/ssd/training.py,sha256=
|
|
102
|
+
fbgemm_gpu/tbe/ssd/training.py,sha256=X-u33ZUpxsVUXdQ8UgVtorywAAse5YLrH-ElAG6RnJs,199517
|
|
103
103
|
fbgemm_gpu/tbe/ssd/utils/__init__.py,sha256=5DgmR2HA6NtmYh2ddkUgpDsZ6a7hF0DPedA1gMpdh18,250
|
|
104
104
|
fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py,sha256=SFg2-29b-i49LWm-FlaWUkTz2XzXbicYi_AzVj4jKNE,7601
|
|
105
105
|
fbgemm_gpu/tbe/stats/__init__.py,sha256=on29iDtq7cVNh90JR9aeFNG-K9DDoYq0JryzoplL49I,322
|
|
@@ -121,7 +121,7 @@ fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,99
|
|
|
121
121
|
fbgemm_gpu/utils/torch_library.py,sha256=ywsAHjbuwesj50LjEu99WkAH17FlaVgePZ9OmFg6YE4,4193
|
|
122
122
|
list_versions/__init__.py,sha256=UmTeqCk-UJWFtlZQWvZao3xvui2w9E3X_JdOXVjRaNw,315
|
|
123
123
|
list_versions/cli_run.py,sha256=CChZoXQ-tiKaWboXAYlPVJ5w8K5zAKiKcncA087I1sc,4508
|
|
124
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
125
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
126
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
127
|
-
fbgemm_gpu_genai_nightly-2025.10.
|
|
124
|
+
fbgemm_gpu_genai_nightly-2025.10.12.dist-info/METADATA,sha256=mVnsemyIjnAIiA0h1-0U1yChU5jXX7DhKtbyEnX1nUc,2656
|
|
125
|
+
fbgemm_gpu_genai_nightly-2025.10.12.dist-info/WHEEL,sha256=LLKFkWvXQZlw1kR7spdgzB2PptbzNM_HUUPM3Q0J_XE,106
|
|
126
|
+
fbgemm_gpu_genai_nightly-2025.10.12.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
|
|
127
|
+
fbgemm_gpu_genai_nightly-2025.10.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|