fbgemm-gpu-genai-nightly 2025.10.19__cp310-cp310-manylinux_2_28_x86_64.whl → 2025.10.25__cp310-cp310-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of fbgemm-gpu-genai-nightly might be problematic.

--- a/fbgemm_gpu/docs/target.genai.json.py
+++ b/fbgemm_gpu/docs/target.genai.json.py
@@ -1,6 +1,6 @@
 
 {
-  "version": "2025.10.19",
+  "version": "2025.10.25",
   "target": "genai",
   "variant": "cuda"
 }
--- a/fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py
+++ b/fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py
@@ -3840,6 +3840,10 @@ _MATMUL_CONFIG_TUPLES_PINGPONG_4K_8K_16K = [
     (256, 128, 128, 1, 1, 2, 16, 1, 8, 2),
     (128, 256, 128, 2, 1, 2, 16, 2, 4, 1),
     (256, 128, 64, 2, 1, 2, 16, 1, 4, 2),
+    (128, 128, 256, 2, 1, 0, 16, 2, 8, 2),
+    (128, 64, 128, 2, 1, 2, 16, 2, 4, 2),
+    (128, 128, 64, 2, 1, 0, 16, 1, 4, 2),
+    (128, 128, 128, 1, 1, 2, 16, 1, 4, 2),
 ]
 
 
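The four added tuples widen the pingpong autotune search space for the 4K/8K/16K shapes. As a rough, hypothetical illustration of how such tuple lists are typically expanded for Triton autotuning (the field names and order below are assumptions for illustration, not fbgemm's actual tuple layout):

import triton

# Hypothetical helper: expand 10-field config tuples into triton.Config
# objects. Only a plausible subset of fields is mapped; the real fbgemm
# tuple layout is not documented in this diff.
def configs_from_tuples(tuples):
    configs = []
    for block_m, block_n, block_k, group_m, *_, num_warps, num_stages in tuples:
        configs.append(
            triton.Config(
                {"BLOCK_M": block_m, "BLOCK_N": block_n,
                 "BLOCK_K": block_k, "GROUP_M": group_m},
                num_warps=num_warps,
                num_stages=num_stages,
            )
        )
    return configs

A list like this would typically feed @triton.autotune(configs=..., key=[...]) so the kernel picks the fastest tiling per problem shape.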
--- a/fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py
+++ b/fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py
@@ -61,6 +61,8 @@ def _cutlass_blackwell_fmha_forward(
     softmax_scale: float | None = None,
     causal: bool = False,
     seqlen_kv: torch.Tensor | None = None,
+    page_table: torch.Tensor | None = None,
+    seqlen_k: int | None = None,
     window_left: int = -1,
     window_right: int = -1,
     bottom_right: bool = True,
@@ -79,6 +81,8 @@ def _cutlass_blackwell_fmha_forward(
         softmax_scale=softmax_scale,
         causal=causal,
         seqlen_kv=seqlen_kv,
+        page_table=page_table,
+        seqlen_k=seqlen_k,
         window_size_left=window_left,
         window_size_right=window_right,
         bottom_right=bottom_right,
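These two hunks thread paged-KV-cache arguments through the forward wrapper: page_table maps each sequence's logical KV positions to physical cache pages, and seqlen_k gives the logical KV length covered by the table. A minimal sketch of the addressing such a kernel performs internally (the tensor layout is an assumption for illustration):

import torch

# Illustrative paged-KV gather. Assumed layout: kv_pages is
# [num_pages, page_size, num_heads, head_dim]; page_table is
# [batch, max_pages_per_seq] holding physical page ids.
def gather_paged_kv(kv_pages: torch.Tensor, page_table: torch.Tensor, seqlen_k: int) -> torch.Tensor:
    page_size = kv_pages.shape[1]
    pos = torch.arange(seqlen_k, device=kv_pages.device)
    page_ids = page_table[:, pos // page_size]  # [batch, seqlen_k]
    return kv_pages[page_ids, pos % page_size]  # [batch, seqlen_k, num_heads, head_dim]

A fused kernel avoids materializing this gather and indexes pages on the fly; the sketch only shows the addressing math.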
@@ -171,6 +175,8 @@ class CutlassBlackwellFmhaFunc(torch.autograd.Function):
         max_seq_len_q: Optional[int] = None,
         max_seq_len_k: Optional[int] = None,
         seqlen_kv: Optional[torch.Tensor] = None,
+        page_table: Optional[torch.Tensor] = None,
+        seqlen_k: Optional[int] = None,
         window_size: tuple[int, int] = (-1, -1),
         bottom_right: bool = True,
         deterministic: bool = False,
@@ -220,6 +226,8 @@ class CutlassBlackwellFmhaFunc(torch.autograd.Function):
             softmax_scale,
             causal,
             seqlen_kv,
+            page_table,
+            seqlen_k,
             window_left,
             window_right,
             bottom_right,
@@ -252,6 +260,8 @@ class CutlassBlackwellFmhaFunc(torch.autograd.Function):
         None,
         None,
         None,
+        None,
+        None,
     ]:
         if ctx.is_gen:
             # For gen case, no backward pass is needed (generation is inference only)
@@ -279,7 +289,23 @@ class CutlassBlackwellFmhaFunc(torch.autograd.Function):
             bottom_right=ctx.bottom_right,
             deterministic=ctx.deterministic,
         )
-        return dq, dk, dv, None, None, None, None, None, None, None, None, None, None
+        return (
+            dq,
+            dk,
+            dv,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+        )
 
 
 def cutlass_blackwell_fmha_func(
@@ -293,6 +319,8 @@ def cutlass_blackwell_fmha_func(
    max_seq_len_q: int | None = None,
    max_seq_len_k: int | None = None,
    seqlen_kv: torch.Tensor | None = None,
+   page_table: torch.Tensor | None = None,
+   seqlen_k: int | None = None,
    window_size: tuple[int, int] | None = (-1, -1),
    bottom_right: bool = True,
    deterministic: bool = False,
@@ -308,6 +336,8 @@ def cutlass_blackwell_fmha_func(
        max_seq_len_q,
        max_seq_len_k,
        seqlen_kv,
+       page_table,
+       seqlen_k,
        window_size,
        bottom_right,
        deterministic,
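Putting the new arguments together, a hedged usage sketch for decode-time paged attention follows; the import path is taken from this wheel's RECORD, while the shapes, dtypes, and the exact paged K/V layout the kernel expects are assumptions:

import torch
from fbgemm_gpu.experimental.gen_ai.attention.cutlass_blackwell_fmha.cutlass_blackwell_fmha_interface import (
    cutlass_blackwell_fmha_func,
)

B, H, D, page_size, pages_per_seq = 2, 8, 128, 64, 8
q = torch.randn(B, 1, H, D, device="cuda", dtype=torch.bfloat16)  # one new query token per sequence
k = torch.randn(B * pages_per_seq, page_size, H, D, device="cuda", dtype=torch.bfloat16)
v = torch.randn_like(k)
# Identity page table: sequence b owns pages [b*pages_per_seq, (b+1)*pages_per_seq).
page_table = torch.arange(B * pages_per_seq, device="cuda", dtype=torch.int32).view(B, pages_per_seq)

out = cutlass_blackwell_fmha_func(
    q, k, v,
    causal=True,
    page_table=page_table,               # new in this release
    seqlen_k=page_size * pages_per_seq,  # new in this release
)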
--- a/fbgemm_gpu/split_table_batched_embeddings_ops_training.py
+++ b/fbgemm_gpu/split_table_batched_embeddings_ops_training.py
@@ -1722,6 +1722,119 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
         tensor = getattr(self, tensor_name)
         return tensor.numel() * tensor.element_size()
 
+    def _categorize_memory_by_location(
+        self, tensor_names: list[str]
+    ) -> tuple[int, int]:
+        """Categorize memory into HBM and UVM for given tensors.
+
+        Returns:
+            (hbm_bytes, uvm_bytes)
+        """
+        uvm_set = set(self._uvm_tensors_log)
+        hbm_bytes = 0
+        uvm_bytes = 0
+
+        for name in tensor_names:
+            size = self._get_tensor_memory(name)
+            if name in uvm_set:
+                uvm_bytes += size
+            else:
+                hbm_bytes += size
+
+        return hbm_bytes, uvm_bytes
+
+    def _report_hbm_breakdown(
+        self,
+        stats_reporter: TBEStatsReporter,
+        embeddings: int,
+        optimizer_states: int,
+        cache: int,
+        total_static_sparse: int,
+        ephemeral: int,
+    ) -> None:
+        """Report HBM memory breakdown to stats reporter."""
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.hbm.embeddings",
+            data_bytes=embeddings,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.hbm.optimizer_states",
+            data_bytes=optimizer_states,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.hbm.cache",
+            data_bytes=cache,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.hbm.total_static_sparse",
+            data_bytes=total_static_sparse,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.hbm.ephemeral",
+            data_bytes=ephemeral,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+
+    def _report_uvm_breakdown(
+        self,
+        stats_reporter: TBEStatsReporter,
+        embeddings: int,
+        optimizer_states: int,
+        cache: int,
+        total_static_sparse: int,
+        ephemeral: int,
+    ) -> None:
+        """Report UVM memory breakdown to stats reporter."""
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.uvm.embeddings",
+            data_bytes=embeddings,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.uvm.optimizer_states",
+            data_bytes=optimizer_states,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.uvm.cache",
+            data_bytes=cache,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.uvm.total_static_sparse",
+            data_bytes=total_static_sparse,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+        stats_reporter.report_data_amount(
+            iteration_step=self.step,
+            event_name="tbe.uvm.ephemeral",
+            data_bytes=ephemeral,
+            embedding_id=self.logging_table_name,
+            tbe_id=self.uuid,
+        )
+
     @torch.jit.ignore
     def _report_tbe_mem_usage(self) -> None:
         if self.stats_reporter is None:
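_categorize_memory_by_location splits a group of registered tensors into HBM-resident and UVM-resident byte totals, keyed on membership in self._uvm_tensors_log. The same logic as a standalone sketch with made-up sizes:

# Standalone sketch of the HBM/UVM split; tensor names and sizes are illustrative.
def categorize(sizes: dict[str, int], uvm_log: set[str]) -> tuple[int, int]:
    hbm = sum(n_bytes for name, n_bytes in sizes.items() if name not in uvm_log)
    uvm = sum(n_bytes for name, n_bytes in sizes.items() if name in uvm_log)
    return hbm, uvm

sizes = {"weights_dev": 4 << 30, "weights_uvm": 16 << 30, "momentum1_dev": 4 << 30}
print(categorize(sizes, {"weights_uvm"}))  # (8589934592, 17179869184): 8 GiB HBM, 16 GiB UVM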
@@ -1731,10 +1844,12 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
         if not stats_reporter.should_report(self.step):
             return
 
+        # Calculate total memory from all parameters and buffers (always needed)
         total_mem_usage = sum(
             p.numel() * p.element_size() for p in self.parameters()
         ) + sum(b.numel() * b.element_size() for b in self.buffers())
 
+        # Calculate total HBM and UVM usage (always needed)
         if self.use_cpu:
             total_hbm_usage = 0
             total_uvm_usage = total_mem_usage
@@ -1746,6 +1861,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
         )
         total_hbm_usage = total_mem_usage - total_uvm_usage
 
+        # Report total memory usage metrics (always reported for backward compatibility)
         stats_reporter.report_data_amount(
             iteration_step=self.step,
             event_name="tbe.total_hbm_usage",
@@ -1761,6 +1877,76 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
             tbe_id=self.uuid,
         )
 
+        # Check if detailed memory breakdown is enabled via environment variable
+        # Set FBGEMM_TBE_MEM_BREAKDOWN=1 to enable expensive detailed breakdown
+        enable_detailed_breakdown = (
+            int(os.environ.get("FBGEMM_TBE_MEM_BREAKDOWN", "0")) == 1
+        )
+
+        if not enable_detailed_breakdown:
+            return
+
+        # Tensor groups for sparse memory categorization
+        weight_tensors = ["weights_dev", "weights_host", "weights_uvm"]
+        optimizer_tensors = [
+            "momentum1_dev",
+            "momentum1_host",
+            "momentum1_uvm",
+            "momentum2_dev",
+            "momentum2_host",
+            "momentum2_uvm",
+        ]
+        cache_tensors = [
+            "lxu_cache_weights",
+            "lxu_cache_state",
+            "lxu_state",
+            "cache_hash_size_cumsum",
+            "cache_index_table_map",
+            "cache_miss_counter",
+            "lxu_cache_locking_counter",
+        ]
+
+        # Calculate total memory for each component
+        weights_total = sum(self._get_tensor_memory(t) for t in weight_tensors)
+        optimizer_total = sum(self._get_tensor_memory(t) for t in optimizer_tensors)
+        cache_total = sum(self._get_tensor_memory(t) for t in cache_tensors)
+
+        # Categorize memory by location (HBM vs UVM)
+        if self.use_cpu:
+            weights_hbm, weights_uvm = 0, weights_total
+            opt_hbm, opt_uvm = 0, optimizer_total
+            cache_hbm, cache_uvm = 0, cache_total
+        else:
+            weights_hbm, weights_uvm = self._categorize_memory_by_location(
+                weight_tensors
+            )
+            opt_hbm, opt_uvm = self._categorize_memory_by_location(optimizer_tensors)
+            cache_hbm, cache_uvm = self._categorize_memory_by_location(cache_tensors)
+
+        # Calculate ephemeral memory split between HBM and UVM
+        static_sparse_hbm = weights_hbm + opt_hbm + cache_hbm
+        static_sparse_uvm = weights_uvm + opt_uvm + cache_uvm
+        ephemeral_hbm = total_hbm_usage - static_sparse_hbm
+        ephemeral_uvm = total_uvm_usage - static_sparse_uvm
+
+        # Report granular memory breakdowns
+        self._report_hbm_breakdown(
+            stats_reporter,
+            weights_hbm,
+            opt_hbm,
+            cache_hbm,
+            static_sparse_hbm,
+            ephemeral_hbm,
+        )
+        self._report_uvm_breakdown(
+            stats_reporter,
+            weights_uvm,
+            opt_uvm,
+            cache_uvm,
+            static_sparse_uvm,
+            ephemeral_uvm,
+        )
+
     @torch.jit.ignore
     def _report_io_size_count(self, event: str, data: Tensor) -> Tensor:
         if self.stats_reporter is None:
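The detailed breakdown is opt-in because it walks every registered tensor. Enabling it only requires the environment variable named in the hunk above; the event names below are the ones the new code emits:

import os

# Must be set before the reporting step runs; "0" (the default) keeps only
# the pre-existing tbe.total_hbm_usage / tbe.total_uvm_usage events.
os.environ["FBGEMM_TBE_MEM_BREAKDOWN"] = "1"

# With the flag on, each reporting step additionally emits:
#   tbe.hbm.embeddings, tbe.hbm.optimizer_states, tbe.hbm.cache,
#   tbe.hbm.total_static_sparse, tbe.hbm.ephemeral
# plus the matching tbe.uvm.* series.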
--- a/fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py
+++ b/fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py
@@ -76,6 +76,7 @@ class KVEmbeddingInference(IntNBitTableBatchedEmbeddingBagsCodegen):
         reverse_qparam: bool = False,  # True to load qparams at the end of each row; False to load qparams at the beginning of each row.
         feature_names_per_table: Optional[list[list[str]]] = None,
         indices_dtype: torch.dtype = torch.int32,  # Used for construction of the remap_indices tensors. Should match the dtype of the indices passed in the forward() call (INT32 or INT64).
+        embedding_cache_mode: bool = False,  # True for zero initialization, False for randomized initialization
     ) -> None:  # noqa C901  # tuple of (rows, dims,)
         super(KVEmbeddingInference, self).__init__(
             embedding_specs=embedding_specs,
@@ -114,9 +115,13 @@ class KVEmbeddingInference(IntNBitTableBatchedEmbeddingBagsCodegen):
         num_shards = 32
         uniform_init_lower: float = -0.01
         uniform_init_upper: float = 0.01
+
         # pyre-fixme[4]: Attribute must be annotated.
         self.kv_embedding_cache = torch.classes.fbgemm.DramKVEmbeddingInferenceWrapper(
-            num_shards, uniform_init_lower, uniform_init_upper
+            num_shards,
+            uniform_init_lower,
+            uniform_init_upper,
+            embedding_cache_mode,  # in embedding_cache_mode, we disable random init
         )
 
         self.specs: list[tuple[int, int, int]] = [
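A hedged construction sketch for the new flag; the class name and parameter come from the diff, while the import paths and the embedding_specs layout (name, rows, dim, weight type, location) are assumptions for illustration:

from fbgemm_gpu.split_embedding_configs import SparseType
from fbgemm_gpu.split_table_batched_embeddings_ops_common import EmbeddingLocation
from fbgemm_gpu.tbe.cache.kv_embedding_ops_inference import KVEmbeddingInference

# embedding_cache_mode=True zero-initializes rows that miss in the DRAM KV
# cache instead of drawing them from uniform(-0.01, 0.01).
tbe = KVEmbeddingInference(
    embedding_specs=[("t0", 1_000_000, 128, SparseType.INT8, EmbeddingLocation.HOST)],
    embedding_cache_mode=True,
)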
--- a/fbgemm_gpu_genai_nightly-2025.10.19.dist-info/METADATA
+++ b/fbgemm_gpu_genai_nightly-2025.10.25.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fbgemm_gpu_genai_nightly
-Version: 2025.10.19
+Version: 2025.10.25
 Home-page: https://github.com/pytorch/fbgemm
 Author: FBGEMM Team
 Author-email: packages@pytorch.org
--- a/fbgemm_gpu_genai_nightly-2025.10.19.dist-info/RECORD
+++ b/fbgemm_gpu_genai_nightly-2025.10.25.dist-info/RECORD
@@ -17,7 +17,7 @@ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43m
 fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
 fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=76ME0692CC691xpjiOsY3Xxy-LD_XKs8w9vq1gcm9tM,16440
 fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=dGC85xjQiRUrequBibSf9oMAVHT5Q49zsVo2zW4n_88,81679
-fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=nbrdPt2WYeVB1BDyToa4vfl_XiOza5dEGjSB8jCV_mY,173930
+fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=f0sXfvkE0Wx0Rd3qTT4XmCbBK0wYgWGzhPncZEv-p48,180420
 fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=e3O9ElaWBGvG7TdT3Ok_8cB06jhskXuyCQ0t40dzsEY,5449
 fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
 fbgemm_gpu/tbe_input_multiplexer.py,sha256=TQjwkJ2JkOaQsMYuRdk9RbNa9759EPEtx8bYclChtZY,3063
@@ -32,22 +32,22 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
 fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
 fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
 fbgemm_gpu/docs/sparse_ops.py,sha256=gSLUFdnu8lle_6gLewFkM20wL3ek2jKLvDGMKR6POaY,27292
-fbgemm_gpu/docs/target.genai.json.py,sha256=jEkWy-JyNnwHPxd3flNvyzdyJLdp7YlavRyw46AFi2E,79
+fbgemm_gpu/docs/target.genai.json.py,sha256=zheBID2LxrSDF8HifsFuVZUqVl4YgiUCdj1Xr8ty-O8,79
 fbgemm_gpu/experimental/example/__init__.py,sha256=OvJHZgWnycL1gWKyCXFJCTKuys3KAqx4iadjx3R-tBQ,723
 fbgemm_gpu/experimental/example/fbgemm_gpu_experimental_example_py.so,sha256=PGtZj3tM9mq65PGD08gEiTlj5PsvGaqJ_VkCvveHIIk,243904
 fbgemm_gpu/experimental/example/utils.py,sha256=Je__VkMlBMLOhh7NXOocOdvaa2gz9kl9Dkqeu25tpFA,562
 fbgemm_gpu/experimental/gemm/triton_gemm/__init__.py,sha256=1CqUfzlYyXTvU-BNaUq4RZpLV-2lKAVCAHeJzSIZFWw,419
 fbgemm_gpu/experimental/gemm/triton_gemm/fp4_quantize.py,sha256=2RjIDSzUXtoFoC2ryp-C-j5H83mbSjPwvsvTrThfrqE,215658
-fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py,sha256=5m4SdgUsf2rM_Vul8czgRn_5oVnyi-52TmeidXh05hg,152754
+fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py,sha256=q1o0FfGcUAQjkxKlJjjqKVSaPd3HaBSs6L9qVHY7qKI,152924
 fbgemm_gpu/experimental/gemm/triton_gemm/grouped_gemm.py,sha256=rbjxTMefjQWgJrWK_bYFtBklJigFwv4awPeVexkkiIA,44511
 fbgemm_gpu/experimental/gemm/triton_gemm/matmul_perf_model.py,sha256=SltbY_dsit5e7B8lDIB_VYPrEq0t9kckthj9mQaVNfA,7571
 fbgemm_gpu/experimental/gemm/triton_gemm/utils.py,sha256=rULXIpVaaRS3GKUZ1RHcWUrUyy0xMVREwS1SFShGgcw,4302
 fbgemm_gpu/experimental/gen_ai/__init__.py,sha256=r3NlNCXuIh0pfKwKU5v14y6AZkpoIkKWbtzxSprgeKA,1713
-fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=xStScVAbpzqt0A4gTDkyqWEjamP2hr2Yqz9x17-K-3Q,73184824
+fbgemm_gpu/experimental/gen_ai/fbgemm_gpu_experimental_gen_ai.so,sha256=634_cv9QwuB1pBLoHNdY0zL57T0ByunmBUnFM795WOQ,74888976
 fbgemm_gpu/experimental/gen_ai/quantize.py,sha256=KAljWSdN-1_c5DWfT-3MDxWLMULK49Yu36t6TmQI9Tw,12599
 fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/__init__.py,sha256=ntFgFs0foi6NQx8eqs5I3fCjzKSI0spXfEWiMhlcT00,897
 fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_custom_op.py,sha256=FADVTYzS2u8fA-3iChS5CbtWd0mWF8F3lnXcwr_7vDw,7821
-fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py,sha256=7ydkrZ6qqyiah1dlJX6EuEXXw6WwOqCj7D48PWNJcUw,9259
+fbgemm_gpu/experimental/gen_ai/attention/cutlass_blackwell_fmha/cutlass_blackwell_fmha_interface.py,sha256=K9cPXGOF4E9VHzuVtJjDPoTC7JjhEqS1RmmWSehQrKU,9887
 fbgemm_gpu/experimental/gen_ai/bench/__init__.py,sha256=XpAK_eyqDSKeFC5J9KpnKtbZG07mrDh9d2j1LFKzr-8,404
 fbgemm_gpu/experimental/gen_ai/bench/comm_bench.py,sha256=ApEyJOf_rdIo8V_EgvhZXBGNov8ITC_dnB95v8szulI,8515
 fbgemm_gpu/experimental/gen_ai/bench/gather_scatter_bench.py,sha256=K9Nib6D7xJbw1QwEVuCJrVyI1qs988moo3cieVKYuFY,12057
@@ -94,7 +94,7 @@ fbgemm_gpu/tbe/bench/tbe_data_config_loader.py,sha256=MNddYzoRlu0mNhnsVVG57JN7pB
 fbgemm_gpu/tbe/bench/tbe_data_config_param_models.py,sha256=sptdqcNE9JlgyIJ17neZaMxagKG469_ynX0mVx_JKBY,6090
 fbgemm_gpu/tbe/bench/utils.py,sha256=cq_6FJHlgZ5femAK6XKpj7nJ9jc03qXI16N1ht1CcLg,1721
 fbgemm_gpu/tbe/cache/__init__.py,sha256=lrYwhvqX2eWN0vAPe89HYgMW_O1vccoOcoFHJ9cyM-s,398
-fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py,sha256=m8rCF8bc_5vBrg9677TDZTQXqRdFt6YPUVVKv85up5s,14380
+fbgemm_gpu/tbe/cache/kv_embedding_ops_inference.py,sha256=VmG9EennGcq2By8Tj8VkFsJG0oOCGw8EhlPo8-t--Fk,14604
 fbgemm_gpu/tbe/cache/split_embeddings_cache_ops.py,sha256=vZHj7KIe1DoJDy5eft29XtGg6I-tRx60tjKOcTHRAYI,1321
 fbgemm_gpu/tbe/ssd/__init__.py,sha256=wzfMT10cp_dqK2lrebC449hOdexBnizcf_98lA1NyHs,483
 fbgemm_gpu/tbe/ssd/common.py,sha256=1J8K7sTQswgCYWaVwF-ZdCJj7mNN6O9GI70AaZWzJGE,1044
@@ -121,7 +121,7 @@ fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,99
 fbgemm_gpu/utils/torch_library.py,sha256=ywsAHjbuwesj50LjEu99WkAH17FlaVgePZ9OmFg6YE4,4193
 list_versions/__init__.py,sha256=UmTeqCk-UJWFtlZQWvZao3xvui2w9E3X_JdOXVjRaNw,315
 list_versions/cli_run.py,sha256=CChZoXQ-tiKaWboXAYlPVJ5w8K5zAKiKcncA087I1sc,4508
-fbgemm_gpu_genai_nightly-2025.10.19.dist-info/METADATA,sha256=lB9fb2fZ26k7aR-2jJN6pPU1nuW0KX12jcQ72MOviR4,2656
-fbgemm_gpu_genai_nightly-2025.10.19.dist-info/WHEEL,sha256=k9CVMKlTmOLLXq_OyiiJFbPd6UKfogV4yIUezgPmplE,108
-fbgemm_gpu_genai_nightly-2025.10.19.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
-fbgemm_gpu_genai_nightly-2025.10.19.dist-info/RECORD,,
+fbgemm_gpu_genai_nightly-2025.10.25.dist-info/METADATA,sha256=nAiko7_2Se0u8j18sS-uwInAjpc9TEsVEq0Jn_YNmi4,2656
+fbgemm_gpu_genai_nightly-2025.10.25.dist-info/WHEEL,sha256=k9CVMKlTmOLLXq_OyiiJFbPd6UKfogV4yIUezgPmplE,108
+fbgemm_gpu_genai_nightly-2025.10.25.dist-info/top_level.txt,sha256=_2s1Aa08r_eDn0JP4FjOhzK09Q8bVlEI7q8pMep51UY,25
+fbgemm_gpu_genai_nightly-2025.10.25.dist-info/RECORD,,