fbgemm-gpu-hstu-nightly 2025.6.15__cp39-cp39-manylinux_2_28_x86_64.whl → 2025.6.17__cp39-cp39-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fbgemm_gpu/asmjit.so CHANGED
Binary file
@@ -6,6 +6,6 @@
6
6
  # This source code is licensed under the BSD-style license found in the
7
7
  # LICENSE file in the root directory of this source tree.
8
8
 
9
- __version__: str = "2025.6.15"
9
+ __version__: str = "2025.6.17"
10
10
  __target__: str = "hstu"
11
11
  __variant__: str = "cuda"
fbgemm_gpu/fbgemm.so CHANGED
Binary file
@@ -1947,6 +1947,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
1947
1947
  per_sample_weights,
1948
1948
  batch_size_per_feature_per_rank,
1949
1949
  force_cast_input_types=True,
1950
+ prefetch_pipeline=False,
1950
1951
  )
1951
1952
 
1952
1953
  # Print input stats if enable (for debugging purpose only)
@@ -2478,6 +2479,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
2478
2479
  per_sample_weights=None,
2479
2480
  batch_size_per_feature_per_rank=batch_size_per_feature_per_rank,
2480
2481
  force_cast_input_types=False,
2482
+ prefetch_pipeline=self.prefetch_pipeline,
2481
2483
  )
2482
2484
 
2483
2485
  with self._recording_to_timer(
@@ -3543,6 +3545,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
3543
3545
  per_sample_weights: Optional[Tensor] = None,
3544
3546
  batch_size_per_feature_per_rank: Optional[List[List[int]]] = None,
3545
3547
  force_cast_input_types: bool = True,
3548
+ prefetch_pipeline: bool = False,
3546
3549
  ) -> Tuple[Tensor, Tensor, Optional[Tensor], invokers.lookup_args.VBEMetadata]:
3547
3550
  """
3548
3551
  Prepare TBE inputs as follows:
@@ -3613,9 +3616,17 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
3613
3616
  per_sample_weights = per_sample_weights.float()
3614
3617
 
3615
3618
  if self.bounds_check_mode_int != BoundsCheckMode.NONE.value:
3619
+ # Override the bounds check version based on prefetch_pipeline
3620
+ use_bounds_check_v2 = self.bounds_check_version == 2 or prefetch_pipeline
3621
+ bounds_check_version = (
3622
+ 2 if use_bounds_check_v2 else self.bounds_check_version
3623
+ )
3624
+
3625
+ vbe = vbe_metadata.B_offsets is not None
3626
+
3616
3627
  # Compute B info and VBE metadata for bounds_check_indices only if
3617
3628
  # VBE and bounds check indices v2 are used
3618
- if vbe and self.bounds_check_version == 2:
3629
+ if vbe and use_bounds_check_v2:
3619
3630
  B_offsets = vbe_metadata.B_offsets
3620
3631
  B_offsets_rank_per_feature = vbe_metadata.B_offsets_rank_per_feature
3621
3632
  output_offsets_feature_rank = vbe_metadata.output_offsets_feature_rank
@@ -3653,7 +3664,8 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
3653
3664
  b_t_map=b_t_map,
3654
3665
  info_B_num_bits=self.info_B_num_bits,
3655
3666
  info_B_mask=self.info_B_mask,
3656
- bounds_check_version=self.bounds_check_version,
3667
+ bounds_check_version=bounds_check_version,
3668
+ prefetch_pipeline=prefetch_pipeline,
3657
3669
  )
3658
3670
 
3659
3671
  return indices, offsets, per_sample_weights, vbe_metadata
@@ -155,4 +155,53 @@ class FileStore:
155
155
  True if file exists, False otherwise.
156
156
  """
157
157
  filepath = f"{self.bucket}/{path}"
158
- return os.path.isfile(filepath)
158
+ return os.path.exists(filepath)
159
+
160
+ def create_directory(self, path: str) -> "FileStore":
161
+ """
162
+ Creates a directory in the file store.
163
+
164
+ Args:
165
+ path (str): The path of the node or symlink to a directory (relative
166
+ to `self.bucket`) to be created.
167
+
168
+ Returns:
169
+ self. This allows for method-chaining.
170
+ """
171
+ filepath = f"{self.bucket}/{path}"
172
+ event = f"creating directory {filepath}"
173
+ logger.info(f"FileStore: {event}")
174
+
175
+ try:
176
+ if not os.path.exists(filepath):
177
+ os.makedirs(filepath, exist_ok=True)
178
+ except Exception as e:
179
+ logger.error(f"FileStore: exception occurred when {event}: {e}")
180
+ raise e
181
+
182
+ return self
183
+
184
+ def remove_directory(self, path: str) -> "FileStore":
185
+ """
186
+ Removes a directory from the file store.
187
+
188
+ Args:
189
+ path (str): The path of the node or symlink to a directory (relative
190
+ to `self.bucket`) to be removed.
191
+
192
+ Returns:
193
+ self. This allows for method-chaining.
194
+ """
195
+ filepath = f"{self.bucket}/{path}"
196
+ event = f"deleting {filepath}"
197
+ logger.info(f"FileStore: {event}")
198
+
199
+ try:
200
+ if os.path.isdir(filepath):
201
+ os.rmdir(filepath)
202
+
203
+ except Exception as e:
204
+ logger.error(f"Manifold: exception occurred when {event}: {e}")
205
+ raise e
206
+
207
+ return self
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fbgemm_gpu_hstu_nightly
3
- Version: 2025.6.15
3
+ Version: 2025.6.17
4
4
  Home-page: https://github.com/pytorch/fbgemm
5
5
  Author: FBGEMM Team
6
6
  Author-email: packages@pytorch.org
@@ -40,9 +40,6 @@ PyTorch GPU operator libraries for training and inference. The library provides
40
40
  efficient table batched embedding bag, data layout transformation, and
41
41
  quantization supports.
42
42
 
43
- FBGEMM_GPU is currently tested with CUDA 12.4 and 11.8 in CI, and with PyTorch
44
- packages (2.1+) that are built against those CUDA versions.
45
-
46
43
  See the full [Documentation](https://pytorch.org/FBGEMM) for more information
47
44
  on building, installing, and developing with FBGEMM_GPU, as well as the most
48
45
  up-to-date support matrix for this library.
@@ -1,8 +1,8 @@
1
1
  fbgemm_gpu/__init__.py,sha256=BrIitwvFsRtKEk1ZBHFUi9j6ZUgoA5K7CvepoBez0u4,3419
2
- fbgemm_gpu/asmjit.so,sha256=L1fhkss7le5eppbkBXPIWjFV4-US7s7jjlkI_wpI-Bo,488288
2
+ fbgemm_gpu/asmjit.so,sha256=j3-tHEPivCTYbFLSYhe_eZztverGclMx9ZuYPenyq3Q,501728
3
3
  fbgemm_gpu/batched_unary_embeddings_ops.py,sha256=u7LfseNeM5gGFQGLAMVO7h2QkFWEOL3ezV5RuhbZn4M,2928
4
4
  fbgemm_gpu/enums.py,sha256=GVuzF5cFTLzttkvlH1SdcGrxrppMhDSbQj_Vm_4zmEo,789
5
- fbgemm_gpu/fbgemm.so,sha256=3uXoJvcUn26NnKzoQJjqpIYYeWLAb2PjIKveQDLZNaE,5601928
5
+ fbgemm_gpu/fbgemm.so,sha256=Q3L1mCy2KdgNbY7Ad1vvNKYATp_WOTbSGQHcQfy4dPI,5453880
6
6
  fbgemm_gpu/metrics.py,sha256=TsurFLJf0nJvPDN7urWb4LMQlf5RgdWPTTTDO7S4wtI,5663
7
7
  fbgemm_gpu/permute_pooled_embedding_modules.py,sha256=kjWuWmQY8e2kMRwIPTzjGjyjV4syKPrphtHdsQTAjWM,5136
8
8
  fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=cUrEbRIvLFW_3Zmh07QkN4S1Cfvvge6TYO1VXBFCpz8,2752
@@ -17,7 +17,7 @@ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43m
17
17
  fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
18
18
  fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=qglNRKKuHkrKiTw90ACjZpMzcjHKXKV7ME3a8QHfQt4,8237
19
19
  fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=bUDWa6IR0vGLDThgB3nmD1yfYa8_HD34B0dtLnd7thw,81692
20
- fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=YCLPSW9CXrRwMN5KEU6x0ESbutdhzKTaNOO8oN5kX7I,163875
20
+ fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=GY3Wm5X2utjIXcjIcHwByzI38hLa9NlShtHg58wIOOU,164383
21
21
  fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=ktC10-nakOBpcmJNCOGQsxuBCP8XTwXJ2WeEgIg91tc,5455
22
22
  fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
23
23
  fbgemm_gpu/tbe_input_multiplexer.py,sha256=DjU7dPHgAT1avXGvgi8SFfw2Pq7yT8S_7IH8qCXoptA,3069
@@ -32,10 +32,10 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
32
32
  fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
33
33
  fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
34
34
  fbgemm_gpu/docs/sparse_ops.py,sha256=NTcTm0q9h8W2B8PKPoic2fHsAaCbCYunSa_EYK0LtHQ,21382
35
- fbgemm_gpu/docs/version.py,sha256=Wfdofi1dDfFLcvCYRMDlfYaAgz8J7SIMZPIBXTmCxcA,315
35
+ fbgemm_gpu/docs/version.py,sha256=mYY8Au7MNpxkPW4e6-KmEl1zCys0U_I92tLFTvoN8Oc,315
36
36
  fbgemm_gpu/experimental/hstu/__init__.py,sha256=KNisP6qDMwgjgxkGlqUZRNjJ_8o8R-cTmm3HxF7pSqI,1564
37
37
  fbgemm_gpu/experimental/hstu/cuda_hstu_attention.py,sha256=5425GRjJuzpXQC-TowgQOCFjZmOwv_EK0lKbURhHBTQ,9920
38
- fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=eUv5kw6kgKc46dHVMeH_xPQB-eAIq5thOpON3Z3pbic,352287576
38
+ fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=4RPZaDc9TkwsOXbT4_EMxvwfhJLIHvG91Yorh37lKok,352696288
39
39
  fbgemm_gpu/quantize/__init__.py,sha256=pftciXHE7csekDFkl7Ui1AWglVMMnSrOO04mREnUdb0,921
40
40
  fbgemm_gpu/quantize/quantize_ops.py,sha256=25AIOv9n2UoxamMUaI6EK1Ur4gSHxbZIReHBtgOjjCs,2228
41
41
  fbgemm_gpu/sll/__init__.py,sha256=rgXh35-OFUE54E9gGBq3NGxouGvgMv2ccY2bWUTxONY,4191
@@ -90,10 +90,10 @@ fbgemm_gpu/triton/quantize_ref.py,sha256=q4RBmFaqPVPELU52lbSgB0n26Aun7apeK7bRF2M
90
90
  fbgemm_gpu/triton/jagged/__init__.py,sha256=om0yhjuzKuE1UQakFMWHsXN4WNb8mvNkZtYofQ8hdn4,246
91
91
  fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py,sha256=AIC1G6_QBQtMVTyOyEV4ZKJyDzu36UI_9HDgWmZIRaA,29884
92
92
  fbgemm_gpu/utils/__init__.py,sha256=JQQNdcTTaEU6ptK-OW-ZQBwTFxEZZpWOtBXWwEZm39o,354
93
- fbgemm_gpu/utils/filestore.py,sha256=Zshw1dA03m9aHMMAtETdq4bgOLocyLhzlkAUoG8VkdM,4743
93
+ fbgemm_gpu/utils/filestore.py,sha256=ijoJYDqHUQlv0OrEtLrgSjkNCreCvy5ZXHfd8atwewc,6186
94
94
  fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,990
95
95
  fbgemm_gpu/utils/torch_library.py,sha256=dQcHv1qgpu5QYlJjxjd6oeHjtxnmmXzx3PL6vjCmxL4,4199
96
- fbgemm_gpu_hstu_nightly-2025.6.15.dist-info/METADATA,sha256=SxliCSqrubFUeLb4Kc97w7fTVCFw0mTOAK4PtWGAw6U,2794
97
- fbgemm_gpu_hstu_nightly-2025.6.15.dist-info/WHEEL,sha256=LLKFkWvXQZlw1kR7spdgzB2PptbzNM_HUUPM3Q0J_XE,106
98
- fbgemm_gpu_hstu_nightly-2025.6.15.dist-info/top_level.txt,sha256=2tlbTWLkPjhqvLF_6BbqKzkcPluSE-oPRVjI8axK76I,11
99
- fbgemm_gpu_hstu_nightly-2025.6.15.dist-info/RECORD,,
96
+ fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/METADATA,sha256=JW6tQAPy8jF9_SLsAZ8BY4c4YnTy3uVQaEc2ac8VyA4,2654
97
+ fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/WHEEL,sha256=LLKFkWvXQZlw1kR7spdgzB2PptbzNM_HUUPM3Q0J_XE,106
98
+ fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/top_level.txt,sha256=2tlbTWLkPjhqvLF_6BbqKzkcPluSE-oPRVjI8axK76I,11
99
+ fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/RECORD,,