PyPI - fbgemm-gpu-hstu-nightly - Versions diffs - 2025.6.15__cp313-cp313-manylinux_2_28_x86_64.whl → 2025.6.17__cp313-cp313-manylinux_2_28_x86_64.whl - Mend

fbgemm-gpu-hstu-nightly 2025.6.15__cp313-cp313-manylinux_2_28_x86_64.whl → 2025.6.17__cp313-cp313-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

fbgemm_gpu/asmjit.so CHANGED Viewed

Binary file

fbgemm_gpu/docs/version.py CHANGED Viewed

@@ -6,6 +6,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-__version__: str = "2025.6.15"
+__version__: str = "2025.6.17"
 __target__: str = "hstu"
 __variant__: str = "cuda"

fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so CHANGED Viewed

Binary file

fbgemm_gpu/fbgemm.so CHANGED Viewed

Binary file

fbgemm_gpu/split_table_batched_embeddings_ops_training.py CHANGED Viewed

@@ -1947,6 +1947,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
             per_sample_weights,
             batch_size_per_feature_per_rank,
             force_cast_input_types=True,
+            prefetch_pipeline=False,
         )
         # Print input stats if enable (for debugging purpose only)
@@ -2478,6 +2479,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
             per_sample_weights=None,
             batch_size_per_feature_per_rank=batch_size_per_feature_per_rank,
             force_cast_input_types=False,
+            prefetch_pipeline=self.prefetch_pipeline,
         )
         with self._recording_to_timer(
@@ -3543,6 +3545,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
         per_sample_weights: Optional[Tensor] = None,
         batch_size_per_feature_per_rank: Optional[List[List[int]]] = None,
         force_cast_input_types: bool = True,
+        prefetch_pipeline: bool = False,
     ) -> Tuple[Tensor, Tensor, Optional[Tensor], invokers.lookup_args.VBEMetadata]:
         """
         Prepare TBE inputs as follows:
@@ -3613,9 +3616,17 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
                 per_sample_weights = per_sample_weights.float()
         if self.bounds_check_mode_int != BoundsCheckMode.NONE.value:
+            # Override the bounds check version based on prefetch_pipeline
+            use_bounds_check_v2 = self.bounds_check_version == 2 or prefetch_pipeline
+            bounds_check_version = (
+                2 if use_bounds_check_v2 else self.bounds_check_version
+            )
+            vbe = vbe_metadata.B_offsets is not None
             # Compute B info and VBE metadata for bounds_check_indices only if
             # VBE and bounds check indices v2 are used
-            if vbe and self.bounds_check_version == 2:
+            if vbe and use_bounds_check_v2:
                 B_offsets = vbe_metadata.B_offsets
                 B_offsets_rank_per_feature = vbe_metadata.B_offsets_rank_per_feature
                 output_offsets_feature_rank = vbe_metadata.output_offsets_feature_rank
@@ -3653,7 +3664,8 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
                 b_t_map=b_t_map,
                 info_B_num_bits=self.info_B_num_bits,
                 info_B_mask=self.info_B_mask,
-                bounds_check_version=self.bounds_check_version,
+                bounds_check_version=bounds_check_version,
+                prefetch_pipeline=prefetch_pipeline,
             )
         return indices, offsets, per_sample_weights, vbe_metadata

fbgemm_gpu/utils/filestore.py CHANGED Viewed

@@ -155,4 +155,53 @@ class FileStore:
             True if file exists, False otherwise.
         """
         filepath = f"{self.bucket}/{path}"
-        return os.path.isfile(filepath)
+        return os.path.exists(filepath)
+    def create_directory(self, path: str) -> "FileStore":
+        """
+        Creates a directory in the file store.
+        Args:
+            path (str): The path of the node or symlink to a directory (relative
+            to `self.bucket`) to be created.
+        Returns:
+            self.  This allows for method-chaining.
+        """
+        filepath = f"{self.bucket}/{path}"
+        event = f"creating directory {filepath}"
+        logger.info(f"FileStore: {event}")
+        try:
+            if not os.path.exists(filepath):
+                os.makedirs(filepath, exist_ok=True)
+        except Exception as e:
+            logger.error(f"FileStore: exception occurred when {event}: {e}")
+            raise e
+        return self
+    def remove_directory(self, path: str) -> "FileStore":
+        """
+        Removes a directory from the file store.
+        Args:
+            path (str): The path of the node or symlink to a directory (relative
+            to `self.bucket`) to be removed.
+        Returns:
+            self.  This allows for method-chaining.
+        """
+        filepath = f"{self.bucket}/{path}"
+        event = f"deleting {filepath}"
+        logger.info(f"FileStore: {event}")
+        try:
+            if os.path.isdir(filepath):
+                os.rmdir(filepath)
+        except Exception as e:
+            logger.error(f"Manifold: exception occurred when {event}: {e}")
+            raise e
+        return self

{fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fbgemm_gpu_hstu_nightly
-Version: 2025.6.15
+Version: 2025.6.17
 Home-page: https://github.com/pytorch/fbgemm
 Author: FBGEMM Team
 Author-email: packages@pytorch.org
@@ -40,9 +40,6 @@ PyTorch GPU operator libraries for training and inference.  The library provides
 efficient table batched embedding bag, data layout transformation, and
 quantization supports.
-FBGEMM_GPU is currently tested with CUDA 12.4 and 11.8 in CI, and with PyTorch
-packages (2.1+) that are built against those CUDA versions.
 See the full [Documentation](https://pytorch.org/FBGEMM) for more information
 on building, installing, and developing with FBGEMM_GPU, as well as the most
 up-to-date support matrix for this library.

{fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 fbgemm_gpu/__init__.py,sha256=BrIitwvFsRtKEk1ZBHFUi9j6ZUgoA5K7CvepoBez0u4,3419
-fbgemm_gpu/asmjit.so,sha256=N7cRqpxFdbuDfCa4auIqhOARZL6mtifGbfY0OyaEuh8,501728
+fbgemm_gpu/asmjit.so,sha256=1mgsQhqX1yiUdU9p2w3e7XNhDxhMprHy8qkFKYM01Ww,488288
 fbgemm_gpu/batched_unary_embeddings_ops.py,sha256=u7LfseNeM5gGFQGLAMVO7h2QkFWEOL3ezV5RuhbZn4M,2928
 fbgemm_gpu/enums.py,sha256=GVuzF5cFTLzttkvlH1SdcGrxrppMhDSbQj_Vm_4zmEo,789
-fbgemm_gpu/fbgemm.so,sha256=l2CnLwTqGYFdS-vdhZ56w6EPfMDjeh0vIa1uQycYObU,5408824
+fbgemm_gpu/fbgemm.so,sha256=2giLGFkDpN5f6NtML_Din2J98LCdwJ0kgL_U3sbGoc0,5634864
 fbgemm_gpu/metrics.py,sha256=TsurFLJf0nJvPDN7urWb4LMQlf5RgdWPTTTDO7S4wtI,5663
 fbgemm_gpu/permute_pooled_embedding_modules.py,sha256=kjWuWmQY8e2kMRwIPTzjGjyjV4syKPrphtHdsQTAjWM,5136
 fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=cUrEbRIvLFW_3Zmh07QkN4S1Cfvvge6TYO1VXBFCpz8,2752
@@ -17,7 +17,7 @@ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43m
 fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
 fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=qglNRKKuHkrKiTw90ACjZpMzcjHKXKV7ME3a8QHfQt4,8237
 fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=bUDWa6IR0vGLDThgB3nmD1yfYa8_HD34B0dtLnd7thw,81692
-fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=YCLPSW9CXrRwMN5KEU6x0ESbutdhzKTaNOO8oN5kX7I,163875
+fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=GY3Wm5X2utjIXcjIcHwByzI38hLa9NlShtHg58wIOOU,164383
 fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=ktC10-nakOBpcmJNCOGQsxuBCP8XTwXJ2WeEgIg91tc,5455
 fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
 fbgemm_gpu/tbe_input_multiplexer.py,sha256=DjU7dPHgAT1avXGvgi8SFfw2Pq7yT8S_7IH8qCXoptA,3069
@@ -32,10 +32,10 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
 fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
 fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
 fbgemm_gpu/docs/sparse_ops.py,sha256=NTcTm0q9h8W2B8PKPoic2fHsAaCbCYunSa_EYK0LtHQ,21382
-fbgemm_gpu/docs/version.py,sha256=Wfdofi1dDfFLcvCYRMDlfYaAgz8J7SIMZPIBXTmCxcA,315
+fbgemm_gpu/docs/version.py,sha256=mYY8Au7MNpxkPW4e6-KmEl1zCys0U_I92tLFTvoN8Oc,315
 fbgemm_gpu/experimental/hstu/__init__.py,sha256=KNisP6qDMwgjgxkGlqUZRNjJ_8o8R-cTmm3HxF7pSqI,1564
 fbgemm_gpu/experimental/hstu/cuda_hstu_attention.py,sha256=5425GRjJuzpXQC-TowgQOCFjZmOwv_EK0lKbURhHBTQ,9920
-fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=ICHJaRmXwXoi_gfj281yCbMnahYSMEjtn4t6rr_blaw,352696288
+fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=Y-D0fGB76-GmSS0nU9dqRf3NkXvCmq86ucb8pPhtU0s,352287576
 fbgemm_gpu/quantize/__init__.py,sha256=pftciXHE7csekDFkl7Ui1AWglVMMnSrOO04mREnUdb0,921
 fbgemm_gpu/quantize/quantize_ops.py,sha256=25AIOv9n2UoxamMUaI6EK1Ur4gSHxbZIReHBtgOjjCs,2228
 fbgemm_gpu/sll/__init__.py,sha256=rgXh35-OFUE54E9gGBq3NGxouGvgMv2ccY2bWUTxONY,4191
@@ -90,10 +90,10 @@ fbgemm_gpu/triton/quantize_ref.py,sha256=q4RBmFaqPVPELU52lbSgB0n26Aun7apeK7bRF2M
 fbgemm_gpu/triton/jagged/__init__.py,sha256=om0yhjuzKuE1UQakFMWHsXN4WNb8mvNkZtYofQ8hdn4,246
 fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py,sha256=AIC1G6_QBQtMVTyOyEV4ZKJyDzu36UI_9HDgWmZIRaA,29884
 fbgemm_gpu/utils/__init__.py,sha256=JQQNdcTTaEU6ptK-OW-ZQBwTFxEZZpWOtBXWwEZm39o,354
-fbgemm_gpu/utils/filestore.py,sha256=Zshw1dA03m9aHMMAtETdq4bgOLocyLhzlkAUoG8VkdM,4743
+fbgemm_gpu/utils/filestore.py,sha256=ijoJYDqHUQlv0OrEtLrgSjkNCreCvy5ZXHfd8atwewc,6186
 fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,990
 fbgemm_gpu/utils/torch_library.py,sha256=dQcHv1qgpu5QYlJjxjd6oeHjtxnmmXzx3PL6vjCmxL4,4199
-fbgemm_gpu_hstu_nightly-2025.6.15.dist-info/METADATA,sha256=SxliCSqrubFUeLb4Kc97w7fTVCFw0mTOAK4PtWGAw6U,2794
-fbgemm_gpu_hstu_nightly-2025.6.15.dist-info/WHEEL,sha256=Nkv8TSWVt7XcnRf1cdq5HOzycTl6Pjzlmn7gPSv4NiQ,108
-fbgemm_gpu_hstu_nightly-2025.6.15.dist-info/top_level.txt,sha256=2tlbTWLkPjhqvLF_6BbqKzkcPluSE-oPRVjI8axK76I,11
-fbgemm_gpu_hstu_nightly-2025.6.15.dist-info/RECORD,,
+fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/METADATA,sha256=JW6tQAPy8jF9_SLsAZ8BY4c4YnTy3uVQaEc2ac8VyA4,2654
+fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/WHEEL,sha256=Nkv8TSWVt7XcnRf1cdq5HOzycTl6Pjzlmn7gPSv4NiQ,108
+fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/top_level.txt,sha256=2tlbTWLkPjhqvLF_6BbqKzkcPluSE-oPRVjI8axK76I,11
+fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/RECORD,,

{fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/WHEEL RENAMED Viewed

File without changes

{fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/top_level.txt RENAMED Viewed

File without changes