fbgemm-gpu-hstu-nightly 2025.6.15__cp313-cp313-manylinux_2_28_x86_64.whl → 2025.6.17__cp313-cp313-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fbgemm_gpu/asmjit.so +0 -0
- fbgemm_gpu/docs/version.py +1 -1
- fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so +0 -0
- fbgemm_gpu/fbgemm.so +0 -0
- fbgemm_gpu/split_table_batched_embeddings_ops_training.py +14 -2
- fbgemm_gpu/utils/filestore.py +50 -1
- {fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/METADATA +1 -4
- {fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/RECORD +10 -10
- {fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/WHEEL +0 -0
- {fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/top_level.txt +0 -0
fbgemm_gpu/asmjit.so
CHANGED
|
Binary file
|
fbgemm_gpu/docs/version.py
CHANGED
|
Binary file
|
fbgemm_gpu/fbgemm.so
CHANGED
|
Binary file
|
fbgemm_gpu/split_table_batched_embeddings_ops_training.py
CHANGED
|
@@ -1947,6 +1947,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
1947
1947
|
per_sample_weights,
|
|
1948
1948
|
batch_size_per_feature_per_rank,
|
|
1949
1949
|
force_cast_input_types=True,
|
|
1950
|
+
prefetch_pipeline=False,
|
|
1950
1951
|
)
|
|
1951
1952
|
|
|
1952
1953
|
# Print input stats if enable (for debugging purpose only)
|
|
@@ -2478,6 +2479,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
2478
2479
|
per_sample_weights=None,
|
|
2479
2480
|
batch_size_per_feature_per_rank=batch_size_per_feature_per_rank,
|
|
2480
2481
|
force_cast_input_types=False,
|
|
2482
|
+
prefetch_pipeline=self.prefetch_pipeline,
|
|
2481
2483
|
)
|
|
2482
2484
|
|
|
2483
2485
|
with self._recording_to_timer(
|
|
@@ -3543,6 +3545,7 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
3543
3545
|
per_sample_weights: Optional[Tensor] = None,
|
|
3544
3546
|
batch_size_per_feature_per_rank: Optional[List[List[int]]] = None,
|
|
3545
3547
|
force_cast_input_types: bool = True,
|
|
3548
|
+
prefetch_pipeline: bool = False,
|
|
3546
3549
|
) -> Tuple[Tensor, Tensor, Optional[Tensor], invokers.lookup_args.VBEMetadata]:
|
|
3547
3550
|
"""
|
|
3548
3551
|
Prepare TBE inputs as follows:
|
|
@@ -3613,9 +3616,17 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
3613
3616
|
per_sample_weights = per_sample_weights.float()
|
|
3614
3617
|
|
|
3615
3618
|
if self.bounds_check_mode_int != BoundsCheckMode.NONE.value:
|
|
3619
|
+
# Override the bounds check version based on prefetch_pipeline
|
|
3620
|
+
use_bounds_check_v2 = self.bounds_check_version == 2 or prefetch_pipeline
|
|
3621
|
+
bounds_check_version = (
|
|
3622
|
+
2 if use_bounds_check_v2 else self.bounds_check_version
|
|
3623
|
+
)
|
|
3624
|
+
|
|
3625
|
+
vbe = vbe_metadata.B_offsets is not None
|
|
3626
|
+
|
|
3616
3627
|
# Compute B info and VBE metadata for bounds_check_indices only if
|
|
3617
3628
|
# VBE and bounds check indices v2 are used
|
|
3618
|
-
if vbe and self.bounds_check_version == 2:
|
|
3629
|
+
if vbe and use_bounds_check_v2:
|
|
3619
3630
|
B_offsets = vbe_metadata.B_offsets
|
|
3620
3631
|
B_offsets_rank_per_feature = vbe_metadata.B_offsets_rank_per_feature
|
|
3621
3632
|
output_offsets_feature_rank = vbe_metadata.output_offsets_feature_rank
|
|
@@ -3653,7 +3664,8 @@ class SplitTableBatchedEmbeddingBagsCodegen(nn.Module):
|
|
|
3653
3664
|
b_t_map=b_t_map,
|
|
3654
3665
|
info_B_num_bits=self.info_B_num_bits,
|
|
3655
3666
|
info_B_mask=self.info_B_mask,
|
|
3656
|
-
bounds_check_version=self.bounds_check_version,
|
|
3667
|
+
bounds_check_version=bounds_check_version,
|
|
3668
|
+
prefetch_pipeline=prefetch_pipeline,
|
|
3657
3669
|
)
|
|
3658
3670
|
|
|
3659
3671
|
return indices, offsets, per_sample_weights, vbe_metadata
|
fbgemm_gpu/utils/filestore.py
CHANGED
|
@@ -155,4 +155,53 @@ class FileStore:
|
|
|
155
155
|
True if file exists, False otherwise.
|
|
156
156
|
"""
|
|
157
157
|
filepath = f"{self.bucket}/{path}"
|
|
158
|
-
return os.path.
|
|
158
|
+
return os.path.exists(filepath)
|
|
159
|
+
|
|
160
|
+
def create_directory(self, path: str) -> "FileStore":
|
|
161
|
+
"""
|
|
162
|
+
Creates a directory in the file store.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
path (str): The path of the node or symlink to a directory (relative
|
|
166
|
+
to `self.bucket`) to be created.
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
self. This allows for method-chaining.
|
|
170
|
+
"""
|
|
171
|
+
filepath = f"{self.bucket}/{path}"
|
|
172
|
+
event = f"creating directory {filepath}"
|
|
173
|
+
logger.info(f"FileStore: {event}")
|
|
174
|
+
|
|
175
|
+
try:
|
|
176
|
+
if not os.path.exists(filepath):
|
|
177
|
+
os.makedirs(filepath, exist_ok=True)
|
|
178
|
+
except Exception as e:
|
|
179
|
+
logger.error(f"FileStore: exception occurred when {event}: {e}")
|
|
180
|
+
raise e
|
|
181
|
+
|
|
182
|
+
return self
|
|
183
|
+
|
|
184
|
+
def remove_directory(self, path: str) -> "FileStore":
|
|
185
|
+
"""
|
|
186
|
+
Removes a directory from the file store.
|
|
187
|
+
|
|
188
|
+
Args:
|
|
189
|
+
path (str): The path of the node or symlink to a directory (relative
|
|
190
|
+
to `self.bucket`) to be removed.
|
|
191
|
+
|
|
192
|
+
Returns:
|
|
193
|
+
self. This allows for method-chaining.
|
|
194
|
+
"""
|
|
195
|
+
filepath = f"{self.bucket}/{path}"
|
|
196
|
+
event = f"deleting {filepath}"
|
|
197
|
+
logger.info(f"FileStore: {event}")
|
|
198
|
+
|
|
199
|
+
try:
|
|
200
|
+
if os.path.isdir(filepath):
|
|
201
|
+
os.rmdir(filepath)
|
|
202
|
+
|
|
203
|
+
except Exception as e:
|
|
204
|
+
logger.error(f"Manifold: exception occurred when {event}: {e}")
|
|
205
|
+
raise e
|
|
206
|
+
|
|
207
|
+
return self
|
{fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fbgemm_gpu_hstu_nightly
|
|
3
|
-
Version: 2025.6.15
|
|
3
|
+
Version: 2025.6.17
|
|
4
4
|
Home-page: https://github.com/pytorch/fbgemm
|
|
5
5
|
Author: FBGEMM Team
|
|
6
6
|
Author-email: packages@pytorch.org
|
|
@@ -40,9 +40,6 @@ PyTorch GPU operator libraries for training and inference. The library provides
|
|
|
40
40
|
efficient table batched embedding bag, data layout transformation, and
|
|
41
41
|
quantization supports.
|
|
42
42
|
|
|
43
|
-
FBGEMM_GPU is currently tested with CUDA 12.4 and 11.8 in CI, and with PyTorch
|
|
44
|
-
packages (2.1+) that are built against those CUDA versions.
|
|
45
|
-
|
|
46
43
|
See the full [Documentation](https://pytorch.org/FBGEMM) for more information
|
|
47
44
|
on building, installing, and developing with FBGEMM_GPU, as well as the most
|
|
48
45
|
up-to-date support matrix for this library.
|
{fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/RECORD
RENAMED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
fbgemm_gpu/__init__.py,sha256=BrIitwvFsRtKEk1ZBHFUi9j6ZUgoA5K7CvepoBez0u4,3419
|
|
2
|
-
fbgemm_gpu/asmjit.so,sha256=
|
|
2
|
+
fbgemm_gpu/asmjit.so,sha256=1mgsQhqX1yiUdU9p2w3e7XNhDxhMprHy8qkFKYM01Ww,488288
|
|
3
3
|
fbgemm_gpu/batched_unary_embeddings_ops.py,sha256=u7LfseNeM5gGFQGLAMVO7h2QkFWEOL3ezV5RuhbZn4M,2928
|
|
4
4
|
fbgemm_gpu/enums.py,sha256=GVuzF5cFTLzttkvlH1SdcGrxrppMhDSbQj_Vm_4zmEo,789
|
|
5
|
-
fbgemm_gpu/fbgemm.so,sha256=
|
|
5
|
+
fbgemm_gpu/fbgemm.so,sha256=2giLGFkDpN5f6NtML_Din2J98LCdwJ0kgL_U3sbGoc0,5634864
|
|
6
6
|
fbgemm_gpu/metrics.py,sha256=TsurFLJf0nJvPDN7urWb4LMQlf5RgdWPTTTDO7S4wtI,5663
|
|
7
7
|
fbgemm_gpu/permute_pooled_embedding_modules.py,sha256=kjWuWmQY8e2kMRwIPTzjGjyjV4syKPrphtHdsQTAjWM,5136
|
|
8
8
|
fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=cUrEbRIvLFW_3Zmh07QkN4S1Cfvvge6TYO1VXBFCpz8,2752
|
|
@@ -17,7 +17,7 @@ fbgemm_gpu/split_embedding_utils.py,sha256=Gb40ZKeATxIKEKI3aVQMgDDBanNpKMc53Z43m
|
|
|
17
17
|
fbgemm_gpu/split_table_batched_embeddings_ops.py,sha256=_MIp6uHYHLn4GxGdrGsfddfSsZ2Z9mjsYIrih3ncI1I,2339
|
|
18
18
|
fbgemm_gpu/split_table_batched_embeddings_ops_common.py,sha256=qglNRKKuHkrKiTw90ACjZpMzcjHKXKV7ME3a8QHfQt4,8237
|
|
19
19
|
fbgemm_gpu/split_table_batched_embeddings_ops_inference.py,sha256=bUDWa6IR0vGLDThgB3nmD1yfYa8_HD34B0dtLnd7thw,81692
|
|
20
|
-
fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=
|
|
20
|
+
fbgemm_gpu/split_table_batched_embeddings_ops_training.py,sha256=GY3Wm5X2utjIXcjIcHwByzI38hLa9NlShtHg58wIOOU,164383
|
|
21
21
|
fbgemm_gpu/split_table_batched_embeddings_ops_training_common.py,sha256=ktC10-nakOBpcmJNCOGQsxuBCP8XTwXJ2WeEgIg91tc,5455
|
|
22
22
|
fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py,sha256=7qGkO8FARku38mFYl4Bc4qL8dS1wrfyorS9l1m5ZAVA,718
|
|
23
23
|
fbgemm_gpu/tbe_input_multiplexer.py,sha256=DjU7dPHgAT1avXGvgi8SFfw2Pq7yT8S_7IH8qCXoptA,3069
|
|
@@ -32,10 +32,10 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
|
|
|
32
32
|
fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
|
|
33
33
|
fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
|
|
34
34
|
fbgemm_gpu/docs/sparse_ops.py,sha256=NTcTm0q9h8W2B8PKPoic2fHsAaCbCYunSa_EYK0LtHQ,21382
|
|
35
|
-
fbgemm_gpu/docs/version.py,sha256=
|
|
35
|
+
fbgemm_gpu/docs/version.py,sha256=mYY8Au7MNpxkPW4e6-KmEl1zCys0U_I92tLFTvoN8Oc,315
|
|
36
36
|
fbgemm_gpu/experimental/hstu/__init__.py,sha256=KNisP6qDMwgjgxkGlqUZRNjJ_8o8R-cTmm3HxF7pSqI,1564
|
|
37
37
|
fbgemm_gpu/experimental/hstu/cuda_hstu_attention.py,sha256=5425GRjJuzpXQC-TowgQOCFjZmOwv_EK0lKbURhHBTQ,9920
|
|
38
|
-
fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=
|
|
38
|
+
fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=Y-D0fGB76-GmSS0nU9dqRf3NkXvCmq86ucb8pPhtU0s,352287576
|
|
39
39
|
fbgemm_gpu/quantize/__init__.py,sha256=pftciXHE7csekDFkl7Ui1AWglVMMnSrOO04mREnUdb0,921
|
|
40
40
|
fbgemm_gpu/quantize/quantize_ops.py,sha256=25AIOv9n2UoxamMUaI6EK1Ur4gSHxbZIReHBtgOjjCs,2228
|
|
41
41
|
fbgemm_gpu/sll/__init__.py,sha256=rgXh35-OFUE54E9gGBq3NGxouGvgMv2ccY2bWUTxONY,4191
|
|
@@ -90,10 +90,10 @@ fbgemm_gpu/triton/quantize_ref.py,sha256=q4RBmFaqPVPELU52lbSgB0n26Aun7apeK7bRF2M
|
|
|
90
90
|
fbgemm_gpu/triton/jagged/__init__.py,sha256=om0yhjuzKuE1UQakFMWHsXN4WNb8mvNkZtYofQ8hdn4,246
|
|
91
91
|
fbgemm_gpu/triton/jagged/triton_jagged_tensor_ops.py,sha256=AIC1G6_QBQtMVTyOyEV4ZKJyDzu36UI_9HDgWmZIRaA,29884
|
|
92
92
|
fbgemm_gpu/utils/__init__.py,sha256=JQQNdcTTaEU6ptK-OW-ZQBwTFxEZZpWOtBXWwEZm39o,354
|
|
93
|
-
fbgemm_gpu/utils/filestore.py,sha256=
|
|
93
|
+
fbgemm_gpu/utils/filestore.py,sha256=ijoJYDqHUQlv0OrEtLrgSjkNCreCvy5ZXHfd8atwewc,6186
|
|
94
94
|
fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,990
|
|
95
95
|
fbgemm_gpu/utils/torch_library.py,sha256=dQcHv1qgpu5QYlJjxjd6oeHjtxnmmXzx3PL6vjCmxL4,4199
|
|
96
|
-
fbgemm_gpu_hstu_nightly-2025.6.
|
|
97
|
-
fbgemm_gpu_hstu_nightly-2025.6.
|
|
98
|
-
fbgemm_gpu_hstu_nightly-2025.6.
|
|
99
|
-
fbgemm_gpu_hstu_nightly-2025.6.
|
|
96
|
+
fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/METADATA,sha256=JW6tQAPy8jF9_SLsAZ8BY4c4YnTy3uVQaEc2ac8VyA4,2654
|
|
97
|
+
fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/WHEEL,sha256=Nkv8TSWVt7XcnRf1cdq5HOzycTl6Pjzlmn7gPSv4NiQ,108
|
|
98
|
+
fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/top_level.txt,sha256=2tlbTWLkPjhqvLF_6BbqKzkcPluSE-oPRVjI8axK76I,11
|
|
99
|
+
fbgemm_gpu_hstu_nightly-2025.6.17.dist-info/RECORD,,
|
{fbgemm_gpu_hstu_nightly-2025.6.15.dist-info → fbgemm_gpu_hstu_nightly-2025.6.17.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|