fbgemm-gpu-hstu-nightly 2025.6.12__cp313-cp313-manylinux_2_28_x86_64.whl → 2025.6.13__cp313-cp313-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fbgemm_gpu/docs/version.py +1 -1
- fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so +0 -0
- fbgemm_gpu/sparse_ops.py +2 -1
- fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py +106 -1
- {fbgemm_gpu_hstu_nightly-2025.6.12.dist-info → fbgemm_gpu_hstu_nightly-2025.6.13.dist-info}/METADATA +1 -1
- {fbgemm_gpu_hstu_nightly-2025.6.12.dist-info → fbgemm_gpu_hstu_nightly-2025.6.13.dist-info}/RECORD +8 -8
- {fbgemm_gpu_hstu_nightly-2025.6.12.dist-info → fbgemm_gpu_hstu_nightly-2025.6.13.dist-info}/WHEEL +0 -0
- {fbgemm_gpu_hstu_nightly-2025.6.12.dist-info → fbgemm_gpu_hstu_nightly-2025.6.13.dist-info}/top_level.txt +0 -0
fbgemm_gpu/docs/version.py
CHANGED

fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so
CHANGED
Binary file
fbgemm_gpu/sparse_ops.py
CHANGED
@@ -420,6 +420,7 @@ def int_nbit_split_embedding_codegen_lookup_function_meta(
     kINT8QparamsBytes = 8

     if pooling_mode == PoolingMode.NONE:
+        kINT8QparamsBytes = 4
         D = max(
             [
                 max_int2_D,
@@ -435,7 +436,7 @@ def int_nbit_split_embedding_codegen_lookup_function_meta(
     torch._check(D > 0)
     adjusted_D = D
     if SparseType.from_int(output_dtype_int) == SparseType.INT8:
-        adjusted_D +=
+        adjusted_D += kINT8QparamsBytes
     output = dev_weights.new_empty([total_L, adjusted_D], dtype=output_dtype)
     return output

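In short, this hunk makes the meta (shape-only) implementation reserve kINT8QparamsBytes extra columns per row for the INT8 quantization parameters, and sets that constant to 4 when pooling_mode is PoolingMode.NONE (the surrounding context keeps the default of 8). Below is a minimal standalone sketch of the same row-width arithmetic; the helper name and all concrete sizes are illustrative, not part of fbgemm_gpu:

```python
import torch

def adjusted_row_width(D: int, output_is_int8: bool, pooling_none: bool) -> int:
    # Mirrors the hunk above: INT8 output rows carry extra bytes for qparams,
    # 4 bytes under PoolingMode.NONE and 8 bytes otherwise.
    kINT8QparamsBytes = 4 if pooling_none else 8
    return D + kINT8QparamsBytes if output_is_int8 else D

total_L, D = 16, 128  # assumed sizes for the example
adjusted_D = adjusted_row_width(D, output_is_int8=True, pooling_none=True)
output = torch.empty(total_L, adjusted_D, dtype=torch.uint8)
print(output.shape)  # torch.Size([16, 132])
```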
fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py
CHANGED

@@ -9,7 +9,8 @@
 from __future__ import annotations

 import functools
-
+import logging
+from typing import List, Optional, Union

 import torch

@@ -25,6 +26,58 @@ def implements(torch_function):
     return decorator


+class KVTensorMetadata:
+    """
+    Class that is used to represent a KVTensor as a Serialized Metadata in python
+    This object is used to reconstruct the KVTensor in the publish component
+    """
+
+    checkpoint_paths: List[str]
+    tbe_uuid: str
+    rdb_num_shards: int
+    rdb_num_threads: int
+    max_D: int
+    table_offset: int
+    table_shape: List[int]
+    dtype: int
+    checkpoint_uuid: str
+
+    def __init__(
+        self,
+        checkpoint_paths: List[str],
+        tbe_uuid: str,
+        rdb_num_shards: int,
+        rdb_num_threads: int,
+        max_D: int,
+        table_offset: int,
+        table_shape: List[int],
+        dtype: int,
+        checkpoint_uuid: str,
+    ) -> None:
+        """
+        Ensure caller loads the module before creating this object.
+
+        ```
+        load_torch_module(
+            "//deeplearning/fbgemm/fbgemm_gpu:ssd_split_table_batched_embeddings"
+        )
+        ```
+
+        Args:
+
+            wrapped: torch.classes.fbgemm.KVTensorWrapper
+        """
+        self.checkpoint_paths = checkpoint_paths
+        self.tbe_uuid = tbe_uuid
+        self.rdb_num_shards = rdb_num_shards
+        self.rdb_num_threads = rdb_num_threads
+        self.max_D = max_D
+        self.table_offset = table_offset
+        self.table_shape = table_shape
+        self.checkpoint_uuid = checkpoint_uuid
+        self.dtype = dtype
+
+
 class PartiallyMaterializedTensor:
     """
     A tensor-like object that represents a partially materialized tensor in memory.
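KVTensorMetadata is a plain data holder; within this diff it is only constructed by generate_kvtensor_metadata (shown in the next hunk). An illustrative construction looks like the sketch below. Every concrete value is a made-up placeholder, and the import path simply follows the module this diff touches:

```python
# Hypothetical example values; only the class and its field names come from the diff above.
from fbgemm_gpu.tbe.ssd.utils.partially_materialized_tensor import KVTensorMetadata

meta = KVTensorMetadata(
    checkpoint_paths=["/tmp/ckpt/shard_0", "/tmp/ckpt/shard_1"],  # placeholder paths
    tbe_uuid="tbe-1234",
    rdb_num_shards=2,
    rdb_num_threads=4,
    max_D=128,
    table_offset=0,
    table_shape=[1000, 128],  # two entries, as generate_kvtensor_metadata produces
    dtype=0,                  # integer dtype code, as stored in the serialized metadata
    checkpoint_uuid="ckpt-5678",
)
assert meta.table_shape == [1000, 128]
```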
@@ -51,6 +104,55 @@ class PartiallyMaterializedTensor:
         self._is_virtual = is_virtual
         self._requires_grad = False

+    @property
+    def generate_kvtensor_metadata(self) -> KVTensorMetadata:
+        serialized_metadata = self.wrapped.get_kvtensor_serializable_metadata()
+        try:
+            metadata_itr = 0
+            num_rdb_ckpts = int(serialized_metadata[0])
+            metadata_itr += 1
+            checkpoint_paths: List[str] = []
+            for i in range(num_rdb_ckpts):
+                checkpoint_paths.append(serialized_metadata[i + metadata_itr])
+            metadata_itr += num_rdb_ckpts
+            tbe_uuid = serialized_metadata[metadata_itr]
+            metadata_itr += 1
+            rdb_num_shards = int(serialized_metadata[metadata_itr])
+            metadata_itr += 1
+            rdb_num_threads = int(serialized_metadata[metadata_itr])
+            metadata_itr += 1
+            max_D = int(serialized_metadata[metadata_itr])
+            metadata_itr += 1
+            table_offset = int(serialized_metadata[metadata_itr])
+            metadata_itr += 1
+            table_shape: List[int] = []
+            table_shape.append(int(serialized_metadata[metadata_itr]))
+            metadata_itr += 1
+            table_shape.append(int(serialized_metadata[metadata_itr]))
+            metadata_itr += 1
+            dtype = int(serialized_metadata[metadata_itr])
+            metadata_itr += 1
+            checkpoint_uuid = serialized_metadata[metadata_itr]
+            metadata_itr += 1
+            res = KVTensorMetadata(
+                checkpoint_paths,
+                tbe_uuid,
+                rdb_num_shards,
+                rdb_num_threads,
+                max_D,
+                table_offset,
+                table_shape,
+                dtype,
+                checkpoint_uuid,
+            )
+
+            return res
+        except Exception as e:
+            logging.error(
+                f"Failed to parse metadata: {e}, here is metadata: {serialized_metadata}"
+            )
+            raise e
+
     @property
     def wrapped(self):
         """
@@ -249,6 +351,9 @@ class PartiallyMaterializedTensor:

         return torch.equal(tensor1.full_tensor(), tensor2.full_tensor())

+    def get_kvtensor_serializable_metadata(self) -> List[str]:
+        return self._wrapped.get_kvtensor_serializable_metadata()
+
     def __hash__(self):
         return id(self)

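Taken together, the new property consumes a flat list of strings in a fixed order: the number of RocksDB checkpoints, that many checkpoint paths, then tbe_uuid, rdb_num_shards, rdb_num_threads, max_D, table_offset, two table_shape entries, dtype, and checkpoint_uuid. The standalone sketch below walks the same layout; the helper function and sample values are invented for illustration and are not part of fbgemm_gpu:

```python
from typing import List

def parse_kvtensor_metadata(flat: List[str]) -> dict:
    # Same walk order as generate_kvtensor_metadata in the diff above.
    it = iter(flat)
    num_ckpts = int(next(it))
    paths = [next(it) for _ in range(num_ckpts)]
    return {
        "checkpoint_paths": paths,
        "tbe_uuid": next(it),
        "rdb_num_shards": int(next(it)),
        "rdb_num_threads": int(next(it)),
        "max_D": int(next(it)),
        "table_offset": int(next(it)),
        "table_shape": [int(next(it)), int(next(it))],
        "dtype": int(next(it)),
        "checkpoint_uuid": next(it),
    }

sample = [
    "2", "/tmp/ckpt/0", "/tmp/ckpt/1",  # checkpoint count + placeholder paths
    "tbe-1234", "2", "4", "128", "0",   # tbe_uuid, shards, threads, max_D, table_offset
    "1000", "128", "0", "ckpt-5678",    # two table_shape entries, dtype code, checkpoint_uuid
]
print(parse_kvtensor_metadata(sample))
```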
{fbgemm_gpu_hstu_nightly-2025.6.12.dist-info → fbgemm_gpu_hstu_nightly-2025.6.13.dist-info}/RECORD
RENAMED
@@ -9,7 +9,7 @@ fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=cUrEbRIvLFW_3Zmh07Qk
 fbgemm_gpu/quantize_comm.py,sha256=YUzk8F1MZckbkseEoDJ4rOKiglGCGtrPdR1IKGD6Mk4,11177
 fbgemm_gpu/quantize_utils.py,sha256=hb8G_1xzRwYmwFp8VJrmoaolNxCwqcFwkwnyStk1C0w,7394
 fbgemm_gpu/runtime_monitor.py,sha256=HM_0cxMO7uuAq8sCiv2lmGgp1jKGzba2qhuUcGyRMog,7425
-fbgemm_gpu/sparse_ops.py,sha256=
+fbgemm_gpu/sparse_ops.py,sha256=xNoRMp6QNMz8Lq_5LE9IprQtUip3gkTVbyPgJ1AiWSI,47194
 fbgemm_gpu/split_embedding_configs.py,sha256=DcZ7SV4AmhlN9QPDaJBqzigR-c1zM_bZI3Fh4PYuab4,7266
 fbgemm_gpu/split_embedding_inference_converter.py,sha256=ilVVowkTiY0WDpOYorj917Tqsez4KWNBdTXuz2bWbp8,7063
 fbgemm_gpu/split_embedding_optimizer_ops.py,sha256=wXuGazClBMk62yL_r9udUIKaPgQP7SlkSb5ugB75wrQ,711
@@ -32,10 +32,10 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
 fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
 fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
 fbgemm_gpu/docs/sparse_ops.py,sha256=NTcTm0q9h8W2B8PKPoic2fHsAaCbCYunSa_EYK0LtHQ,21382
-fbgemm_gpu/docs/version.py,sha256=
+fbgemm_gpu/docs/version.py,sha256=9kytZ-RJTwv5Z4Lajmjf4ELvL28p4wC2CGW1Zv5ZfSg,315
 fbgemm_gpu/experimental/hstu/__init__.py,sha256=KNisP6qDMwgjgxkGlqUZRNjJ_8o8R-cTmm3HxF7pSqI,1564
 fbgemm_gpu/experimental/hstu/cuda_hstu_attention.py,sha256=5425GRjJuzpXQC-TowgQOCFjZmOwv_EK0lKbURhHBTQ,9920
-fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=
+fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=2aOGFADSpOqYB1y2KqKwL0M8Xxn5McU53frUw8y7XsE,352696288
 fbgemm_gpu/quantize/__init__.py,sha256=pftciXHE7csekDFkl7Ui1AWglVMMnSrOO04mREnUdb0,921
 fbgemm_gpu/quantize/quantize_ops.py,sha256=25AIOv9n2UoxamMUaI6EK1Ur4gSHxbZIReHBtgOjjCs,2228
 fbgemm_gpu/sll/__init__.py,sha256=rgXh35-OFUE54E9gGBq3NGxouGvgMv2ccY2bWUTxONY,4191
@@ -75,7 +75,7 @@ fbgemm_gpu/tbe/ssd/common.py,sha256=1J8K7sTQswgCYWaVwF-ZdCJj7mNN6O9GI70AaZWzJGE,
 fbgemm_gpu/tbe/ssd/inference.py,sha256=DTjwj3f6JaUMcecWoRNkZpRgXDJ-eE3grtixYwKb5DI,22829
 fbgemm_gpu/tbe/ssd/training.py,sha256=gCvMY8fGP2JNosMyzW7uiQUuUsDIdUULB_PG51wOpRk,131519
 fbgemm_gpu/tbe/ssd/utils/__init__.py,sha256=5DgmR2HA6NtmYh2ddkUgpDsZ6a7hF0DPedA1gMpdh18,250
-fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py,sha256=
+fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py,sha256=ePF2b3vCFB_ZzPaMneaXe5gODFVRJmXKbagT2uNKAAQ,10934
 fbgemm_gpu/tbe/stats/__init__.py,sha256=on29iDtq7cVNh90JR9aeFNG-K9DDoYq0JryzoplL49I,322
 fbgemm_gpu/tbe/stats/bench_params_reporter.py,sha256=7XIWVObJOxSVUG73xsd_lVSuCFUQkMEGSWW--BoyCH0,7358
 fbgemm_gpu/tbe/utils/__init__.py,sha256=rlXFm-kTByFZO4SS5C5zMzANRiQmM1NT__eWBayncYg,549
@@ -93,7 +93,7 @@ fbgemm_gpu/utils/__init__.py,sha256=JQQNdcTTaEU6ptK-OW-ZQBwTFxEZZpWOtBXWwEZm39o,
 fbgemm_gpu/utils/filestore.py,sha256=Zshw1dA03m9aHMMAtETdq4bgOLocyLhzlkAUoG8VkdM,4743
 fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,990
 fbgemm_gpu/utils/torch_library.py,sha256=dQcHv1qgpu5QYlJjxjd6oeHjtxnmmXzx3PL6vjCmxL4,4199
-fbgemm_gpu_hstu_nightly-2025.6.
-fbgemm_gpu_hstu_nightly-2025.6.
-fbgemm_gpu_hstu_nightly-2025.6.
-fbgemm_gpu_hstu_nightly-2025.6.
+fbgemm_gpu_hstu_nightly-2025.6.13.dist-info/METADATA,sha256=G6aLh_w9SYySzT7xRYMUj8OW6l873R2B71m-tgD6GWs,2794
+fbgemm_gpu_hstu_nightly-2025.6.13.dist-info/WHEEL,sha256=Nkv8TSWVt7XcnRf1cdq5HOzycTl6Pjzlmn7gPSv4NiQ,108
+fbgemm_gpu_hstu_nightly-2025.6.13.dist-info/top_level.txt,sha256=2tlbTWLkPjhqvLF_6BbqKzkcPluSE-oPRVjI8axK76I,11
+fbgemm_gpu_hstu_nightly-2025.6.13.dist-info/RECORD,,
{fbgemm_gpu_hstu_nightly-2025.6.12.dist-info → fbgemm_gpu_hstu_nightly-2025.6.13.dist-info}/WHEEL
RENAMED
File without changes