fbgemm-gpu-hstu-nightly 2025.6.12__cp311-cp311-manylinux_2_28_x86_64.whl → 2025.6.13__cp311-cp311-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,6 @@
6
6
  # This source code is licensed under the BSD-style license found in the
7
7
  # LICENSE file in the root directory of this source tree.
8
8
 
9
- __version__: str = "2025.6.12"
9
+ __version__: str = "2025.6.13"
10
10
  __target__: str = "hstu"
11
11
  __variant__: str = "cuda"
fbgemm_gpu/sparse_ops.py CHANGED
@@ -420,6 +420,7 @@ def int_nbit_split_embedding_codegen_lookup_function_meta(
420
420
  kINT8QparamsBytes = 8
421
421
 
422
422
  if pooling_mode == PoolingMode.NONE:
423
+ kINT8QparamsBytes = 4
423
424
  D = max(
424
425
  [
425
426
  max_int2_D,
@@ -435,7 +436,7 @@ def int_nbit_split_embedding_codegen_lookup_function_meta(
435
436
  torch._check(D > 0)
436
437
  adjusted_D = D
437
438
  if SparseType.from_int(output_dtype_int) == SparseType.INT8:
438
- adjusted_D += T * kINT8QparamsBytes
439
+ adjusted_D += kINT8QparamsBytes
439
440
  output = dev_weights.new_empty([total_L, adjusted_D], dtype=output_dtype)
440
441
  return output
441
442
 
@@ -9,7 +9,8 @@
9
9
  from __future__ import annotations
10
10
 
11
11
  import functools
12
- from typing import Optional, Union
12
+ import logging
13
+ from typing import List, Optional, Union
13
14
 
14
15
  import torch
15
16
 
@@ -25,6 +26,58 @@ def implements(torch_function):
25
26
  return decorator
26
27
 
27
28
 
29
+ class KVTensorMetadata:
30
+ """
31
+ Class that is used to represent a KVTensor as a Serialized Metadata in python
32
+ This object is used to reconstruct the KVTensor in the publish component
33
+ """
34
+
35
+ checkpoint_paths: List[str]
36
+ tbe_uuid: str
37
+ rdb_num_shards: int
38
+ rdb_num_threads: int
39
+ max_D: int
40
+ table_offset: int
41
+ table_shape: List[int]
42
+ dtype: int
43
+ checkpoint_uuid: str
44
+
45
+ def __init__(
46
+ self,
47
+ checkpoint_paths: List[str],
48
+ tbe_uuid: str,
49
+ rdb_num_shards: int,
50
+ rdb_num_threads: int,
51
+ max_D: int,
52
+ table_offset: int,
53
+ table_shape: List[int],
54
+ dtype: int,
55
+ checkpoint_uuid: str,
56
+ ) -> None:
57
+ """
58
+ Ensure caller loads the module before creating this object.
59
+
60
+ ```
61
+ load_torch_module(
62
+ "//deeplearning/fbgemm/fbgemm_gpu:ssd_split_table_batched_embeddings"
63
+ )
64
+ ```
65
+
66
+ Args:
67
+
68
+ wrapped: torch.classes.fbgemm.KVTensorWrapper
69
+ """
70
+ self.checkpoint_paths = checkpoint_paths
71
+ self.tbe_uuid = tbe_uuid
72
+ self.rdb_num_shards = rdb_num_shards
73
+ self.rdb_num_threads = rdb_num_threads
74
+ self.max_D = max_D
75
+ self.table_offset = table_offset
76
+ self.table_shape = table_shape
77
+ self.checkpoint_uuid = checkpoint_uuid
78
+ self.dtype = dtype
79
+
80
+
28
81
  class PartiallyMaterializedTensor:
29
82
  """
30
83
  A tensor-like object that represents a partially materialized tensor in memory.
@@ -51,6 +104,55 @@ class PartiallyMaterializedTensor:
51
104
  self._is_virtual = is_virtual
52
105
  self._requires_grad = False
53
106
 
107
+ @property
108
+ def generate_kvtensor_metadata(self) -> KVTensorMetadata:
109
+ serialized_metadata = self.wrapped.get_kvtensor_serializable_metadata()
110
+ try:
111
+ metadata_itr = 0
112
+ num_rdb_ckpts = int(serialized_metadata[0])
113
+ metadata_itr += 1
114
+ checkpoint_paths: List[str] = []
115
+ for i in range(num_rdb_ckpts):
116
+ checkpoint_paths.append(serialized_metadata[i + metadata_itr])
117
+ metadata_itr += num_rdb_ckpts
118
+ tbe_uuid = serialized_metadata[metadata_itr]
119
+ metadata_itr += 1
120
+ rdb_num_shards = int(serialized_metadata[metadata_itr])
121
+ metadata_itr += 1
122
+ rdb_num_threads = int(serialized_metadata[metadata_itr])
123
+ metadata_itr += 1
124
+ max_D = int(serialized_metadata[metadata_itr])
125
+ metadata_itr += 1
126
+ table_offset = int(serialized_metadata[metadata_itr])
127
+ metadata_itr += 1
128
+ table_shape: List[int] = []
129
+ table_shape.append(int(serialized_metadata[metadata_itr]))
130
+ metadata_itr += 1
131
+ table_shape.append(int(serialized_metadata[metadata_itr]))
132
+ metadata_itr += 1
133
+ dtype = int(serialized_metadata[metadata_itr])
134
+ metadata_itr += 1
135
+ checkpoint_uuid = serialized_metadata[metadata_itr]
136
+ metadata_itr += 1
137
+ res = KVTensorMetadata(
138
+ checkpoint_paths,
139
+ tbe_uuid,
140
+ rdb_num_shards,
141
+ rdb_num_threads,
142
+ max_D,
143
+ table_offset,
144
+ table_shape,
145
+ dtype,
146
+ checkpoint_uuid,
147
+ )
148
+
149
+ return res
150
+ except Exception as e:
151
+ logging.error(
152
+ f"Failed to parse metadata: {e}, here is metadata: {serialized_metadata}"
153
+ )
154
+ raise e
155
+
54
156
  @property
55
157
  def wrapped(self):
56
158
  """
@@ -249,6 +351,9 @@ class PartiallyMaterializedTensor:
249
351
 
250
352
  return torch.equal(tensor1.full_tensor(), tensor2.full_tensor())
251
353
 
354
+ def get_kvtensor_serializable_metadata(self) -> List[str]:
355
+ return self._wrapped.get_kvtensor_serializable_metadata()
356
+
252
357
  def __hash__(self):
253
358
  return id(self)
254
359
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fbgemm_gpu_hstu_nightly
3
- Version: 2025.6.12
3
+ Version: 2025.6.13
4
4
  Home-page: https://github.com/pytorch/fbgemm
5
5
  Author: FBGEMM Team
6
6
  Author-email: packages@pytorch.org
@@ -9,7 +9,7 @@ fbgemm_gpu/permute_pooled_embedding_modules_split.py,sha256=cUrEbRIvLFW_3Zmh07Qk
9
9
  fbgemm_gpu/quantize_comm.py,sha256=YUzk8F1MZckbkseEoDJ4rOKiglGCGtrPdR1IKGD6Mk4,11177
10
10
  fbgemm_gpu/quantize_utils.py,sha256=hb8G_1xzRwYmwFp8VJrmoaolNxCwqcFwkwnyStk1C0w,7394
11
11
  fbgemm_gpu/runtime_monitor.py,sha256=HM_0cxMO7uuAq8sCiv2lmGgp1jKGzba2qhuUcGyRMog,7425
12
- fbgemm_gpu/sparse_ops.py,sha256=84x_hobQofY29Vzlz0eJxe126Ba-O5oSEQREMpRgOtE,47168
12
+ fbgemm_gpu/sparse_ops.py,sha256=xNoRMp6QNMz8Lq_5LE9IprQtUip3gkTVbyPgJ1AiWSI,47194
13
13
  fbgemm_gpu/split_embedding_configs.py,sha256=DcZ7SV4AmhlN9QPDaJBqzigR-c1zM_bZI3Fh4PYuab4,7266
14
14
  fbgemm_gpu/split_embedding_inference_converter.py,sha256=ilVVowkTiY0WDpOYorj917Tqsez4KWNBdTXuz2bWbp8,7063
15
15
  fbgemm_gpu/split_embedding_optimizer_ops.py,sha256=wXuGazClBMk62yL_r9udUIKaPgQP7SlkSb5ugB75wrQ,711
@@ -32,10 +32,10 @@ fbgemm_gpu/docs/merge_pooled_embedding_ops.py,sha256=oJLgSgZQmhsyGLbTmZTxNgQrk65
32
32
  fbgemm_gpu/docs/permute_pooled_embedding_ops.py,sha256=tZUqLVXlk5O6VAKKDA-OEMx2fCu5QPOOeoAPZA9_nLY,4454
33
33
  fbgemm_gpu/docs/quantize_ops.py,sha256=xTtOaVK1P02ymreE_i21YiyYDZCqhoZY9eWp_mEIRlo,1297
34
34
  fbgemm_gpu/docs/sparse_ops.py,sha256=NTcTm0q9h8W2B8PKPoic2fHsAaCbCYunSa_EYK0LtHQ,21382
35
- fbgemm_gpu/docs/version.py,sha256=XOS_m9ZE4Pr3zwz272OUxVj3s034QJ5xDrpkMLk-x5s,315
35
+ fbgemm_gpu/docs/version.py,sha256=9kytZ-RJTwv5Z4Lajmjf4ELvL28p4wC2CGW1Zv5ZfSg,315
36
36
  fbgemm_gpu/experimental/hstu/__init__.py,sha256=KNisP6qDMwgjgxkGlqUZRNjJ_8o8R-cTmm3HxF7pSqI,1564
37
37
  fbgemm_gpu/experimental/hstu/cuda_hstu_attention.py,sha256=5425GRjJuzpXQC-TowgQOCFjZmOwv_EK0lKbURhHBTQ,9920
38
- fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=KcSmmINAo_BjNginy3sl3FJ5JUJZVN6swdv_OKB576g,352287576
38
+ fbgemm_gpu/experimental/hstu/fbgemm_gpu_experimental_hstu.so,sha256=pDSxzlMTxno_1ZdHPfpE2nR1Vf1LNSyjQGMSrKejE5g,352287576
39
39
  fbgemm_gpu/quantize/__init__.py,sha256=pftciXHE7csekDFkl7Ui1AWglVMMnSrOO04mREnUdb0,921
40
40
  fbgemm_gpu/quantize/quantize_ops.py,sha256=25AIOv9n2UoxamMUaI6EK1Ur4gSHxbZIReHBtgOjjCs,2228
41
41
  fbgemm_gpu/sll/__init__.py,sha256=rgXh35-OFUE54E9gGBq3NGxouGvgMv2ccY2bWUTxONY,4191
@@ -75,7 +75,7 @@ fbgemm_gpu/tbe/ssd/common.py,sha256=1J8K7sTQswgCYWaVwF-ZdCJj7mNN6O9GI70AaZWzJGE,
75
75
  fbgemm_gpu/tbe/ssd/inference.py,sha256=DTjwj3f6JaUMcecWoRNkZpRgXDJ-eE3grtixYwKb5DI,22829
76
76
  fbgemm_gpu/tbe/ssd/training.py,sha256=gCvMY8fGP2JNosMyzW7uiQUuUsDIdUULB_PG51wOpRk,131519
77
77
  fbgemm_gpu/tbe/ssd/utils/__init__.py,sha256=5DgmR2HA6NtmYh2ddkUgpDsZ6a7hF0DPedA1gMpdh18,250
78
- fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py,sha256=V18ZQxdYJNSi6qLqhFaxXj3IxpcPgan3GsoXrosoZ1Q,7510
78
+ fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py,sha256=ePF2b3vCFB_ZzPaMneaXe5gODFVRJmXKbagT2uNKAAQ,10934
79
79
  fbgemm_gpu/tbe/stats/__init__.py,sha256=on29iDtq7cVNh90JR9aeFNG-K9DDoYq0JryzoplL49I,322
80
80
  fbgemm_gpu/tbe/stats/bench_params_reporter.py,sha256=7XIWVObJOxSVUG73xsd_lVSuCFUQkMEGSWW--BoyCH0,7358
81
81
  fbgemm_gpu/tbe/utils/__init__.py,sha256=rlXFm-kTByFZO4SS5C5zMzANRiQmM1NT__eWBayncYg,549
@@ -93,7 +93,7 @@ fbgemm_gpu/utils/__init__.py,sha256=JQQNdcTTaEU6ptK-OW-ZQBwTFxEZZpWOtBXWwEZm39o,
93
93
  fbgemm_gpu/utils/filestore.py,sha256=Zshw1dA03m9aHMMAtETdq4bgOLocyLhzlkAUoG8VkdM,4743
94
94
  fbgemm_gpu/utils/loader.py,sha256=1hCEhNvkflniH46fGcrguLeP1z-6uyOu2QFwqKU5CIM,990
95
95
  fbgemm_gpu/utils/torch_library.py,sha256=dQcHv1qgpu5QYlJjxjd6oeHjtxnmmXzx3PL6vjCmxL4,4199
96
- fbgemm_gpu_hstu_nightly-2025.6.12.dist-info/METADATA,sha256=ayz07_xnYRnh1FYOD4_6vcF3Pn4Ngw8hmoQhOBEfOz8,2794
97
- fbgemm_gpu_hstu_nightly-2025.6.12.dist-info/WHEEL,sha256=V2Q6mQKbouIadCxoRjt9FQ9oKfi45-uZUcoc77zzs0M,108
98
- fbgemm_gpu_hstu_nightly-2025.6.12.dist-info/top_level.txt,sha256=2tlbTWLkPjhqvLF_6BbqKzkcPluSE-oPRVjI8axK76I,11
99
- fbgemm_gpu_hstu_nightly-2025.6.12.dist-info/RECORD,,
96
+ fbgemm_gpu_hstu_nightly-2025.6.13.dist-info/METADATA,sha256=G6aLh_w9SYySzT7xRYMUj8OW6l873R2B71m-tgD6GWs,2794
97
+ fbgemm_gpu_hstu_nightly-2025.6.13.dist-info/WHEEL,sha256=V2Q6mQKbouIadCxoRjt9FQ9oKfi45-uZUcoc77zzs0M,108
98
+ fbgemm_gpu_hstu_nightly-2025.6.13.dist-info/top_level.txt,sha256=2tlbTWLkPjhqvLF_6BbqKzkcPluSE-oPRVjI8axK76I,11
99
+ fbgemm_gpu_hstu_nightly-2025.6.13.dist-info/RECORD,,