deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +5 -9
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -7
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
  40. deltacat/compute/compactor_v2/steps/merge.py +17 -126
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  45. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  46. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  47. deltacat/compute/converter/constants.py +4 -0
  48. deltacat/compute/converter/converter_session.py +143 -0
  49. deltacat/compute/converter/model/convert_input.py +69 -0
  50. deltacat/compute/converter/model/convert_input_files.py +61 -0
  51. deltacat/compute/converter/model/converter_session_params.py +99 -0
  52. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  53. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  54. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  55. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  56. deltacat/compute/converter/steps/__init__.py +0 -0
  57. deltacat/compute/converter/steps/convert.py +211 -0
  58. deltacat/compute/converter/steps/dedupe.py +60 -0
  59. deltacat/compute/converter/utils/__init__.py +0 -0
  60. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  61. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  62. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  63. deltacat/compute/converter/utils/io.py +43 -0
  64. deltacat/compute/converter/utils/s3u.py +133 -0
  65. deltacat/compute/resource_estimation/delta.py +1 -19
  66. deltacat/constants.py +47 -1
  67. deltacat/env.py +51 -0
  68. deltacat/examples/__init__.py +0 -0
  69. deltacat/examples/basic_logging.py +101 -0
  70. deltacat/examples/common/__init__.py +0 -0
  71. deltacat/examples/common/fixtures.py +15 -0
  72. deltacat/examples/hello_world.py +27 -0
  73. deltacat/examples/iceberg/__init__.py +0 -0
  74. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  75. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  76. deltacat/exceptions.py +51 -9
  77. deltacat/logs.py +4 -1
  78. deltacat/storage/__init__.py +118 -28
  79. deltacat/storage/iceberg/__init__.py +0 -0
  80. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  81. deltacat/storage/iceberg/impl.py +737 -0
  82. deltacat/storage/iceberg/model.py +709 -0
  83. deltacat/storage/interface.py +217 -134
  84. deltacat/storage/main/__init__.py +0 -0
  85. deltacat/storage/main/impl.py +2077 -0
  86. deltacat/storage/model/delta.py +118 -71
  87. deltacat/storage/model/interop.py +24 -0
  88. deltacat/storage/model/list_result.py +8 -0
  89. deltacat/storage/model/locator.py +93 -3
  90. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  91. deltacat/storage/model/metafile.py +1316 -0
  92. deltacat/storage/model/namespace.py +34 -18
  93. deltacat/storage/model/partition.py +362 -37
  94. deltacat/storage/model/scan/__init__.py +0 -0
  95. deltacat/storage/model/scan/push_down.py +19 -0
  96. deltacat/storage/model/scan/scan_plan.py +10 -0
  97. deltacat/storage/model/scan/scan_task.py +34 -0
  98. deltacat/storage/model/schema.py +892 -0
  99. deltacat/storage/model/shard.py +47 -0
  100. deltacat/storage/model/sort_key.py +170 -13
  101. deltacat/storage/model/stream.py +208 -80
  102. deltacat/storage/model/table.py +123 -29
  103. deltacat/storage/model/table_version.py +322 -46
  104. deltacat/storage/model/transaction.py +757 -0
  105. deltacat/storage/model/transform.py +198 -61
  106. deltacat/storage/model/types.py +111 -13
  107. deltacat/storage/rivulet/__init__.py +11 -0
  108. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  109. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  110. deltacat/storage/rivulet/dataset.py +744 -0
  111. deltacat/storage/rivulet/dataset_executor.py +87 -0
  112. deltacat/storage/rivulet/feather/__init__.py +5 -0
  113. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  114. deltacat/storage/rivulet/feather/serializer.py +35 -0
  115. deltacat/storage/rivulet/fs/__init__.py +0 -0
  116. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  117. deltacat/storage/rivulet/fs/file_store.py +130 -0
  118. deltacat/storage/rivulet/fs/input_file.py +76 -0
  119. deltacat/storage/rivulet/fs/output_file.py +86 -0
  120. deltacat/storage/rivulet/logical_plan.py +105 -0
  121. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  122. deltacat/storage/rivulet/metastore/delta.py +190 -0
  123. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  124. deltacat/storage/rivulet/metastore/sst.py +82 -0
  125. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  126. deltacat/storage/rivulet/mvp/Table.py +101 -0
  127. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  129. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  130. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  131. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  132. deltacat/storage/rivulet/reader/__init__.py +0 -0
  133. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  134. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  135. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  136. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  137. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  138. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  139. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  140. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  141. deltacat/storage/rivulet/schema/__init__.py +0 -0
  142. deltacat/storage/rivulet/schema/datatype.py +128 -0
  143. deltacat/storage/rivulet/schema/schema.py +251 -0
  144. deltacat/storage/rivulet/serializer.py +40 -0
  145. deltacat/storage/rivulet/serializer_factory.py +42 -0
  146. deltacat/storage/rivulet/writer/__init__.py +0 -0
  147. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  148. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  149. deltacat/tests/_io/__init__.py +1 -0
  150. deltacat/tests/catalog/test_catalogs.py +324 -0
  151. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  152. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  153. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  154. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  155. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  156. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  157. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  158. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  159. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  160. deltacat/tests/compute/conftest.py +75 -0
  161. deltacat/tests/compute/converter/__init__.py +0 -0
  162. deltacat/tests/compute/converter/conftest.py +80 -0
  163. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  164. deltacat/tests/compute/converter/utils.py +123 -0
  165. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  166. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  167. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  168. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  169. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  170. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  171. deltacat/tests/compute/test_util_common.py +19 -12
  172. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  173. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  174. deltacat/tests/storage/__init__.py +0 -0
  175. deltacat/tests/storage/conftest.py +25 -0
  176. deltacat/tests/storage/main/__init__.py +0 -0
  177. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  178. deltacat/tests/storage/model/__init__.py +0 -0
  179. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  180. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  181. deltacat/tests/storage/model/test_schema.py +308 -0
  182. deltacat/tests/storage/model/test_shard.py +22 -0
  183. deltacat/tests/storage/model/test_table_version.py +110 -0
  184. deltacat/tests/storage/model/test_transaction.py +308 -0
  185. deltacat/tests/storage/rivulet/__init__.py +0 -0
  186. deltacat/tests/storage/rivulet/conftest.py +149 -0
  187. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  188. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  189. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  190. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  191. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  192. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  193. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  194. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  195. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  196. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  197. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  198. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  199. deltacat/tests/test_deltacat_api.py +39 -0
  200. deltacat/tests/test_utils/filesystem.py +14 -0
  201. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  202. deltacat/tests/test_utils/pyarrow.py +8 -15
  203. deltacat/tests/test_utils/storage.py +266 -3
  204. deltacat/tests/utils/test_daft.py +3 -3
  205. deltacat/tests/utils/test_pyarrow.py +0 -432
  206. deltacat/types/partial_download.py +1 -1
  207. deltacat/types/tables.py +1 -1
  208. deltacat/utils/export.py +59 -0
  209. deltacat/utils/filesystem.py +320 -0
  210. deltacat/utils/metafile_locator.py +73 -0
  211. deltacat/utils/pyarrow.py +36 -183
  212. deltacat-2.0.dist-info/METADATA +65 -0
  213. deltacat-2.0.dist-info/RECORD +347 -0
  214. deltacat/aws/redshift/__init__.py +0 -19
  215. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  216. deltacat/io/dataset.py +0 -73
  217. deltacat/io/read_api.py +0 -143
  218. deltacat/storage/model/delete_parameters.py +0 -40
  219. deltacat/storage/model/partition_spec.py +0 -71
  220. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  221. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  222. deltacat-1.1.36.dist-info/METADATA +0 -64
  223. deltacat-1.1.36.dist-info/RECORD +0 -219
  224. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  225. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  226. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  227. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  228. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  229. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  230. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  231. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  233. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  234. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
  235. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
  236. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -1,31 +1,50 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
+ import posixpath
4
5
  from typing import Any, Dict, List, Optional
5
6
 
6
- from deltacat.aws.redshift import Manifest, ManifestAuthor, ManifestMeta
7
- from deltacat.storage.model.delete_parameters import DeleteParameters
8
- from deltacat.storage.model.locator import Locator
7
+ import pyarrow
8
+
9
+ from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
10
+ from deltacat.constants import TXN_DIR_NAME
11
+ from deltacat.storage.model.manifest import (
12
+ Manifest,
13
+ ManifestMeta,
14
+ ManifestAuthor,
15
+ )
16
+ from deltacat.storage.model.locator import (
17
+ Locator,
18
+ LocatorName,
19
+ )
9
20
  from deltacat.storage.model.namespace import NamespaceLocator
10
- from deltacat.storage.model.partition import PartitionLocator
21
+ from deltacat.storage.model.partition import (
22
+ PartitionLocator,
23
+ PartitionValues,
24
+ )
11
25
  from deltacat.storage.model.stream import StreamLocator
12
- from deltacat.storage.model.table import TableLocator
26
+ from deltacat.storage.model.table import (
27
+ TableLocator,
28
+ Table,
29
+ )
13
30
  from deltacat.storage.model.table_version import TableVersionLocator
14
- from deltacat.storage.model.types import DeltaType
15
- from deltacat.storage.model.partition_spec import DeltaPartitionSpec, PartitionValues
31
+ from deltacat.storage.model.types import (
32
+ DeltaType,
33
+ StreamFormat,
34
+ )
16
35
 
36
+ DeltaProperties = Dict[str, Any]
17
37
 
18
- class Delta(dict):
38
+
39
+ class Delta(Metafile):
19
40
  @staticmethod
20
41
  def of(
21
42
  locator: Optional[DeltaLocator],
22
43
  delta_type: Optional[DeltaType],
23
44
  meta: Optional[ManifestMeta],
24
- properties: Optional[Dict[str, str]],
45
+ properties: Optional[DeltaProperties],
25
46
  manifest: Optional[Manifest],
26
47
  previous_stream_position: Optional[int] = None,
27
- delete_parameters: Optional[DeleteParameters] = None,
28
- partition_spec: Optional[DeltaPartitionSpec] = None,
29
48
  ) -> Delta:
30
49
  """
31
50
  Creates a Delta metadata model with the given Delta Locator, Delta Type,
@@ -39,8 +58,6 @@ class Delta(dict):
39
58
  delta.properties = properties
40
59
  delta.manifest = manifest
41
60
  delta.previous_stream_position = previous_stream_position
42
- delta.delete_parameters = delete_parameters
43
- delta.partition_spec = partition_spec
44
61
  return delta
45
62
 
46
63
  @staticmethod
@@ -48,7 +65,7 @@ class Delta(dict):
48
65
  deltas: List[Delta],
49
66
  manifest_author: Optional[ManifestAuthor] = None,
50
67
  stream_position: Optional[int] = None,
51
- properties: Optional[Dict[str, str]] = None,
68
+ properties: Optional[DeltaProperties] = None,
52
69
  ) -> Delta:
53
70
  """
54
71
  Merges the input list of deltas into a single delta. All input deltas to
@@ -93,25 +110,10 @@ class Delta(dict):
93
110
  f"Deltas to merge must all share the same delta type "
94
111
  f"(found {len(distinct_delta_types)} delta types)."
95
112
  )
96
- distinct_partition_spec = set([d.partition_spec for d in deltas])
97
- if len(distinct_partition_spec) > 1:
98
- raise ValueError(
99
- f"Deltas to merge must all share the same partition spec "
100
- f"(found {len(distinct_partition_spec)} partition specs)."
101
- )
102
113
  merged_manifest = Manifest.merge_manifests(
103
114
  manifests,
104
115
  manifest_author,
105
116
  )
106
- distinct_delta_type = list(distinct_delta_types)[0]
107
- merged_delete_parameters = None
108
- if distinct_delta_type is DeltaType.DELETE:
109
- delete_parameters: List[DeleteParameters] = [
110
- d.delete_parameters for d in deltas if d.delete_parameters
111
- ]
112
- merged_delete_parameters: Optional[
113
- DeleteParameters
114
- ] = DeleteParameters.merge_delete_parameters(delete_parameters)
115
117
  partition_locator = deltas[0].partition_locator
116
118
  prev_positions = [d.previous_stream_position for d in deltas]
117
119
  prev_position = None if None in prev_positions else max(prev_positions)
@@ -122,7 +124,6 @@ class Delta(dict):
122
124
  properties,
123
125
  merged_manifest,
124
126
  prev_position,
125
- merged_delete_parameters,
126
127
  )
127
128
 
128
129
  @property
@@ -148,11 +149,11 @@ class Delta(dict):
148
149
  self["meta"] = meta
149
150
 
150
151
  @property
151
- def properties(self) -> Optional[Dict[str, str]]:
152
+ def properties(self) -> Optional[DeltaProperties]:
152
153
  return self.get("properties")
153
154
 
154
155
  @properties.setter
155
- def properties(self, properties: Optional[Dict[str, str]]) -> None:
156
+ def properties(self, properties: Optional[DeltaProperties]) -> None:
156
157
  self["properties"] = properties
157
158
 
158
159
  @property
@@ -222,7 +223,7 @@ class Delta(dict):
222
223
  def storage_type(self) -> Optional[str]:
223
224
  delta_locator = self.locator
224
225
  if delta_locator:
225
- return delta_locator.storage_type
226
+ return delta_locator.stream_format
226
227
  return None
227
228
 
228
229
  @property
@@ -253,6 +254,13 @@ class Delta(dict):
253
254
  return delta_locator.stream_id
254
255
  return None
255
256
 
257
+ @property
258
+ def stream_format(self) -> Optional[str]:
259
+ delta_locator = self.locator
260
+ if delta_locator:
261
+ return delta_locator.stream_format
262
+ return None
263
+
256
264
  @property
257
265
  def partition_id(self) -> Optional[str]:
258
266
  delta_locator = self.locator
@@ -274,27 +282,64 @@ class Delta(dict):
274
282
  return delta_locator.stream_position
275
283
  return None
276
284
 
277
- @property
278
- def delete_parameters(self) -> Optional[DeleteParameters]:
279
- delete_parameters = self.get("delete_parameters")
280
- return (
281
- None if delete_parameters is None else DeleteParameters(delete_parameters)
282
- )
285
+ def to_serializable(self) -> Delta:
286
+ serializable = self
287
+ if serializable.table_locator:
288
+ serializable: Delta = Delta.update_for(self)
289
+ # remove the mutable table locator
290
+ serializable.table_version_locator.table_locator = TableLocator.at(
291
+ namespace=self.id,
292
+ table_name=self.id,
293
+ )
294
+ return serializable
295
+
296
+ def from_serializable(
297
+ self,
298
+ path: str,
299
+ filesystem: Optional[pyarrow.fs.FileSystem] = None,
300
+ ) -> Delta:
301
+ # TODO(pdames): Lazily restore table locator on 1st property get.
302
+ # Cache Metafile ID <-> Table/Namespace-Name map at Catalog Init, then
303
+ # swap only Metafile IDs with Names here.
304
+ if self.table_locator and self.table_locator.table_name == self.id:
305
+ parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
306
+ base_metafile_path=path,
307
+ parent_number=4,
308
+ )
309
+ txn_log_dir = posixpath.join(
310
+ posixpath.dirname(
311
+ posixpath.dirname(
312
+ posixpath.dirname(parent_rev_dir_path),
313
+ )
314
+ ),
315
+ TXN_DIR_NAME,
316
+ )
317
+ table = Table.read(
318
+ MetafileRevisionInfo.latest_revision(
319
+ revision_dir_path=parent_rev_dir_path,
320
+ filesystem=filesystem,
321
+ success_txn_log_dir=txn_log_dir,
322
+ ).path,
323
+ filesystem,
324
+ )
325
+ self.table_version_locator.table_locator = table.locator
326
+ return self
283
327
 
284
- @delete_parameters.setter
285
- def delete_parameters(self, delete_parameters: Optional[DeleteParameters]) -> None:
286
- self["delete_parameters"] = delete_parameters
328
+
329
+ class DeltaLocatorName(LocatorName):
330
+ def __init__(self, locator: DeltaLocator):
331
+ self.locator = locator
287
332
 
288
333
  @property
289
- def partition_spec(self) -> Optional[DeltaPartitionSpec]:
290
- val: Dict[str, Any] = self.get("partitionSpec")
291
- if val is not None and not isinstance(val, DeltaPartitionSpec):
292
- self.partition_spec = val = DeltaPartitionSpec(val)
293
- return val
334
+ def immutable_id(self) -> Optional[str]:
335
+ return str(self.locator.stream_position)
294
336
 
295
- @partition_spec.setter
296
- def partition_spec(self, value: Optional[DeltaPartitionSpec]) -> None:
297
- self["partitionSpec"] = value
337
+ @immutable_id.setter
338
+ def immutable_id(self, immutable_id: Optional[str]):
339
+ self.locator.stream_position = int(immutable_id)
340
+
341
+ def parts(self) -> List[str]:
342
+ return [str(self.locator.stream_position)]
298
343
 
299
344
 
300
345
  class DeltaLocator(Locator, dict):
@@ -318,25 +363,37 @@ class DeltaLocator(Locator, dict):
318
363
  table_name: Optional[str],
319
364
  table_version: Optional[str],
320
365
  stream_id: Optional[str],
321
- storage_type: Optional[str],
366
+ stream_format: Optional[StreamFormat],
322
367
  partition_values: Optional[PartitionValues],
323
368
  partition_id: Optional[str],
324
369
  stream_position: Optional[int],
325
370
  ) -> DeltaLocator:
326
- partition_locator = PartitionLocator.at(
327
- namespace,
328
- table_name,
329
- table_version,
330
- stream_id,
331
- storage_type,
332
- partition_values,
333
- partition_id,
371
+ partition_locator = (
372
+ PartitionLocator.at(
373
+ namespace,
374
+ table_name,
375
+ table_version,
376
+ stream_id,
377
+ stream_format,
378
+ partition_values,
379
+ partition_id,
380
+ )
381
+ if partition_values and partition_id
382
+ else None
334
383
  )
335
384
  return DeltaLocator.of(
336
385
  partition_locator,
337
386
  stream_position,
338
387
  )
339
388
 
389
+ @property
390
+ def name(self):
391
+ return DeltaLocatorName(self)
392
+
393
+ @property
394
+ def parent(self) -> Optional[PartitionLocator]:
395
+ return self.partition_locator
396
+
340
397
  @property
341
398
  def partition_locator(self) -> Optional[PartitionLocator]:
342
399
  val: Dict[str, Any] = self.get("partitionLocator")
@@ -406,10 +463,10 @@ class DeltaLocator(Locator, dict):
406
463
  return None
407
464
 
408
465
  @property
409
- def storage_type(self) -> Optional[str]:
466
+ def stream_format(self) -> Optional[str]:
410
467
  partition_locator = self.partition_locator
411
468
  if partition_locator:
412
- return partition_locator.storage_type
469
+ return partition_locator.stream_format
413
470
  return None
414
471
 
415
472
  @property
@@ -432,13 +489,3 @@ class DeltaLocator(Locator, dict):
432
489
  if partition_locator:
433
490
  return partition_locator.table_version
434
491
  return None
435
-
436
- def canonical_string(self) -> str:
437
- """
438
- Returns a unique string for the given locator that can be used
439
- for equality checks (i.e. two locators are equal if they have
440
- the same canonical string).
441
- """
442
- pl_hexdigest = self.partition_locator.hexdigest()
443
- stream_position = self.stream_position
444
- return f"{pl_hexdigest}|{stream_position}"
@@ -0,0 +1,24 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Generic, Optional, TypeVar
3
+
4
+ T = TypeVar("T")
5
+ U = TypeVar("U")
6
+
7
+
8
+ class ModelMapper(ABC, Generic[T, U]):
9
+ @staticmethod
10
+ @abstractmethod
11
+ def map(obj: Optional[T], *args, **kwargs) -> Optional[U]:
12
+ pass
13
+
14
+ @staticmethod
15
+ @abstractmethod
16
+ def unmap(obj: Optional[U], **kwargs) -> Optional[T]:
17
+ pass
18
+
19
+
20
+ class OneWayModelMapper(ABC, Generic[T, U]):
21
+ @staticmethod
22
+ @abstractmethod
23
+ def map(obj: Optional[T], **kwargs) -> Optional[U]:
24
+ pass
@@ -21,6 +21,14 @@ class ListResult(dict, Generic[T]):
21
21
  list_result["nextPageProvider"] = next_page_provider
22
22
  return list_result
23
23
 
24
+ @staticmethod
25
+ def empty() -> ListResult:
26
+ list_result = ListResult()
27
+ list_result["items"] = []
28
+ list_result["paginationKey"] = None
29
+ list_result["nextPageProvider"] = None
30
+ return list_result
31
+
24
32
  def read_page(self) -> Optional[List[T]]:
25
33
  return self.get("items")
26
34
 
@@ -1,14 +1,104 @@
1
+ # Allow classes to use self-referencing Type hints in Python 3.7.
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional, List
5
+
1
6
  from deltacat.utils.common import sha1_digest, sha1_hexdigest
2
7
 
8
+ DEFAULT_NAME_SEPARATOR = "|"
9
+ DEFAULT_PATH_SEPARATOR = "/"
10
+
11
+
12
+ class LocatorName:
13
+ """
14
+ Assigns a name to a catalog object. All sibling catalog objects must be
15
+ assigned unique names (e.g., all namespaces in a catalog must be assigned
16
+ unique locator names, all tables under a namespace must be assigned unique
17
+ locator names, etc.). Names may be mutable (e.g., namespace and table names)
18
+ or immutable (e.g., partition/stream IDs and delta stream positions). Names
19
+ may be single or multi-part.
20
+ """
21
+
22
+ @property
23
+ def immutable_id(self) -> Optional[str]:
24
+ """
25
+ If this locator name is immutable (i.e., if the object it refers to
26
+ can't be renamed) then returns an immutable ID suitable for use in
27
+ URLS or filesystem paths. Returns None if this locator name is mutable
28
+ (i.e., if the object it refers to can be renamed).
29
+ """
30
+ raise NotImplementedError()
31
+
32
+ @immutable_id.setter
33
+ def immutable_id(self, immutable_id: Optional[str]) -> None:
34
+ """
35
+ If this locator name is immutable (i.e., if the object it refers to
36
+ can't be renamed), then sets an immutable ID for this
37
+ locator name suitable for use in URLS or filesystem paths. Note that
38
+ the ID is only considered immutable in durable catalog storage, and
39
+ remains mutable in transient memory (i.e., this setter remains
40
+ functional regardless of whether an ID is already assigned, but each
41
+ update causes it to refer to a new, distinct object in durable storage).
42
+ """
43
+ raise NotImplementedError()
44
+
45
+ def parts(self) -> List[str]:
46
+ """
47
+ Returns the ordered parts of this locator's name.
48
+ """
49
+ raise NotImplementedError()
50
+
51
+ def join(self, separator: str = DEFAULT_NAME_SEPARATOR) -> str:
52
+ """
53
+ Returns this locator name as a string by joining its parts with the
54
+ given separator.
55
+ """
56
+ return separator.join(self.parts())
57
+
58
+ def exists(self) -> bool:
59
+ """
60
+ Returns True if this locator name is defined, False otherwise.
61
+ """
62
+ return self.immutable_id or all(self.parts())
63
+
3
64
 
4
65
  class Locator:
5
- def canonical_string(self) -> str:
66
+ """
67
+ Creates a globally unique reference to any named catalog object. Locators
68
+ are composed of the name of the referenced catalog object and its parent
69
+ Locator (if any). Every Locator has a canonical string representation that
70
+ can be used for global equality checks. Cryptographic digests of this
71
+ canonical string can be used for uniform random hash distribution and
72
+ path-based references to the underlying catalog object in filesystems or
73
+ URLs.
74
+ """
75
+
76
+ @property
77
+ def name(self) -> LocatorName:
78
+ """
79
+ Returns the name of this locator.
80
+ """
81
+ raise NotImplementedError()
82
+
83
+ @property
84
+ def parent(self) -> Optional[Locator]:
85
+ """
86
+ Returns the parent of this locator, if any.
87
+ """
88
+ raise NotImplementedError()
89
+
90
+ def canonical_string(self, separator: str = DEFAULT_NAME_SEPARATOR) -> str:
6
91
  """
7
92
  Returns a unique string for the given locator that can be used
8
93
  for equality checks (i.e. two locators are equal if they have
9
94
  the same canonical string).
10
95
  """
11
- raise NotImplementedError()
96
+ parts = []
97
+ parent_hexdigest = self.parent.hexdigest() if self.parent else None
98
+ if parent_hexdigest:
99
+ parts.append(parent_hexdigest)
100
+ parts.extend(self.name.parts())
101
+ return separator.join([str(part) for part in parts])
12
102
 
13
103
  def digest(self) -> bytes:
14
104
  """
@@ -26,7 +116,7 @@ class Locator:
26
116
  """
27
117
  return sha1_hexdigest(self.canonical_string().encode("utf-8"))
28
118
 
29
- def path(self, root: str, separator: str = "/") -> str:
119
+ def path(self, root: str, separator: str = DEFAULT_PATH_SEPARATOR) -> str:
30
120
  """
31
121
  Returns a path for the locator of the form: "{root}/{hexdigest}", where
32
122
  the default path separator of "/" may optionally be overridden with