deltacat 2.0.0b11__py3-none-any.whl → 2.0.0b12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. deltacat/__init__.py +78 -3
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/conftest.py +0 -18
  6. deltacat/catalog/__init__.py +2 -0
  7. deltacat/catalog/delegate.py +445 -63
  8. deltacat/catalog/interface.py +188 -62
  9. deltacat/catalog/main/impl.py +2417 -271
  10. deltacat/catalog/model/catalog.py +49 -10
  11. deltacat/catalog/model/properties.py +38 -0
  12. deltacat/compute/compactor/compaction_session.py +97 -75
  13. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  14. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  15. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  16. deltacat/compute/compactor/repartition_session.py +8 -21
  17. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  18. deltacat/compute/compactor/steps/materialize.py +9 -7
  19. deltacat/compute/compactor/steps/repartition.py +12 -11
  20. deltacat/compute/compactor/utils/io.py +6 -5
  21. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  22. deltacat/compute/compactor/utils/system_columns.py +3 -1
  23. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  24. deltacat/compute/compactor_v2/constants.py +30 -1
  25. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  26. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  27. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  28. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  29. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  30. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  31. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  32. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  33. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  34. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  35. deltacat/compute/compactor_v2/utils/io.py +11 -4
  36. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  37. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  38. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  39. deltacat/compute/converter/converter_session.py +145 -32
  40. deltacat/compute/converter/model/convert_input.py +26 -19
  41. deltacat/compute/converter/model/convert_input_files.py +33 -16
  42. deltacat/compute/converter/model/convert_result.py +35 -16
  43. deltacat/compute/converter/model/converter_session_params.py +24 -21
  44. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  45. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  46. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  47. deltacat/compute/converter/steps/convert.py +157 -50
  48. deltacat/compute/converter/steps/dedupe.py +24 -11
  49. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  50. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  51. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  52. deltacat/compute/converter/utils/io.py +101 -12
  53. deltacat/compute/converter/utils/s3u.py +33 -27
  54. deltacat/compute/janitor.py +205 -0
  55. deltacat/compute/jobs/client.py +19 -8
  56. deltacat/compute/resource_estimation/delta.py +38 -6
  57. deltacat/compute/resource_estimation/model.py +8 -0
  58. deltacat/constants.py +44 -0
  59. deltacat/docs/autogen/schema/__init__.py +0 -0
  60. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/examples/compactor/__init__.py +0 -0
  64. deltacat/examples/compactor/aws/__init__.py +1 -0
  65. deltacat/examples/compactor/bootstrap.py +863 -0
  66. deltacat/examples/compactor/compactor.py +373 -0
  67. deltacat/examples/compactor/explorer.py +473 -0
  68. deltacat/examples/compactor/gcp/__init__.py +1 -0
  69. deltacat/examples/compactor/job_runner.py +439 -0
  70. deltacat/examples/compactor/utils/__init__.py +1 -0
  71. deltacat/examples/compactor/utils/common.py +261 -0
  72. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  80. deltacat/exceptions.py +66 -4
  81. deltacat/experimental/catalog/iceberg/impl.py +2 -2
  82. deltacat/experimental/compatibility/__init__.py +0 -0
  83. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  84. deltacat/experimental/converter_agent/__init__.py +0 -0
  85. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  86. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  87. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  88. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
  89. deltacat/experimental/storage/iceberg/impl.py +5 -3
  90. deltacat/experimental/storage/iceberg/model.py +7 -3
  91. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  92. deltacat/experimental/storage/rivulet/dataset.py +0 -3
  93. deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
  94. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
  95. deltacat/io/datasource/deltacat_datasource.py +0 -1
  96. deltacat/storage/__init__.py +20 -2
  97. deltacat/storage/interface.py +54 -32
  98. deltacat/storage/main/impl.py +1494 -541
  99. deltacat/storage/model/delta.py +27 -3
  100. deltacat/storage/model/locator.py +6 -12
  101. deltacat/storage/model/manifest.py +182 -6
  102. deltacat/storage/model/metafile.py +151 -78
  103. deltacat/storage/model/namespace.py +8 -1
  104. deltacat/storage/model/partition.py +117 -42
  105. deltacat/storage/model/schema.py +2427 -159
  106. deltacat/storage/model/sort_key.py +40 -0
  107. deltacat/storage/model/stream.py +9 -2
  108. deltacat/storage/model/table.py +12 -1
  109. deltacat/storage/model/table_version.py +11 -0
  110. deltacat/storage/model/transaction.py +1184 -208
  111. deltacat/storage/model/transform.py +81 -2
  112. deltacat/storage/model/types.py +48 -26
  113. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  114. deltacat/tests/aws/test_s3u.py +2 -31
  115. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
  116. deltacat/tests/catalog/test_catalogs.py +54 -11
  117. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
  118. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  119. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  120. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  121. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  122. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  123. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  124. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  125. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  126. deltacat/tests/compute/conftest.py +8 -44
  127. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  128. deltacat/tests/compute/converter/utils.py +15 -6
  129. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  130. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  131. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  132. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  133. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  134. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  135. deltacat/tests/compute/test_janitor.py +236 -0
  136. deltacat/tests/compute/test_util_common.py +716 -43
  137. deltacat/tests/compute/test_util_constant.py +0 -1
  138. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  139. deltacat/tests/experimental/__init__.py +1 -0
  140. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  141. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  142. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  143. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  144. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  145. deltacat/tests/storage/model/test_schema.py +171 -0
  146. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  147. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  148. deltacat/tests/storage/model/test_transaction.py +393 -48
  149. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  150. deltacat/tests/test_deltacat_api.py +988 -4
  151. deltacat/tests/test_exceptions.py +9 -5
  152. deltacat/tests/test_utils/pyarrow.py +52 -21
  153. deltacat/tests/test_utils/storage.py +23 -34
  154. deltacat/tests/types/__init__.py +0 -0
  155. deltacat/tests/types/test_tables.py +104 -0
  156. deltacat/tests/utils/exceptions.py +22 -0
  157. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  158. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  159. deltacat/tests/utils/test_daft.py +121 -31
  160. deltacat/tests/utils/test_numpy.py +1193 -0
  161. deltacat/tests/utils/test_pandas.py +1106 -0
  162. deltacat/tests/utils/test_polars.py +1040 -0
  163. deltacat/tests/utils/test_pyarrow.py +1370 -89
  164. deltacat/types/media.py +221 -11
  165. deltacat/types/tables.py +2329 -59
  166. deltacat/utils/arguments.py +33 -1
  167. deltacat/utils/daft.py +411 -150
  168. deltacat/utils/filesystem.py +100 -0
  169. deltacat/utils/metafile_locator.py +2 -1
  170. deltacat/utils/numpy.py +118 -26
  171. deltacat/utils/pandas.py +577 -48
  172. deltacat/utils/polars.py +658 -27
  173. deltacat/utils/pyarrow.py +1258 -213
  174. deltacat/utils/ray_utils/dataset.py +101 -10
  175. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  176. deltacat/utils/url.py +56 -15
  177. deltacat-2.0.0b12.dist-info/METADATA +1163 -0
  178. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/RECORD +183 -145
  179. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
  180. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  181. deltacat/compute/merge_on_read/__init__.py +0 -4
  182. deltacat/compute/merge_on_read/daft.py +0 -40
  183. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  184. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  185. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  186. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  187. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  188. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  189. deltacat/utils/s3fs.py +0 -21
  190. deltacat-2.0.0b11.dist-info/METADATA +0 -67
  191. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  192. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  193. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
  194. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,17 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
- import base64
4
+ import json
5
5
  import posixpath
6
6
 
7
7
  import pyarrow
8
- import pyarrow as pa
9
8
 
10
- from typing import Any, Dict, List, Optional
9
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING
11
10
 
12
11
  from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
13
- from deltacat.constants import METAFILE_FORMAT, METAFILE_FORMAT_JSON, TXN_DIR_NAME
12
+ from deltacat.constants import TXN_DIR_NAME
14
13
  from deltacat.storage.model.schema import (
15
14
  FieldLocator,
16
- Schema,
17
15
  )
18
16
  from deltacat.storage.model.locator import (
19
17
  Locator,
@@ -33,38 +31,50 @@ from deltacat.storage.model.types import (
33
31
  )
34
32
  from deltacat.types.media import ContentType
35
33
 
34
+ if TYPE_CHECKING:
35
+ from deltacat.compute.compactor import RoundCompletionInfo
36
+
36
37
 
37
38
  """
38
39
  An ordered list of partition values. Partition values are typically derived
39
40
  by applying one or more transforms to a table's fields.
40
41
  """
41
42
  PartitionValues = List[Any]
43
+
44
+ """
45
+ Constants for special partition types.
46
+ """
47
+ UNPARTITIONED_SCHEME_NAME = "unpartitioned_scheme"
42
48
  UNPARTITIONED_SCHEME_ID = "deadbeef-7277-49a4-a195-fdc8ed235d42"
49
+ UNKNOWN_PARTITION_ID = "deadbeef-2fe7-4557-82c9-da53b1862003" # a partition ID that is assumed to exist but is not known
50
+ UNSPECIFIED_PARTITION_ID = "deadbeef-5bff-41ea-b82c-e531f445632b" # a partition ID that has been left intentionally unspecified
43
51
 
44
52
 
45
53
  class Partition(Metafile):
46
54
  @staticmethod
47
55
  def of(
48
56
  locator: Optional[PartitionLocator],
49
- schema: Optional[Schema],
50
57
  content_types: Optional[List[ContentType]],
51
58
  state: Optional[CommitState] = None,
52
59
  previous_stream_position: Optional[int] = None,
53
60
  previous_partition_id: Optional[str] = None,
54
61
  stream_position: Optional[int] = None,
55
62
  partition_scheme_id: Optional[str] = None,
63
+ compaction_round_completion_info: Optional[RoundCompletionInfo] = None,
56
64
  ) -> Partition:
57
65
  partition = Partition()
58
66
  partition.locator = locator
59
- partition.schema = schema
60
67
  partition.content_types = content_types
61
68
  partition.state = state
62
69
  partition.previous_stream_position = previous_stream_position
63
70
  partition.previous_partition_id = previous_partition_id
64
71
  partition.stream_position = stream_position
65
72
  partition.partition_scheme_id = (
66
- partition_scheme_id if locator.partition_values else UNPARTITIONED_SCHEME_ID
73
+ partition_scheme_id
74
+ if locator and locator.partition_values
75
+ else UNPARTITIONED_SCHEME_ID
67
76
  )
77
+ partition.compaction_round_completion_info = compaction_round_completion_info
68
78
  return partition
69
79
 
70
80
  @property
@@ -82,17 +92,6 @@ class Partition(Metafile):
82
92
  def locator_alias(self) -> Optional[PartitionLocatorAlias]:
83
93
  return PartitionLocatorAlias.of(self)
84
94
 
85
- @property
86
- def schema(self) -> Optional[Schema]:
87
- val: Dict[str, Any] = self.get("schema")
88
- if val is not None and not isinstance(val, Schema):
89
- self.schema = val = Schema(val)
90
- return val
91
-
92
- @schema.setter
93
- def schema(self, schema: Optional[Schema]) -> None:
94
- self["schema"] = schema
95
-
96
95
  @property
97
96
  def content_types(self) -> Optional[List[ContentType]]:
98
97
  content_types = self.get("contentTypes")
@@ -149,6 +148,27 @@ class Partition(Metafile):
149
148
  def partition_scheme_id(self, partition_scheme_id: Optional[str]) -> None:
150
149
  self["partitionSchemeId"] = partition_scheme_id
151
150
 
151
+ @property
152
+ def compaction_round_completion_info(self) -> Optional[RoundCompletionInfo]:
153
+ """
154
+ Round completion info for compaction operations.
155
+ This replaces the need for separate round completion files.
156
+ """
157
+ val: Dict[str, Any] = self.get("compactionRoundCompletionInfo")
158
+ if val is not None:
159
+ # Import here to avoid circular imports
160
+ from deltacat.compute.compactor import RoundCompletionInfo
161
+
162
+ if not isinstance(val, RoundCompletionInfo):
163
+ self["compactionRoundCompletionInfo"] = val = RoundCompletionInfo(val)
164
+ return val
165
+
166
+ @compaction_round_completion_info.setter
167
+ def compaction_round_completion_info(
168
+ self, compaction_round_completion_info: Optional[RoundCompletionInfo]
169
+ ) -> None:
170
+ self["compactionRoundCompletionInfo"] = compaction_round_completion_info
171
+
152
172
  @property
153
173
  def partition_id(self) -> Optional[str]:
154
174
  partition_locator = self.locator
@@ -175,6 +195,7 @@ class Partition(Metafile):
175
195
  partition_locator = self.locator
176
196
  if partition_locator:
177
197
  return partition_locator.partition_values
198
+ return None
178
199
 
179
200
  @property
180
201
  def namespace_locator(self) -> Optional[NamespaceLocator]:
@@ -232,6 +253,13 @@ class Partition(Metafile):
232
253
  return partition_locator.table_version
233
254
  return None
234
255
 
256
+ def url(self, catalog_name: Optional[str] = None) -> str:
257
+ return (
258
+ f"dc://{catalog_name}/{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{json.dumps(self.partition_values)}/"
259
+ if catalog_name
260
+ else f"table://{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{json.dumps(self.partition_values)}/"
261
+ )
262
+
235
263
  def is_supported_content_type(self, content_type: ContentType) -> bool:
236
264
  supported_content_types = self.content_types
237
265
  return (not supported_content_types) or (
@@ -240,14 +268,6 @@ class Partition(Metafile):
240
268
 
241
269
  def to_serializable(self) -> Partition:
242
270
  serializable: Partition = Partition.update_for(self)
243
- if serializable.schema:
244
- schema_bytes = serializable.schema.serialize().to_pybytes()
245
- serializable.schema = (
246
- base64.b64encode(schema_bytes).decode("utf-8")
247
- if METAFILE_FORMAT == METAFILE_FORMAT_JSON
248
- else schema_bytes
249
- )
250
-
251
271
  if serializable.table_locator:
252
272
  # replace the mutable table locator
253
273
  serializable.table_version_locator.table_locator = TableLocator.at(
@@ -261,17 +281,6 @@ class Partition(Metafile):
261
281
  path: str,
262
282
  filesystem: Optional[pyarrow.fs.FileSystem] = None,
263
283
  ) -> Partition:
264
- if self.get("schema"):
265
- schema_data = self["schema"]
266
- schema_bytes = (
267
- base64.b64decode(schema_data)
268
- if METAFILE_FORMAT == METAFILE_FORMAT_JSON
269
- else schema_data
270
- )
271
- self["schema"] = Schema.deserialize(pa.py_buffer(schema_bytes))
272
- else:
273
- self["schema"] = None
274
-
275
284
  # restore the table locator from its mapped immutable metafile ID
276
285
  if self.table_locator and self.table_locator.table_name == self.id:
277
286
  parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
@@ -358,7 +367,7 @@ class PartitionLocator(Locator, dict):
358
367
  stream_id,
359
368
  stream_format,
360
369
  )
361
- if stream_id and stream_format
370
+ if stream_format or stream_id
362
371
  else None
363
372
  )
364
373
  return PartitionLocator.of(
@@ -392,7 +401,9 @@ class PartitionLocator(Locator, dict):
392
401
 
393
402
  @partition_values.setter
394
403
  def partition_values(self, partition_values: Optional[PartitionValues]) -> None:
395
- self["partitionValues"] = partition_values
404
+ self["partitionValues"] = (
405
+ partition_values or None
406
+ ) # normalize empty partition values to None
396
407
 
397
408
  @property
398
409
  def partition_id(self) -> Optional[str]:
@@ -468,6 +479,12 @@ class PartitionKey(dict):
468
479
  transform: Optional[Transform] = None,
469
480
  native_object: Optional[Any] = None,
470
481
  ) -> PartitionKey:
482
+ if (
483
+ len(key) > 1
484
+ and transform is not None
485
+ and not transform.is_multi_field_transform
486
+ ):
487
+ raise ValueError(f"{len(key)} keys given for 1-key transform.")
471
488
  return PartitionKey(
472
489
  {
473
490
  "key": key,
@@ -536,6 +553,10 @@ class PartitionKeyList(List[PartitionKey]):
536
553
  self[item] = val = PartitionKey(val)
537
554
  return val
538
555
 
556
+ def __iter__(self):
557
+ for i in range(len(self)):
558
+ yield self[i] # This triggers __getitem__ conversion
559
+
539
560
 
540
561
  class PartitionScheme(dict):
541
562
  @staticmethod
@@ -545,6 +566,40 @@ class PartitionScheme(dict):
545
566
  scheme_id: Optional[str] = None,
546
567
  native_object: Optional[Any] = None,
547
568
  ) -> PartitionScheme:
569
+ # Validate keys if provided
570
+ if keys is not None:
571
+ # Check for empty keys list
572
+ if len(keys) == 0:
573
+ raise ValueError("Partition scheme cannot have empty keys list")
574
+
575
+ # Check for duplicate keys (by field locators and transform types) and names
576
+ seen_key_transform_pairs = set()
577
+ seen_names = set()
578
+ for key in keys:
579
+ # Check for duplicate field locators with identical transform types
580
+ key_tuple = tuple(key.key) if key.key else ()
581
+ transform_type = type(key.transform) if key.transform else None
582
+ key_transform_pair = (key_tuple, transform_type)
583
+
584
+ if key_transform_pair in seen_key_transform_pairs:
585
+ # Use the first field locator for the error message
586
+ key_name = key.key[0] if key.key else "unknown"
587
+ transform_name = (
588
+ transform_type.__name__ if transform_type else "None"
589
+ )
590
+ raise ValueError(
591
+ f"Duplicate partition key found: {key_name} with transform type {transform_name}"
592
+ )
593
+ seen_key_transform_pairs.add(key_transform_pair)
594
+
595
+ # Check for duplicate names (when specified)
596
+ if key.name is not None:
597
+ if key.name in seen_names:
598
+ raise ValueError(
599
+ f"Duplicate partition key name found: {key.name}"
600
+ )
601
+ seen_names.add(key.name)
602
+
548
603
  return PartitionScheme(
549
604
  {
550
605
  "keys": keys,
@@ -565,6 +620,15 @@ class PartitionScheme(dict):
565
620
  return False
566
621
  if not isinstance(other, PartitionScheme):
567
622
  other = PartitionScheme(other)
623
+ # If both have None keys, they are equivalent (for unpartitioned schemes)
624
+ if self.keys is None and other.keys is None:
625
+ return not check_identifiers or (
626
+ self.name == other.name and self.id == other.id
627
+ )
628
+ # If only one has None keys, they are not equivalent
629
+ if self.keys is None or other.keys is None:
630
+ return False
631
+ # Compare keys if both have them
568
632
  for i in range(len(self.keys)):
569
633
  if not self.keys[i].equivalent_to(other.keys[i], check_identifiers):
570
634
  return False
@@ -592,6 +656,13 @@ class PartitionScheme(dict):
592
656
  return self.get("nativeObject")
593
657
 
594
658
 
659
+ UNPARTITIONED_SCHEME = PartitionScheme.of(
660
+ keys=None,
661
+ name=UNPARTITIONED_SCHEME_NAME,
662
+ scheme_id=UNPARTITIONED_SCHEME_ID,
663
+ )
664
+
665
+
595
666
  class PartitionSchemeList(List[PartitionScheme]):
596
667
  @staticmethod
597
668
  def of(items: List[PartitionScheme]) -> PartitionSchemeList:
@@ -608,6 +679,10 @@ class PartitionSchemeList(List[PartitionScheme]):
608
679
  self[item] = val = PartitionScheme(val)
609
680
  return val
610
681
 
682
+ def __iter__(self):
683
+ for i in range(len(self)):
684
+ yield self[i] # This triggers __getitem__ conversion
685
+
611
686
 
612
687
  class PartitionLocatorAliasName(LocatorName):
613
688
  def __init__(self, locator: PartitionLocatorAlias):
@@ -639,8 +714,8 @@ class PartitionLocatorAlias(Locator, dict):
639
714
  ),
640
715
  }
641
716
  )
642
- if parent_partition.state == CommitState.COMMITTED
643
- else None # only committed partitions can be resolved by alias
717
+ if parent_partition.state != CommitState.STAGED
718
+ else None # staged partitions cannot be resolved by alias
644
719
  )
645
720
 
646
721
  @property