deltacat 2.0.0b10__py3-none-any.whl → 2.0.0b12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298) hide show
  1. deltacat/__init__.py +96 -17
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +0 -18
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2435 -279
  12. deltacat/catalog/model/catalog.py +154 -77
  13. deltacat/catalog/model/properties.py +63 -22
  14. deltacat/compute/compactor/compaction_session.py +97 -75
  15. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  16. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  17. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  18. deltacat/compute/compactor/repartition_session.py +8 -21
  19. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  20. deltacat/compute/compactor/steps/materialize.py +9 -7
  21. deltacat/compute/compactor/steps/repartition.py +12 -11
  22. deltacat/compute/compactor/utils/io.py +6 -5
  23. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  24. deltacat/compute/compactor/utils/system_columns.py +3 -1
  25. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  26. deltacat/compute/compactor_v2/constants.py +30 -1
  27. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  28. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  29. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  30. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  31. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  32. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  33. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  34. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  35. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  36. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  37. deltacat/compute/compactor_v2/utils/io.py +11 -4
  38. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  39. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  40. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  41. deltacat/compute/converter/converter_session.py +145 -32
  42. deltacat/compute/converter/model/convert_input.py +26 -19
  43. deltacat/compute/converter/model/convert_input_files.py +33 -16
  44. deltacat/compute/converter/model/convert_result.py +35 -16
  45. deltacat/compute/converter/model/converter_session_params.py +24 -21
  46. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  47. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  48. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  49. deltacat/compute/converter/steps/convert.py +157 -50
  50. deltacat/compute/converter/steps/dedupe.py +24 -11
  51. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  52. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  53. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  54. deltacat/compute/converter/utils/io.py +101 -12
  55. deltacat/compute/converter/utils/s3u.py +33 -27
  56. deltacat/compute/janitor.py +205 -0
  57. deltacat/compute/jobs/client.py +25 -12
  58. deltacat/compute/resource_estimation/delta.py +38 -6
  59. deltacat/compute/resource_estimation/model.py +8 -0
  60. deltacat/constants.py +45 -2
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/env.py +10 -0
  64. deltacat/examples/basic_logging.py +1 -3
  65. deltacat/examples/compactor/aws/__init__.py +1 -0
  66. deltacat/examples/compactor/bootstrap.py +863 -0
  67. deltacat/examples/compactor/compactor.py +373 -0
  68. deltacat/examples/compactor/explorer.py +473 -0
  69. deltacat/examples/compactor/gcp/__init__.py +1 -0
  70. deltacat/examples/compactor/job_runner.py +439 -0
  71. deltacat/examples/compactor/utils/__init__.py +1 -0
  72. deltacat/examples/compactor/utils/common.py +261 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  79. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
  80. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  81. deltacat/examples/indexer/indexer.py +2 -2
  82. deltacat/examples/indexer/job_runner.py +1 -2
  83. deltacat/exceptions.py +66 -4
  84. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  85. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  86. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +29 -11
  87. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  88. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  89. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  90. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  91. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  92. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  93. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  94. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  95. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  96. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  97. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  98. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  99. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  100. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  101. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  102. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  103. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  104. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  105. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  107. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  108. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  109. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  110. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  111. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  112. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  113. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  114. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  115. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  116. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  117. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  118. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  119. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  120. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  121. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  122. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  123. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  124. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  125. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  126. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  127. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  128. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  129. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  130. deltacat/io/datasource/deltacat_datasource.py +0 -1
  131. deltacat/io/reader/deltacat_read_api.py +1 -1
  132. deltacat/storage/__init__.py +20 -2
  133. deltacat/storage/interface.py +54 -32
  134. deltacat/storage/main/impl.py +1494 -541
  135. deltacat/storage/model/delta.py +27 -3
  136. deltacat/storage/model/locator.py +6 -12
  137. deltacat/storage/model/manifest.py +182 -6
  138. deltacat/storage/model/metafile.py +151 -78
  139. deltacat/storage/model/namespace.py +8 -1
  140. deltacat/storage/model/partition.py +117 -42
  141. deltacat/storage/model/schema.py +2427 -159
  142. deltacat/storage/model/shard.py +6 -2
  143. deltacat/storage/model/sort_key.py +40 -0
  144. deltacat/storage/model/stream.py +9 -2
  145. deltacat/storage/model/table.py +12 -1
  146. deltacat/storage/model/table_version.py +11 -0
  147. deltacat/storage/model/transaction.py +1184 -208
  148. deltacat/storage/model/transform.py +81 -2
  149. deltacat/storage/model/types.py +48 -26
  150. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  151. deltacat/tests/aws/test_s3u.py +2 -31
  152. deltacat/tests/catalog/data/__init__.py +0 -0
  153. deltacat/tests/catalog/main/__init__.py +0 -0
  154. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  155. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  156. deltacat/tests/catalog/model/__init__.py +0 -0
  157. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  158. deltacat/tests/catalog/test_catalogs.py +103 -106
  159. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  160. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  161. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  162. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  163. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  164. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  165. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  166. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  167. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  168. deltacat/tests/compute/conftest.py +8 -44
  169. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  170. deltacat/tests/compute/converter/utils.py +15 -6
  171. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  172. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  173. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  174. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  175. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  176. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  177. deltacat/tests/compute/test_janitor.py +236 -0
  178. deltacat/tests/compute/test_util_common.py +716 -43
  179. deltacat/tests/compute/test_util_constant.py +0 -1
  180. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  181. deltacat/tests/daft/__init__.py +0 -0
  182. deltacat/tests/daft/test_model.py +97 -0
  183. deltacat/tests/experimental/__init__.py +1 -0
  184. deltacat/tests/experimental/catalog/__init__.py +0 -0
  185. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  186. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  187. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  188. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  189. deltacat/tests/experimental/daft/__init__.py +0 -0
  190. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  191. deltacat/tests/experimental/storage/__init__.py +0 -0
  192. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  193. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  194. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  195. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -3
  196. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  197. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  198. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  199. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  200. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  201. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  202. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  203. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  204. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +5 -3
  205. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  206. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  207. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  208. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  209. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  210. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  211. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  212. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  213. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  214. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  215. deltacat/tests/storage/model/test_schema.py +171 -0
  216. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  217. deltacat/tests/storage/model/test_shard.py +3 -1
  218. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  219. deltacat/tests/storage/model/test_transaction.py +393 -48
  220. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  221. deltacat/tests/test_deltacat_api.py +988 -4
  222. deltacat/tests/test_exceptions.py +9 -5
  223. deltacat/tests/test_utils/pyarrow.py +52 -21
  224. deltacat/tests/test_utils/storage.py +23 -34
  225. deltacat/tests/types/__init__.py +0 -0
  226. deltacat/tests/types/test_tables.py +104 -0
  227. deltacat/tests/utils/exceptions.py +22 -0
  228. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  229. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  230. deltacat/tests/utils/test_daft.py +121 -31
  231. deltacat/tests/utils/test_numpy.py +1193 -0
  232. deltacat/tests/utils/test_pandas.py +1106 -0
  233. deltacat/tests/utils/test_polars.py +1040 -0
  234. deltacat/tests/utils/test_pyarrow.py +1370 -89
  235. deltacat/types/media.py +224 -14
  236. deltacat/types/tables.py +2329 -59
  237. deltacat/utils/arguments.py +33 -1
  238. deltacat/utils/daft.py +823 -36
  239. deltacat/utils/export.py +3 -1
  240. deltacat/utils/filesystem.py +100 -0
  241. deltacat/utils/metafile_locator.py +2 -1
  242. deltacat/utils/numpy.py +118 -26
  243. deltacat/utils/pandas.py +577 -48
  244. deltacat/utils/polars.py +658 -27
  245. deltacat/utils/pyarrow.py +1258 -213
  246. deltacat/utils/ray_utils/dataset.py +101 -10
  247. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  248. deltacat/utils/url.py +57 -16
  249. deltacat-2.0.0b12.dist-info/METADATA +1163 -0
  250. deltacat-2.0.0b12.dist-info/RECORD +439 -0
  251. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
  252. deltacat/catalog/iceberg/__init__.py +0 -4
  253. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  254. deltacat/compute/merge_on_read/__init__.py +0 -4
  255. deltacat/compute/merge_on_read/daft.py +0 -40
  256. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  257. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  258. deltacat/daft/daft_scan.py +0 -115
  259. deltacat/daft/model.py +0 -258
  260. deltacat/daft/translator.py +0 -126
  261. deltacat/examples/common/fixtures.py +0 -15
  262. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  263. deltacat/storage/rivulet/__init__.py +0 -11
  264. deltacat/storage/rivulet/feather/__init__.py +0 -5
  265. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  266. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  267. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  268. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  269. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  270. deltacat/utils/s3fs.py +0 -21
  271. deltacat-2.0.0b10.dist-info/METADATA +0 -68
  272. deltacat-2.0.0b10.dist-info/RECORD +0 -381
  273. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  274. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  275. /deltacat/{daft → docs/autogen/schema}/__init__.py +0 -0
  276. /deltacat/{examples/common → docs/autogen/schema/inference}/__init__.py +0 -0
  277. /deltacat/examples/{iceberg → compactor}/__init__.py +0 -0
  278. /deltacat/{storage/iceberg → examples/experimental}/__init__.py +0 -0
  279. /deltacat/{storage/rivulet/arrow → examples/experimental/iceberg}/__init__.py +0 -0
  280. /deltacat/{storage/rivulet/fs → examples/experimental/iceberg/converter}/__init__.py +0 -0
  281. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  282. /deltacat/{storage/rivulet/reader → experimental/catalog}/__init__.py +0 -0
  283. /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
  284. /deltacat/{storage/rivulet/schema → experimental/compatibility}/__init__.py +0 -0
  285. /deltacat/{storage/rivulet/writer → experimental/converter_agent}/__init__.py +0 -0
  286. /deltacat/{tests/storage/rivulet → experimental/converter_agent/beam}/__init__.py +0 -0
  287. /deltacat/{tests/storage/rivulet/fs → experimental/storage}/__init__.py +0 -0
  288. /deltacat/{tests/storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
  289. /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
  290. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/storage/rivulet/fs/__init__.py} +0 -0
  291. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  292. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  293. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  294. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  295. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  296. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  297. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
  298. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,85 @@
1
+ import pytest
2
+
3
+ from deltacat.storage import (
4
+ PartitionKey,
5
+ PartitionScheme,
6
+ IdentityTransform,
7
+ )
8
+
9
+
10
+ def test_partition_scheme_validates_empty_keys():
11
+ # When creating a partition scheme with empty keys list
12
+ with pytest.raises(
13
+ ValueError, match="Partition scheme cannot have empty keys list"
14
+ ):
15
+ PartitionScheme.of(
16
+ keys=[],
17
+ name="test_partition_scheme",
18
+ scheme_id="test_partition_scheme_id",
19
+ )
20
+
21
+
22
+ def test_partition_scheme_validates_duplicate_keys():
23
+ # When creating a partition scheme with duplicate keys
24
+ with pytest.raises(ValueError, match="Duplicate partition key found: col1"):
25
+ PartitionScheme.of(
26
+ keys=[
27
+ PartitionKey.of(
28
+ key=["col1"],
29
+ transform=IdentityTransform.of(),
30
+ ),
31
+ PartitionKey.of(
32
+ key=["col1"], # Duplicate key
33
+ transform=IdentityTransform.of(),
34
+ ),
35
+ ],
36
+ name="test_partition_scheme",
37
+ scheme_id="test_partition_scheme_id",
38
+ )
39
+
40
+
41
+ def test_partition_scheme_validates_duplicate_names():
42
+ # When creating a partition scheme with duplicate partition key names
43
+ with pytest.raises(
44
+ ValueError, match="Duplicate partition key name found: partition_1"
45
+ ):
46
+ PartitionScheme.of(
47
+ keys=[
48
+ PartitionKey.of(
49
+ key=["col1"],
50
+ name="partition_1",
51
+ transform=IdentityTransform.of(),
52
+ ),
53
+ PartitionKey.of(
54
+ key=["col2"], # Different field locator
55
+ name="partition_1", # But duplicate name
56
+ transform=IdentityTransform.of(),
57
+ ),
58
+ ],
59
+ name="test_partition_scheme",
60
+ scheme_id="test_partition_scheme_id",
61
+ )
62
+
63
+
64
+ def test_partition_scheme_allows_valid_keys():
65
+ # When creating a partition scheme with valid keys
66
+ partition_scheme = PartitionScheme.of(
67
+ keys=[
68
+ PartitionKey.of(
69
+ key=["col1"],
70
+ transform=IdentityTransform.of(),
71
+ ),
72
+ PartitionKey.of(
73
+ key=["col2"],
74
+ transform=IdentityTransform.of(),
75
+ ),
76
+ ],
77
+ name="test_partition_scheme",
78
+ scheme_id="test_partition_scheme_id",
79
+ )
80
+
81
+ # Then it should succeed
82
+ assert partition_scheme is not None
83
+ assert len(partition_scheme.keys) == 2
84
+ assert partition_scheme.name == "test_partition_scheme"
85
+ assert partition_scheme.id == "test_partition_scheme_id"
@@ -1,10 +1,13 @@
1
1
  import pytest
2
2
  import pyarrow as pa
3
+ from deltacat.exceptions import SchemaValidationError
3
4
 
4
5
  from deltacat.storage.model.schema import (
5
6
  Schema,
6
7
  Field,
7
8
  BASE_SCHEMA_NAME,
9
+ SchemaConsistencyType,
10
+ SchemaUpdate,
8
11
  )
9
12
 
10
13
 
@@ -306,3 +309,171 @@ def test_empty_schema_fails():
306
309
  Schema.of({})
307
310
  with pytest.raises(ValueError):
308
311
  Schema.of([])
312
+
313
+
314
+ def test_schema_type_promotion_edge_cases():
315
+ """Test edge cases for type promotion with SchemaConsistencyType.NONE."""
316
+ # Test 1: Same type - no promotion
317
+ field_int32 = Field.of(
318
+ pa.field("test", pa.int32()), consistency_type=SchemaConsistencyType.NONE
319
+ )
320
+ data_int32 = pa.array([1, 2, 3], type=pa.int32())
321
+ promoted_data, was_promoted = field_int32.promote_type_if_needed(data_int32)
322
+ assert not was_promoted, "Same type should not trigger promotion"
323
+ assert promoted_data.type == pa.int32(), "Data type should remain int32"
324
+
325
+ # Test 2: int32 to int64 promotion
326
+ data_int64 = pa.array([2147483648], type=pa.int64()) # Value requiring int64
327
+ promoted_data, was_promoted = field_int32.promote_type_if_needed(data_int64)
328
+ assert was_promoted, "int32 field should promote to int64"
329
+ assert promoted_data.type == pa.int64(), "Promoted data should be int64"
330
+
331
+ # Test 3: Nullability preservation
332
+ field_nullable = Field.of(
333
+ pa.field("test", pa.int32(), nullable=True),
334
+ consistency_type=SchemaConsistencyType.NONE,
335
+ )
336
+ data_with_null = pa.array([1, None, 3], type=pa.int32())
337
+ promoted_data, was_promoted = field_nullable.promote_type_if_needed(data_with_null)
338
+ assert not was_promoted, "Same nullable type should not promote"
339
+
340
+ # Test 4: Cross-type promotion (int to float)
341
+ field_int = Field.of(
342
+ pa.field("test", pa.int32()), consistency_type=SchemaConsistencyType.NONE
343
+ )
344
+ data_float = pa.array([1.5, 2.7], type=pa.float64())
345
+ promoted_data, was_promoted = field_int.promote_type_if_needed(data_float)
346
+ assert was_promoted, "int32 should promote to accommodate float64"
347
+ assert pa.types.is_floating(
348
+ promoted_data.type
349
+ ), f"Should promote to float type, got {promoted_data.type}"
350
+
351
+
352
+ def test_schema_update_method(schema_a):
353
+ """Test the Schema.update() convenience method."""
354
+ # Test basic usage
355
+ update = schema_a.update()
356
+ assert isinstance(update, SchemaUpdate)
357
+ assert update.base_schema == schema_a
358
+ assert not update.allow_incompatible_changes
359
+
360
+ # Test with allow_incompatible_changes=True
361
+ update_permissive = schema_a.update(allow_incompatible_changes=True)
362
+ assert isinstance(update_permissive, SchemaUpdate)
363
+ assert update_permissive.base_schema == schema_a
364
+ assert update_permissive.allow_incompatible_changes
365
+
366
+ # Test method chaining with field addition
367
+ new_field = Field.of(pa.field("name", pa.string(), nullable=True), field_id=4)
368
+ updated_schema = schema_a.update().add_field(new_field).apply()
369
+
370
+ assert len(updated_schema.fields) == 2
371
+ assert updated_schema.field("col1") == schema_a.field(
372
+ "col1"
373
+ ) # Original field preserved
374
+ added_field = updated_schema.field("name")
375
+ assert added_field.arrow.name == "name"
376
+ assert added_field.arrow.type == pa.string()
377
+ assert added_field.id == 2 # requested field_id of 4 is ignored and auto-assigned
378
+
379
+
380
+ def test_default_value_type_promotion():
381
+ """Test that default values are correctly cast when field types are promoted."""
382
+
383
+ # Test 1: Unit-level default value casting
384
+ # Create a field with int32 type and default values
385
+ original_field = Field.of(
386
+ pa.field("test_field", pa.int32()),
387
+ past_default=42,
388
+ future_default=100,
389
+ consistency_type=SchemaConsistencyType.NONE,
390
+ )
391
+
392
+ # Test casting to int64
393
+ promoted_past = original_field._cast_default_to_promoted_type(42, pa.int64())
394
+ promoted_future = original_field._cast_default_to_promoted_type(100, pa.int64())
395
+ assert promoted_past == 42
396
+ assert promoted_future == 100
397
+
398
+ # Test casting to float64
399
+ promoted_past_float = original_field._cast_default_to_promoted_type(
400
+ 42, pa.float64()
401
+ )
402
+ promoted_future_float = original_field._cast_default_to_promoted_type(
403
+ 100, pa.float64()
404
+ )
405
+ assert promoted_past_float == 42.0
406
+ assert promoted_future_float == 100.0
407
+
408
+ # Test casting to string
409
+ promoted_past_str = original_field._cast_default_to_promoted_type(42, pa.string())
410
+ promoted_future_str = original_field._cast_default_to_promoted_type(
411
+ 100, pa.string()
412
+ )
413
+ assert promoted_past_str == "42"
414
+ assert promoted_future_str == "100"
415
+
416
+ # Test 2: Test that the default casting logic works correctly
417
+ # Test with None values (should return None)
418
+ none_result = original_field._cast_default_to_promoted_type(None, pa.string())
419
+ assert none_result is None, "None default should remain None"
420
+
421
+ # Test error handling - incompatible cast should raise SchemaValidationError
422
+ with pytest.raises(SchemaValidationError):
423
+ original_field._cast_default_to_promoted_type("not_a_number", pa.int64())
424
+
425
+ # Test with a complex type
426
+ complex_field = Field.of(
427
+ pa.field("complex", pa.list_(pa.int32())),
428
+ consistency_type=SchemaConsistencyType.NONE,
429
+ )
430
+ with pytest.raises(SchemaValidationError):
431
+ complex_field._cast_default_to_promoted_type(42, pa.list_(pa.string()))
432
+
433
+
434
+ def test_default_value_backfill_with_promotion():
435
+ """Test that default values are correctly backfilled when types are promoted."""
436
+
437
+ # Test the interaction between default value casting and binary promotion
438
+ # This represents a common scenario where defaults need to be promoted to binary
439
+ field_with_defaults = Field.of(
440
+ pa.field("test_field", pa.int32()),
441
+ past_default=42,
442
+ future_default=100,
443
+ consistency_type=SchemaConsistencyType.NONE,
444
+ )
445
+
446
+ # Test promotion to string (a common "catch-all" type in type promotion)
447
+ string_past = field_with_defaults._cast_default_to_promoted_type(42, pa.string())
448
+ string_future = field_with_defaults._cast_default_to_promoted_type(100, pa.string())
449
+
450
+ assert string_past == "42", f"Expected '42', got {string_past}"
451
+ assert string_future == "100", f"Expected '100', got {string_future}"
452
+
453
+ # Also test floats to string
454
+ float_field = Field.of(
455
+ pa.field("float_field", pa.float32()),
456
+ past_default=3.14159,
457
+ future_default=2.71828,
458
+ consistency_type=SchemaConsistencyType.NONE,
459
+ )
460
+
461
+ string_past = float_field._cast_default_to_promoted_type(3.14159, pa.string())
462
+ string_future = float_field._cast_default_to_promoted_type(2.71828, pa.string())
463
+
464
+ assert string_past == "3.14159", f"Expected '3.14159', got {string_past}"
465
+ assert string_future == "2.71828", f"Expected '2.71828', got {string_future}"
466
+
467
+ # Test that None defaults are handled correctly
468
+ none_field = Field.of(
469
+ pa.field("none_field", pa.int32()),
470
+ past_default=None,
471
+ future_default=42,
472
+ consistency_type=SchemaConsistencyType.NONE,
473
+ )
474
+
475
+ none_past = none_field._cast_default_to_promoted_type(None, pa.string())
476
+ valid_future = none_field._cast_default_to_promoted_type(42, pa.string())
477
+
478
+ assert none_past is None, f"None should remain None, got {none_past}"
479
+ assert valid_future == "42", f"Expected '42', got {valid_future}"