deltacat 1.1.35__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +2 -3
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -1
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -5
  40. deltacat/compute/compactor_v2/steps/merge.py +11 -80
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  45. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  46. deltacat/compute/converter/constants.py +4 -0
  47. deltacat/compute/converter/converter_session.py +143 -0
  48. deltacat/compute/converter/model/convert_input.py +69 -0
  49. deltacat/compute/converter/model/convert_input_files.py +61 -0
  50. deltacat/compute/converter/model/converter_session_params.py +99 -0
  51. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  52. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  53. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  54. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  55. deltacat/compute/converter/steps/__init__.py +0 -0
  56. deltacat/compute/converter/steps/convert.py +211 -0
  57. deltacat/compute/converter/steps/dedupe.py +60 -0
  58. deltacat/compute/converter/utils/__init__.py +0 -0
  59. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  60. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  61. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  62. deltacat/compute/converter/utils/io.py +43 -0
  63. deltacat/compute/converter/utils/s3u.py +133 -0
  64. deltacat/compute/resource_estimation/delta.py +1 -19
  65. deltacat/constants.py +47 -1
  66. deltacat/env.py +51 -0
  67. deltacat/examples/__init__.py +0 -0
  68. deltacat/examples/basic_logging.py +101 -0
  69. deltacat/examples/common/__init__.py +0 -0
  70. deltacat/examples/common/fixtures.py +15 -0
  71. deltacat/examples/hello_world.py +27 -0
  72. deltacat/examples/iceberg/__init__.py +0 -0
  73. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  74. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  75. deltacat/exceptions.py +51 -9
  76. deltacat/logs.py +4 -1
  77. deltacat/storage/__init__.py +118 -28
  78. deltacat/storage/iceberg/__init__.py +0 -0
  79. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  80. deltacat/storage/iceberg/impl.py +737 -0
  81. deltacat/storage/iceberg/model.py +709 -0
  82. deltacat/storage/interface.py +217 -134
  83. deltacat/storage/main/__init__.py +0 -0
  84. deltacat/storage/main/impl.py +2077 -0
  85. deltacat/storage/model/delta.py +118 -71
  86. deltacat/storage/model/interop.py +24 -0
  87. deltacat/storage/model/list_result.py +8 -0
  88. deltacat/storage/model/locator.py +93 -3
  89. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  90. deltacat/storage/model/metafile.py +1316 -0
  91. deltacat/storage/model/namespace.py +34 -18
  92. deltacat/storage/model/partition.py +362 -37
  93. deltacat/storage/model/scan/__init__.py +0 -0
  94. deltacat/storage/model/scan/push_down.py +19 -0
  95. deltacat/storage/model/scan/scan_plan.py +10 -0
  96. deltacat/storage/model/scan/scan_task.py +34 -0
  97. deltacat/storage/model/schema.py +892 -0
  98. deltacat/storage/model/shard.py +47 -0
  99. deltacat/storage/model/sort_key.py +170 -13
  100. deltacat/storage/model/stream.py +208 -80
  101. deltacat/storage/model/table.py +123 -29
  102. deltacat/storage/model/table_version.py +322 -46
  103. deltacat/storage/model/transaction.py +757 -0
  104. deltacat/storage/model/transform.py +198 -61
  105. deltacat/storage/model/types.py +111 -13
  106. deltacat/storage/rivulet/__init__.py +11 -0
  107. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  108. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  109. deltacat/storage/rivulet/dataset.py +744 -0
  110. deltacat/storage/rivulet/dataset_executor.py +87 -0
  111. deltacat/storage/rivulet/feather/__init__.py +5 -0
  112. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  113. deltacat/storage/rivulet/feather/serializer.py +35 -0
  114. deltacat/storage/rivulet/fs/__init__.py +0 -0
  115. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  116. deltacat/storage/rivulet/fs/file_store.py +130 -0
  117. deltacat/storage/rivulet/fs/input_file.py +76 -0
  118. deltacat/storage/rivulet/fs/output_file.py +86 -0
  119. deltacat/storage/rivulet/logical_plan.py +105 -0
  120. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  121. deltacat/storage/rivulet/metastore/delta.py +190 -0
  122. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  123. deltacat/storage/rivulet/metastore/sst.py +82 -0
  124. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  125. deltacat/storage/rivulet/mvp/Table.py +101 -0
  126. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  127. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  129. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  130. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  131. deltacat/storage/rivulet/reader/__init__.py +0 -0
  132. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  133. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  134. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  135. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  136. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  137. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  138. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  139. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  140. deltacat/storage/rivulet/schema/__init__.py +0 -0
  141. deltacat/storage/rivulet/schema/datatype.py +128 -0
  142. deltacat/storage/rivulet/schema/schema.py +251 -0
  143. deltacat/storage/rivulet/serializer.py +40 -0
  144. deltacat/storage/rivulet/serializer_factory.py +42 -0
  145. deltacat/storage/rivulet/writer/__init__.py +0 -0
  146. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  147. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  148. deltacat/tests/_io/__init__.py +1 -0
  149. deltacat/tests/catalog/test_catalogs.py +324 -0
  150. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  151. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  152. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  153. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  154. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  155. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  156. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  157. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  158. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  159. deltacat/tests/compute/conftest.py +75 -0
  160. deltacat/tests/compute/converter/__init__.py +0 -0
  161. deltacat/tests/compute/converter/conftest.py +80 -0
  162. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  163. deltacat/tests/compute/converter/utils.py +123 -0
  164. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  165. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  166. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  167. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  168. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  169. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  170. deltacat/tests/compute/test_util_common.py +19 -12
  171. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  172. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  173. deltacat/tests/storage/__init__.py +0 -0
  174. deltacat/tests/storage/conftest.py +25 -0
  175. deltacat/tests/storage/main/__init__.py +0 -0
  176. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  177. deltacat/tests/storage/model/__init__.py +0 -0
  178. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  179. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  180. deltacat/tests/storage/model/test_schema.py +308 -0
  181. deltacat/tests/storage/model/test_shard.py +22 -0
  182. deltacat/tests/storage/model/test_table_version.py +110 -0
  183. deltacat/tests/storage/model/test_transaction.py +308 -0
  184. deltacat/tests/storage/rivulet/__init__.py +0 -0
  185. deltacat/tests/storage/rivulet/conftest.py +149 -0
  186. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  187. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  188. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  189. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  190. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  191. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  192. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  193. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  194. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  195. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  196. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  197. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  198. deltacat/tests/test_deltacat_api.py +39 -0
  199. deltacat/tests/test_utils/filesystem.py +14 -0
  200. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  201. deltacat/tests/test_utils/pyarrow.py +8 -15
  202. deltacat/tests/test_utils/storage.py +266 -3
  203. deltacat/tests/utils/test_daft.py +3 -3
  204. deltacat/tests/utils/test_pyarrow.py +0 -432
  205. deltacat/types/partial_download.py +1 -1
  206. deltacat/types/tables.py +1 -1
  207. deltacat/utils/export.py +59 -0
  208. deltacat/utils/filesystem.py +320 -0
  209. deltacat/utils/metafile_locator.py +73 -0
  210. deltacat/utils/pyarrow.py +36 -183
  211. deltacat-2.0.dist-info/METADATA +65 -0
  212. deltacat-2.0.dist-info/RECORD +347 -0
  213. deltacat/aws/redshift/__init__.py +0 -19
  214. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  215. deltacat/io/dataset.py +0 -73
  216. deltacat/io/read_api.py +0 -143
  217. deltacat/storage/model/delete_parameters.py +0 -40
  218. deltacat/storage/model/partition_spec.py +0 -71
  219. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  220. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  221. deltacat-1.1.35.dist-info/METADATA +0 -64
  222. deltacat-1.1.35.dist-info/RECORD +0 -219
  223. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  224. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  225. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  226. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  227. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  228. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  229. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  230. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  231. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  233. {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
  234. {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
  235. {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -50,11 +50,6 @@ from deltacat.utils.placement import (
50
50
  )
51
51
  from deltacat import logs
52
52
 
53
- DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
54
- "db_file_path",
55
- "deltacat/tests/local_deltacat_storage/db_test.sqlite",
56
- )
57
-
58
53
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
59
54
 
60
55
 
@@ -80,14 +75,6 @@ def mock_aws_credential():
80
75
  yield
81
76
 
82
77
 
83
- @pytest.fixture(autouse=True, scope="module")
84
- def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
85
- # make sure the database file is deleted after all the compactor package tests are completed
86
- yield
87
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
88
- os.remove(DATABASE_FILE_PATH_VALUE)
89
-
90
-
91
78
  @pytest.fixture(scope="module")
92
79
  def s3_resource():
93
80
  with mock_s3():
@@ -108,32 +95,6 @@ FUNCTION scoped fixtures
108
95
  """
109
96
 
110
97
 
111
- @pytest.fixture(scope="function")
112
- def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
113
- # see deltacat/tests/local_deltacat_storage/README.md for documentation
114
- kwargs_for_local_deltacat_storage: Dict[str, Any] = {
115
- DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
116
- }
117
- yield kwargs_for_local_deltacat_storage
118
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
119
- os.remove(DATABASE_FILE_PATH_VALUE)
120
-
121
-
122
- @pytest.fixture(autouse=True, scope="function")
123
- def enable_bucketing_spec_validation(monkeypatch):
124
- """
125
- Enable the bucketing spec validation for all tests.
126
- This will help catch hash bucket drift in testing.
127
- """
128
- import deltacat.compute.compactor_v2.steps.merge
129
-
130
- monkeypatch.setattr(
131
- deltacat.compute.compactor_v2.steps.merge,
132
- "BUCKETING_SPEC_COMPLIANCE_PROFILE",
133
- "ASSERT",
134
- )
135
-
136
-
137
98
  @pytest.mark.parametrize(
138
99
  [
139
100
  "test_name",
@@ -209,7 +170,7 @@ def enable_bucketing_spec_validation(monkeypatch):
209
170
  )
210
171
  def test_compact_partition_incremental(
211
172
  s3_resource: ServiceResource,
212
- offer_local_deltacat_storage_kwargs: Dict[str, Any],
173
+ local_deltacat_storage_kwargs: Dict[str, Any],
213
174
  test_name: str,
214
175
  primary_keys: Set[str],
215
176
  sort_keys: Dict[str, str],
@@ -235,7 +196,7 @@ def test_compact_partition_incremental(
235
196
  ):
236
197
  import deltacat.tests.local_deltacat_storage as ds
237
198
 
238
- ds_mock_kwargs: Dict[str, Any] = offer_local_deltacat_storage_kwargs
199
+ ds_mock_kwargs: Dict[str, Any] = local_deltacat_storage_kwargs
239
200
 
240
201
  # setup
241
202
  partition_keys = partition_keys_param
@@ -247,7 +208,6 @@ def test_compact_partition_incremental(
247
208
  source_table_name,
248
209
  source_table_version,
249
210
  ) = create_src_w_deltas_destination_plus_destination(
250
- primary_keys,
251
211
  sort_keys,
252
212
  partition_keys,
253
213
  input_deltas,
@@ -48,12 +48,6 @@ from deltacat.utils.placement import (
48
48
  PlacementGroupManager,
49
49
  )
50
50
 
51
- DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
52
- "db_file_path",
53
- "deltacat/tests/local_deltacat_storage/db_test.sqlite",
54
- )
55
-
56
-
57
51
  """
58
52
  MODULE scoped fixtures
59
53
  """
@@ -76,13 +70,6 @@ def mock_aws_credential():
76
70
  yield
77
71
 
78
72
 
79
- @pytest.fixture(autouse=True, scope="module")
80
- def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
81
- # make sure the database file is deleted after all the compactor package tests are completed
82
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
83
- os.remove(DATABASE_FILE_PATH_VALUE)
84
-
85
-
86
73
  @pytest.fixture(scope="module")
87
74
  def s3_resource(mock_aws_credential):
88
75
  with mock_s3():
@@ -98,37 +85,6 @@ def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
98
85
  yield
99
86
 
100
87
 
101
- """
102
- FUNCTION scoped fixtures
103
- """
104
-
105
-
106
- @pytest.fixture(scope="function")
107
- def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
108
- # see deltacat/tests/local_deltacat_storage/README.md for documentation
109
- kwargs_for_local_deltacat_storage: Dict[str, Any] = {
110
- DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
111
- }
112
- yield kwargs_for_local_deltacat_storage
113
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
114
- os.remove(DATABASE_FILE_PATH_VALUE)
115
-
116
-
117
- @pytest.fixture(autouse=True, scope="function")
118
- def enable_bucketing_spec_validation(monkeypatch):
119
- """
120
- Enable the bucketing spec validation for all tests.
121
- This will help catch hash bucket drift in testing.
122
- """
123
- import deltacat.compute.compactor_v2.steps.merge
124
-
125
- monkeypatch.setattr(
126
- deltacat.compute.compactor_v2.steps.merge,
127
- "BUCKETING_SPEC_COMPLIANCE_PROFILE",
128
- "ASSERT",
129
- )
130
-
131
-
132
88
  @pytest.mark.parametrize(
133
89
  [
134
90
  "test_name",
@@ -240,7 +196,6 @@ def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
240
196
  rebased_table_stream,
241
197
  _,
242
198
  ) = multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
243
- primary_keys,
244
199
  sort_keys,
245
200
  partition_keys_param,
246
201
  input_deltas_param,
@@ -347,7 +302,11 @@ def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
347
302
  actual_rebase_compacted_table = pa.concat_tables(tables)
348
303
  # if no primary key is specified then sort by sort_key for consistent assertion
349
304
  sorting_cols: List[Any] = (
350
- [(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
305
+ [(val, "ascending") for val in primary_keys]
306
+ if primary_keys
307
+ else [pa_key for key in sort_keys for pa_key in key.arrow]
308
+ if sort_keys
309
+ else []
351
310
  )
352
311
  rebase_expected_compact_partition_result = (
353
312
  rebase_expected_compact_partition_result.combine_chunks().sort_by(
@@ -23,7 +23,7 @@ class TestCompactPartitionParams(unittest.TestCase):
23
23
  "tableVersion": "1",
24
24
  },
25
25
  "streamId": "foobar",
26
- "storageType": "fooType",
26
+ "format": "fooType",
27
27
  },
28
28
  "partitionValues": [],
29
29
  "partitionId": None,
@@ -47,7 +47,7 @@ class TestCompactPartitionParams(unittest.TestCase):
47
47
  "table_version": "1",
48
48
  },
49
49
  "streamId": "foobar",
50
- "storageType": "fooType",
50
+ "format": "fooType",
51
51
  },
52
52
  "partitionValues": [],
53
53
  "partitionId": "79612ea39ac5493eae925abe60767d42",
@@ -67,7 +67,7 @@ class TestCompactPartitionParams(unittest.TestCase):
67
67
  "tableVersion": "2",
68
68
  },
69
69
  "streamId": "foobar",
70
- "storageType": "fooType",
70
+ "format": "fooType",
71
71
  },
72
72
  "partitionValues": [],
73
73
  "partitionId": "79612ea39ac5493eae925abe60767d42",
@@ -48,12 +48,6 @@ from deltacat.utils.placement import (
48
48
  PlacementGroupManager,
49
49
  )
50
50
 
51
- DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
52
- "db_file_path",
53
- "deltacat/tests/local_deltacat_storage/db_test.sqlite",
54
- )
55
-
56
-
57
51
  """
58
52
  MODULE scoped fixtures
59
53
  """
@@ -76,13 +70,6 @@ def mock_aws_credential():
76
70
  yield
77
71
 
78
72
 
79
- @pytest.fixture(autouse=True, scope="module")
80
- def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
81
- # make sure the database file is deleted after all the compactor package tests are completed
82
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
83
- os.remove(DATABASE_FILE_PATH_VALUE)
84
-
85
-
86
73
  @pytest.fixture(scope="module")
87
74
  def s3_resource(mock_aws_credential):
88
75
  with mock_s3():
@@ -98,37 +85,6 @@ def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
98
85
  yield
99
86
 
100
87
 
101
- """
102
- FUNCTION scoped fixtures
103
- """
104
-
105
-
106
- @pytest.fixture(scope="function")
107
- def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
108
- # see deltacat/tests/local_deltacat_storage/README.md for documentation
109
- kwargs_for_local_deltacat_storage: Dict[str, Any] = {
110
- DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
111
- }
112
- yield kwargs_for_local_deltacat_storage
113
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
114
- os.remove(DATABASE_FILE_PATH_VALUE)
115
-
116
-
117
- @pytest.fixture(autouse=True, scope="function")
118
- def enable_bucketing_spec_validation(monkeypatch):
119
- """
120
- Enable the bucketing spec validation for all tests.
121
- This will help catch hash bucket drift in testing.
122
- """
123
- import deltacat.compute.compactor_v2.steps.merge
124
-
125
- monkeypatch.setattr(
126
- deltacat.compute.compactor_v2.steps.merge,
127
- "BUCKETING_SPEC_COMPLIANCE_PROFILE",
128
- "ASSERT",
129
- )
130
-
131
-
132
88
  @pytest.mark.parametrize(
133
89
  [
134
90
  "test_name",
@@ -239,7 +195,6 @@ def test_compact_partition_rebase_same_source_and_destination(
239
195
  _,
240
196
  rebased_table_stream,
241
197
  ) = create_src_w_deltas_destination_rebase_w_deltas_strategy(
242
- primary_keys,
243
198
  sort_keys,
244
199
  partition_keys,
245
200
  input_deltas_param,
@@ -351,7 +306,7 @@ def test_compact_partition_rebase_same_source_and_destination(
351
306
  if primary_keys:
352
307
  sorting_cols.extend([(val, "ascending") for val in primary_keys])
353
308
  if sort_keys:
354
- sorting_cols.extend(sort_keys)
309
+ sorting_cols.extend([pa_key for key in sort_keys for pa_key in key.arrow])
355
310
 
356
311
  rebase_expected_compact_partition_result = (
357
312
  rebase_expected_compact_partition_result.combine_chunks().sort_by(
@@ -52,12 +52,6 @@ from deltacat.compute.compactor.model.compaction_session_audit_info import (
52
52
  CompactionSessionAuditInfo,
53
53
  )
54
54
 
55
- DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
56
- "db_file_path",
57
- "deltacat/tests/local_deltacat_storage/db_test.sqlite",
58
- )
59
-
60
-
61
55
  """
62
56
  MODULE scoped fixtures
63
57
  """
@@ -80,13 +74,6 @@ def mock_aws_credential():
80
74
  yield
81
75
 
82
76
 
83
- @pytest.fixture(autouse=True, scope="module")
84
- def cleanup_the_database_file_after_all_compaction_session_package_tests_complete():
85
- # make sure the database file is deleted after all the compactor package tests are completed
86
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
87
- os.remove(DATABASE_FILE_PATH_VALUE)
88
-
89
-
90
77
  @pytest.fixture(scope="module")
91
78
  def s3_resource(mock_aws_credential):
92
79
  with mock_s3():
@@ -102,37 +89,6 @@ def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
102
89
  yield
103
90
 
104
91
 
105
- """
106
- FUNCTION scoped fixtures
107
- """
108
-
109
-
110
- @pytest.fixture(scope="function")
111
- def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
112
- # see deltacat/tests/local_deltacat_storage/README.md for documentation
113
- kwargs_for_local_deltacat_storage: Dict[str, Any] = {
114
- DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
115
- }
116
- yield kwargs_for_local_deltacat_storage
117
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
118
- os.remove(DATABASE_FILE_PATH_VALUE)
119
-
120
-
121
- @pytest.fixture(autouse=True, scope="function")
122
- def enable_bucketing_spec_validation(monkeypatch):
123
- """
124
- Enable the bucketing spec validation for all tests.
125
- This will help catch hash bucket drift in testing.
126
- """
127
- import deltacat.compute.compactor_v2.steps.merge
128
-
129
- monkeypatch.setattr(
130
- deltacat.compute.compactor_v2.steps.merge,
131
- "BUCKETING_SPEC_COMPLIANCE_PROFILE",
132
- "ASSERT",
133
- )
134
-
135
-
136
92
  @pytest.mark.parametrize(
137
93
  [
138
94
  "test_name",
@@ -244,7 +200,6 @@ def test_compact_partition_rebase_then_incremental(
244
200
  destination_table_stream,
245
201
  rebased_table_stream,
246
202
  ) = create_src_w_deltas_destination_rebase_w_deltas_strategy(
247
- primary_keys,
248
203
  sort_keys,
249
204
  partition_keys,
250
205
  input_deltas_param,
@@ -308,7 +263,11 @@ def test_compact_partition_rebase_then_incremental(
308
263
  actual_rebase_compacted_table = pa.concat_tables(tables)
309
264
  # if no primary key is specified then sort by sort_key for consistent assertion
310
265
  sorting_cols: List[Any] = (
311
- [(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
266
+ [(val, "ascending") for val in primary_keys]
267
+ if primary_keys
268
+ else [pa_key for key in sort_keys for pa_key in key.arrow]
269
+ if sort_keys
270
+ else []
312
271
  )
313
272
  rebase_expected_compact_partition_result = (
314
273
  rebase_expected_compact_partition_result.combine_chunks().sort_by(sorting_cols)
@@ -1,7 +1,7 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
  from enum import Enum
4
- from typing import Any, Dict, List, Optional, Set
4
+ from typing import Any, Dict, List, Optional
5
5
  import datetime as dt
6
6
  from boto3.resources.base import ServiceResource
7
7
  from datetime import timezone
@@ -27,11 +27,18 @@ from deltacat.compute.compactor.model.compaction_session_audit_info import (
27
27
  CompactionSessionAuditInfo,
28
28
  )
29
29
 
30
- from deltacat.storage.model.partition import PartitionLocator
30
+ from deltacat.storage.model.partition import (
31
+ PartitionLocator,
32
+ PartitionScheme,
33
+ PartitionKey as PartitionSchemeKey,
34
+ )
31
35
  from deltacat.storage.model.stream import StreamLocator
32
36
  from deltacat.storage.model.table_version import TableVersionLocator
33
37
  from deltacat.storage.model.table import TableLocator
34
38
  from deltacat.storage.model.namespace import NamespaceLocator
39
+ from deltacat.storage.model.sort_key import (
40
+ SortScheme,
41
+ )
35
42
  from deltacat.compute.compactor.model.compactor_version import CompactorVersion
36
43
 
37
44
 
@@ -77,7 +84,6 @@ def _create_table(
77
84
  namespace: str,
78
85
  table_name: str,
79
86
  table_version: str,
80
- primary_keys: Set[str],
81
87
  sort_keys: Optional[List[Any]],
82
88
  partition_keys: Optional[List[PartitionKey]],
83
89
  ds_mock_kwargs: Optional[Dict[str, Any]],
@@ -86,13 +92,20 @@ def _create_table(
86
92
  from deltacat.types.media import ContentType
87
93
 
88
94
  ds.create_namespace(namespace, {}, **ds_mock_kwargs)
95
+ partition_scheme = (
96
+ PartitionScheme.of(
97
+ [PartitionSchemeKey.of(key.key_name) for key in partition_keys]
98
+ )
99
+ if partition_keys
100
+ else None
101
+ )
102
+ sort_scheme = SortScheme.of(sort_keys) if sort_keys else None
89
103
  ds.create_table_version(
90
104
  namespace,
91
105
  table_name,
92
106
  table_version,
93
- primary_key_column_names=list(primary_keys),
94
- sort_keys=sort_keys,
95
- partition_keys=partition_keys,
107
+ sort_keys=sort_scheme,
108
+ partition_scheme=partition_scheme,
96
109
  supported_content_types=[ContentType.PARQUET],
97
110
  **ds_mock_kwargs,
98
111
  )
@@ -100,7 +113,6 @@ def _create_table(
100
113
 
101
114
 
102
115
  def create_src_table(
103
- primary_keys: Set[str],
104
116
  sort_keys: Optional[List[Any]],
105
117
  partition_keys: Optional[List[PartitionKey]],
106
118
  ds_mock_kwargs: Optional[Dict[str, Any]],
@@ -112,7 +124,6 @@ def create_src_table(
112
124
  source_namespace,
113
125
  source_table_name,
114
126
  source_table_version,
115
- primary_keys,
116
127
  sort_keys,
117
128
  partition_keys,
118
129
  ds_mock_kwargs,
@@ -120,7 +131,6 @@ def create_src_table(
120
131
 
121
132
 
122
133
  def create_destination_table(
123
- primary_keys: Set[str],
124
134
  sort_keys: Optional[List[Any]],
125
135
  partition_keys: Optional[List[PartitionKey]],
126
136
  ds_mock_kwargs: Optional[Dict[str, Any]],
@@ -132,7 +142,6 @@ def create_destination_table(
132
142
  destination_namespace,
133
143
  destination_table_name,
134
144
  destination_table_version,
135
- primary_keys,
136
145
  sort_keys,
137
146
  partition_keys,
138
147
  ds_mock_kwargs,
@@ -140,7 +149,6 @@ def create_destination_table(
140
149
 
141
150
 
142
151
  def create_rebase_table(
143
- primary_keys: Set[str],
144
152
  sort_keys: Optional[List[Any]],
145
153
  partition_keys: Optional[List[PartitionKey]],
146
154
  ds_mock_kwargs: Optional[Dict[str, Any]],
@@ -152,7 +160,6 @@ def create_rebase_table(
152
160
  rebasing_namespace,
153
161
  rebasing_table_name,
154
162
  rebasing_table_version,
155
- primary_keys,
156
163
  sort_keys,
157
164
  partition_keys,
158
165
  ds_mock_kwargs,
@@ -1,6 +1,6 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
- from typing import Any, Dict, List, Optional, Set, Tuple
3
+ from typing import Any, Dict, List, Optional, Tuple
4
4
  import pyarrow as pa
5
5
 
6
6
  from deltacat.tests.compute.test_util_common import (
@@ -38,7 +38,7 @@ def _add_deltas_to_partition(
38
38
  delta_data,
39
39
  partition,
40
40
  delta_type,
41
- delete_parameters=delete_parameters,
41
+ entry_params=delete_parameters,
42
42
  **ds_mock_kwargs,
43
43
  )
44
44
  incremental_delta = ds.commit_delta(
@@ -87,7 +87,7 @@ def create_incremental_deltas_on_source_table(
87
87
  incremental_data,
88
88
  src_partition,
89
89
  incremental_delta_type,
90
- delete_parameters=incremental_delete_parameters,
90
+ entry_params=incremental_delete_parameters,
91
91
  **ds_mock_kwargs,
92
92
  ),
93
93
  **ds_mock_kwargs,
@@ -113,7 +113,6 @@ def create_incremental_deltas_on_source_table(
113
113
 
114
114
 
115
115
  def create_src_w_deltas_destination_plus_destination(
116
- primary_keys: Set[str],
117
116
  sort_keys: Optional[List[Any]],
118
117
  partition_keys: Optional[List[PartitionKey]],
119
118
  input_deltas: pa.Table,
@@ -125,7 +124,7 @@ def create_src_w_deltas_destination_plus_destination(
125
124
  import deltacat.tests.local_deltacat_storage as ds
126
125
 
127
126
  source_namespace, source_table_name, source_table_version = create_src_table(
128
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
127
+ sort_keys, partition_keys, ds_mock_kwargs
129
128
  )
130
129
 
131
130
  source_table_stream: Stream = ds.get_stream(
@@ -158,9 +157,7 @@ def create_src_w_deltas_destination_plus_destination(
158
157
  destination_table_namespace,
159
158
  destination_table_name,
160
159
  destination_table_version,
161
- ) = create_destination_table(
162
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
163
- )
160
+ ) = create_destination_table(sort_keys, partition_keys, ds_mock_kwargs)
164
161
  else:
165
162
  # not creating a table as in-place
166
163
  destination_table_namespace = source_namespace
@@ -184,7 +181,6 @@ def create_src_w_deltas_destination_plus_destination(
184
181
 
185
182
 
186
183
  def create_src_w_deltas_destination_rebase_w_deltas_strategy(
187
- primary_keys: Set[str],
188
184
  sort_keys: Optional[List[Any]],
189
185
  partition_keys: Optional[List[PartitionKey]],
190
186
  input_deltas: pa.Table,
@@ -198,7 +194,7 @@ def create_src_w_deltas_destination_rebase_w_deltas_strategy(
198
194
 
199
195
  last_stream_position = current_time_ms()
200
196
  source_namespace, source_table_name, source_table_version = create_src_table(
201
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
197
+ sort_keys, partition_keys, ds_mock_kwargs
202
198
  )
203
199
 
204
200
  source_table_stream: Stream = ds.get_stream(
@@ -230,15 +226,13 @@ def create_src_w_deltas_destination_rebase_w_deltas_strategy(
230
226
  destination_table_namespace,
231
227
  destination_table_name,
232
228
  destination_table_version,
233
- ) = create_destination_table(
234
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
235
- )
229
+ ) = create_destination_table(sort_keys, partition_keys, ds_mock_kwargs)
236
230
  # create the rebase table
237
231
  (
238
232
  rebase_table_namespace,
239
233
  rebase_table_name,
240
234
  rebase_table_version,
241
- ) = create_rebase_table(primary_keys, sort_keys, partition_keys, ds_mock_kwargs)
235
+ ) = create_rebase_table(sort_keys, partition_keys, ds_mock_kwargs)
242
236
  rebasing_table_stream: Stream = ds.get_stream(
243
237
  namespace=rebase_table_namespace,
244
238
  table_name=rebase_table_name,
@@ -280,7 +274,6 @@ def create_src_w_deltas_destination_rebase_w_deltas_strategy(
280
274
 
281
275
 
282
276
  def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
283
- primary_keys: Set[str],
284
277
  sort_keys: Optional[List[Any]],
285
278
  partition_keys: Optional[List[PartitionKey]],
286
279
  input_deltas: List[pa.Table],
@@ -291,7 +284,7 @@ def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
291
284
  from deltacat.storage import Partition, Stream
292
285
 
293
286
  source_namespace, source_table_name, source_table_version = create_src_table(
294
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
287
+ sort_keys, partition_keys, ds_mock_kwargs
295
288
  )
296
289
 
297
290
  source_table_stream: Stream = ds.get_stream(
@@ -316,7 +309,7 @@ def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
316
309
  input_delta,
317
310
  staged_partition,
318
311
  input_delta_type,
319
- delete_parameters=input_delta_parameters,
312
+ entry_params=input_delta_parameters,
320
313
  **ds_mock_kwargs,
321
314
  )
322
315
  ds.commit_delta(
@@ -336,15 +329,13 @@ def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
336
329
  destination_table_namespace,
337
330
  destination_table_name,
338
331
  destination_table_version,
339
- ) = create_destination_table(
340
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
341
- )
332
+ ) = create_destination_table(sort_keys, partition_keys, ds_mock_kwargs)
342
333
  # create the rebase table
343
334
  (
344
335
  rebase_table_namespace,
345
336
  rebase_table_name,
346
337
  rebase_table_version,
347
- ) = create_rebase_table(primary_keys, sort_keys, partition_keys, ds_mock_kwargs)
338
+ ) = create_rebase_table(sort_keys, partition_keys, ds_mock_kwargs)
348
339
  rebasing_table_stream: Stream = ds.get_stream(
349
340
  namespace=rebase_table_namespace,
350
341
  table_name=rebase_table_name,
@@ -366,7 +357,7 @@ def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
366
357
  input_delta,
367
358
  staged_partition,
368
359
  input_delta_type,
369
- delete_parameters=input_delta_parameters,
360
+ entry_params=input_delta_parameters,
370
361
  **ds_mock_kwargs,
371
362
  )
372
363
  ds.commit_delta(