deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,23 @@
1
1
  from __future__ import annotations
2
- from typing import List
2
+
3
+ import base64
3
4
  from enum import Enum
5
+ from typing import Dict, Any, Optional
6
+ import pyarrow as pa
7
+
8
+ from deltacat.constants import METAFILE_FORMAT, METAFILE_FORMAT_JSON
4
9
 
5
10
 
6
11
  class TransformName(str, Enum):
7
12
  IDENTITY = "identity"
8
13
  BUCKET = "bucket"
14
+ YEAR = "year"
15
+ MONTH = "month"
16
+ DAY = "day"
17
+ HOUR = "hour"
18
+ TRUNCATE = "truncate"
19
+ VOID = "void"
20
+ UNKNOWN = "unknown"
9
21
 
10
22
 
11
23
  class TransformParameters(dict):
@@ -17,63 +29,42 @@ class TransformParameters(dict):
17
29
  pass
18
30
 
19
31
 
20
- class IdentityTransformParameters(TransformParameters):
32
+ class BucketingStrategy(str, Enum):
21
33
  """
22
- This class is used to pass parameters to the identity transform
34
+ A bucketing strategy for the transform
23
35
  """
24
36
 
25
- @staticmethod
26
- def of(column_name: str) -> IdentityTransformParameters:
27
- identify_transform_parameters = IdentityTransformParameters()
28
- identify_transform_parameters["columnName"] = column_name
29
- return identify_transform_parameters
30
-
31
- @property
32
- def column_name(self) -> str:
33
- """
34
- The name of the column to use for identity transform
35
- """
36
- return self["columnName"]
37
+ # Default DeltaCAT SHA-1 based hash bucketing strategy.
38
+ DEFAULT = "default"
37
39
 
38
- @column_name.setter
39
- def column_name(self, value: str) -> None:
40
- self["columnName"] = value
40
+ # Iceberg-compliant murmur3 based hash bucketing strategy.
41
+ ICEBERG = "iceberg"
41
42
 
42
43
 
43
- class BucketingStrategy(str, Enum):
44
+ class TruncateStrategy(str, Enum):
44
45
  """
45
- A bucketing strategy for the transform
46
+ A truncation strategy for the transform
46
47
  """
47
48
 
48
- # Uses default deltacat bucketing strategy.
49
- # This strategy supports hashing on composite keys
50
- # and uses SHA1 hashing for determining the bucket.
51
- # If no columns passed, it will use a random UUID
52
- # for determining the bucket.
49
+ # Default DeltaCAT truncate strategy.
53
50
  DEFAULT = "default"
54
51
 
55
- # Uses iceberg compliant bucketing strategy.
56
- # As indicated in the iceberg spec, it does not support
57
- # composite keys and uses murmur3 hash for determining
58
- # the bucket.
59
- # See https://iceberg.apache.org/spec/#partitioning
52
+ # Iceberg-compliant truncate strategy.
60
53
  ICEBERG = "iceberg"
61
54
 
62
55
 
63
56
  class BucketTransformParameters(TransformParameters):
64
57
  """
65
- Encapsulates parameters for the bucket transform.
58
+ Parameters for the bucket transform.
66
59
  """
67
60
 
61
+ @staticmethod
68
62
  def of(
69
- self,
70
63
  num_buckets: int,
71
- column_names: List[str],
72
- bucketing_strategy: BucketingStrategy,
64
+ bucketing_strategy: BucketingStrategy = BucketingStrategy.DEFAULT,
73
65
  ) -> BucketTransformParameters:
74
66
  bucket_transform_parameters = BucketTransformParameters()
75
67
  bucket_transform_parameters["numBuckets"] = num_buckets
76
- bucket_transform_parameters["columnNames"] = column_names
77
68
  bucket_transform_parameters["bucketingStrategy"] = bucketing_strategy
78
69
 
79
70
  return bucket_transform_parameters
@@ -81,47 +72,272 @@ class BucketTransformParameters(TransformParameters):
81
72
  @property
82
73
  def num_buckets(self) -> int:
83
74
  """
84
- The total number of buckets to create for values of the column
75
+ The total number of buckets to create.
85
76
  """
86
77
  return self["numBuckets"]
87
78
 
88
79
  @property
89
- def column_names(self) -> List[str]:
80
+ def bucketing_strategy(self) -> BucketingStrategy:
81
+ """
82
+ The bucketing strategy to use.
83
+ """
84
+ return BucketingStrategy(self["bucketingStrategy"])
85
+
86
+
87
+ class TruncateTransformParameters(TransformParameters):
88
+ """
89
+ Parameters for the truncate transform.
90
+ """
91
+
92
+ @staticmethod
93
+ def of(
94
+ width: int,
95
+ truncate_strategy: TruncateStrategy = TruncateStrategy.DEFAULT,
96
+ ) -> TruncateTransformParameters:
97
+ truncate_transform_parameters = TruncateTransformParameters()
98
+ truncate_transform_parameters["width"] = width
99
+ truncate_transform_parameters["truncateStrategy"] = truncate_strategy
100
+ return truncate_transform_parameters
101
+
102
+ @property
103
+ def width(self) -> int:
90
104
  """
91
- An ordered list of unique column names from the table schema
92
- to use for bucketings.
105
+ The width to truncate the field to.
93
106
  """
94
- return self["columnNames"]
107
+ return self["width"]
95
108
 
96
109
  @property
97
- def bucketing_strategy(self) -> BucketingStrategy:
110
+ def truncate_strategy(self) -> TruncateStrategy:
98
111
  """
99
- The bucketing strategy to used.
112
+ The truncate strategy to use.
100
113
  """
101
- return self["bucketingStrategy"]
114
+ return TruncateStrategy(self["truncateStrategy"])
102
115
 
103
116
 
104
117
  class Transform(dict):
105
118
  """
106
- A transform is represents how a particular column value can be
107
- transformed into a new value. This is mostly used in the context
108
- of partitioning the data files in a table.
119
+ A transform represents how a particular column value can be
120
+ transformed into a new value. For example, transforms may be used
121
+ to determine partition or sort values for table records.
122
+ """
123
+
124
+ @property
125
+ def name(self) -> TransformName:
126
+ return TransformName(self["name"])
127
+
128
+ @name.setter
129
+ def name(self, name: TransformName) -> None:
130
+ self["name"] = name
131
+
132
+ @property
133
+ def parameters(self) -> Optional[TransformParameters]:
134
+ return NAME_TO_TRANSFORM[self.name].parameters
135
+
136
+ @parameters.setter
137
+ def parameters(
138
+ self,
139
+ parameters: Optional[TransformParameters] = None,
140
+ ) -> None:
141
+ NAME_TO_TRANSFORM[self.name].parameters = parameters
142
+
143
+ @property
144
+ def return_type(self) -> Optional[pa.DataType]:
145
+ """
146
+ The PyArrow data type that this transform returns.
147
+ A return value of "None" indicates that the return type is the same
148
+ as the source type. Transforms that always return null return pa.null().
149
+ """
150
+ return_type = self.get("return_type")
151
+ if return_type is not None:
152
+ schema_bytes = (
153
+ base64.b64decode(return_type)
154
+ if METAFILE_FORMAT == METAFILE_FORMAT_JSON
155
+ else return_type
156
+ )
157
+ return_type = pa.ipc.read_schema(
158
+ pa.py_buffer(schema_bytes),
159
+ )[0].type
160
+ return return_type
161
+
162
+ @return_type.setter
163
+ def return_type(self, return_type: pa.Schema) -> None:
164
+ """
165
+ Set the PyArrow data type that this transform returns.
166
+ """
167
+ self["return_type"] = return_type.serialize().to_pybytes()
168
+
169
+ @property
170
+ def is_multi_field_transform(self) -> bool:
171
+ """
172
+ Whether this transform is a multi-field transform.
173
+ """
174
+ return False
175
+
176
+
177
+ class BucketTransform(Transform):
178
+ """
179
+ A transform that hashes field values into a fixed number of buckets.
180
+ Returns a PyArrow int32() type.
109
181
  """
110
182
 
111
183
  @staticmethod
112
- def of(
113
- name: TransformName,
114
- parameters: TransformParameters,
115
- ) -> Transform:
116
- partition_transform = Transform()
117
- partition_transform["name"] = name
118
- partition_transform["parameters"] = parameters
119
- return partition_transform
184
+ def of(parameters: BucketTransformParameters) -> BucketTransform:
185
+ transform = BucketTransform()
186
+ transform.name = TransformName.BUCKET
187
+ transform.parameters = parameters
188
+ transform.return_type = pa.schema([("return_type", pa.int32())])
189
+ return transform
120
190
 
121
191
  @property
122
- def name(self) -> TransformName:
123
- return self["name"]
192
+ def parameters(self) -> BucketTransformParameters:
193
+ val: Dict[str, Any] = self.get("parameters")
194
+ if val is not None and not isinstance(val, BucketTransformParameters.__class__):
195
+ self["parameters"] = val = BucketTransformParameters(val)
196
+ return val
197
+
198
+ @parameters.setter
199
+ def parameters(
200
+ self,
201
+ parameters: Optional[BucketTransformParameters] = None,
202
+ ) -> None:
203
+ self["parameters"] = parameters
124
204
 
125
205
  @property
126
- def parameters(self) -> TransformParameters:
127
- return self["parameters"]
206
+ def is_multi_field_transform(self) -> bool:
207
+ return True
208
+
209
+
210
+ class TruncateTransform(Transform):
211
+ """
212
+ A transform that truncates field values to a fixed width.
213
+ Returns the same type as the input field.
214
+ """
215
+
216
+ @staticmethod
217
+ def of(parameters: TruncateTransformParameters) -> TruncateTransform:
218
+ transform = TruncateTransform()
219
+ transform.name = TransformName.TRUNCATE
220
+ transform.parameters = parameters
221
+ return transform
222
+
223
+ @property
224
+ def parameters(self) -> TruncateTransformParameters:
225
+ val: Dict[str, Any] = self.get("parameters")
226
+ if val is not None and not isinstance(val, TruncateTransformParameters):
227
+ self["parameters"] = val = TruncateTransformParameters(val)
228
+ return val
229
+
230
+ @parameters.setter
231
+ def parameters(
232
+ self,
233
+ parameters: Optional[TruncateTransformParameters] = None,
234
+ ) -> None:
235
+ self["parameters"] = parameters
236
+
237
+
238
+ class IdentityTransform(Transform):
239
+ """
240
+ A no-op transform that returns unmodified field values.
241
+ Returns the same PyArrow type as the input.
242
+ """
243
+
244
+ @staticmethod
245
+ def of() -> IdentityTransform:
246
+ transform = IdentityTransform()
247
+ transform.name = TransformName.IDENTITY
248
+ return transform
249
+
250
+
251
+ class HourTransform(Transform):
252
+ """
253
+ A transform that returns the hour of a datetime value.
254
+ Returns a PyArrow int32 type representing the hour (0-23).
255
+ """
256
+
257
+ @staticmethod
258
+ def of() -> HourTransform:
259
+ transform = HourTransform()
260
+ transform.name = TransformName.HOUR
261
+ transform.return_type = pa.schema([("return_type", pa.int32())])
262
+ return transform
263
+
264
+
265
+ class DayTransform(Transform):
266
+ """
267
+ A transform that returns the day of a datetime value.
268
+ Returns a PyArrow int32 type representing the day (1-31).
269
+ """
270
+
271
+ @staticmethod
272
+ def of() -> DayTransform:
273
+ transform = DayTransform()
274
+ transform.name = TransformName.DAY
275
+ transform.return_type = pa.schema([("return_type", pa.int32())])
276
+ return transform
277
+
278
+
279
+ class MonthTransform(Transform):
280
+ """
281
+ A transform that returns the month of a datetime value.
282
+ Returns a PyArrow int32 type representing the month (1-12).
283
+ """
284
+
285
+ @staticmethod
286
+ def of() -> MonthTransform:
287
+ transform = MonthTransform()
288
+ transform.name = TransformName.MONTH
289
+ transform.return_type = pa.schema([("return_type", pa.int32())])
290
+ return transform
291
+
292
+
293
+ class YearTransform(Transform):
294
+ """
295
+ A transform that returns the year of a datetime value.
296
+ Returns a PyArrow int32 type representing the year.
297
+ """
298
+
299
+ @staticmethod
300
+ def of() -> YearTransform:
301
+ transform = YearTransform()
302
+ transform.name = TransformName.YEAR
303
+ transform.return_type = pa.schema([("return_type", pa.int32())])
304
+ return transform
305
+
306
+
307
+ class VoidTransform(Transform):
308
+ """
309
+ A transform that coerces all field values to None.
310
+ Returns a PyArrow null type.
311
+ """
312
+
313
+ @staticmethod
314
+ def of() -> VoidTransform:
315
+ transform = VoidTransform()
316
+ transform.name = TransformName.VOID
317
+ transform.return_type = pa.schema([("return_type", pa.null())])
318
+ return transform
319
+
320
+
321
+ class UnknownTransform(Transform):
322
+ """
323
+ An unknown or invalid transform.
324
+ """
325
+
326
+ @staticmethod
327
+ def of() -> UnknownTransform:
328
+ transform = UnknownTransform()
329
+ transform.name = TransformName.UNKNOWN
330
+ return transform
331
+
332
+
333
+ NAME_TO_TRANSFORM: Dict[TransformName, Transform] = {
334
+ TransformName.IDENTITY: IdentityTransform,
335
+ TransformName.BUCKET: BucketTransform,
336
+ TransformName.YEAR: YearTransform,
337
+ TransformName.MONTH: MonthTransform,
338
+ TransformName.DAY: DayTransform,
339
+ TransformName.HOUR: HourTransform,
340
+ TransformName.TRUNCATE: TruncateTransform,
341
+ TransformName.VOID: VoidTransform,
342
+ TransformName.UNKNOWN: UnknownTransform,
343
+ }
@@ -1,16 +1,41 @@
1
+ from __future__ import annotations
2
+
1
3
  from enum import Enum
2
4
  from typing import List, Union
3
5
 
4
- from pyarrow.parquet import ParquetFile
5
6
  import numpy as np
6
7
  import pandas as pd
7
8
  import pyarrow as pa
8
- from ray.data.dataset import Dataset
9
+ import polars as pl
10
+ from ray.data.dataset import Dataset as RayDataset
9
11
  from daft import DataFrame as DaftDataFrame
10
12
 
11
- LocalTable = Union[pa.Table, pd.DataFrame, np.ndarray, ParquetFile]
12
- LocalDataset = List[LocalTable]
13
- DistributedDataset = Union[Dataset, DaftDataFrame]
13
+ from deltacat.constants import (
14
+ RUNNING_TXN_DIR_NAME,
15
+ PAUSED_TXN_DIR_NAME,
16
+ FAILED_TXN_DIR_NAME,
17
+ SUCCESS_TXN_DIR_NAME,
18
+ )
19
+
20
# Union of the supported local table types.
# NOTE(review): `pa.parquet` is looked up as an attribute of the `pyarrow`
# package; this presumably relies on the `pyarrow.parquet` submodule having
# already been imported by the time this line runs - confirm.
LocalTable = Union[
    pa.Table,
    pd.DataFrame,
    pl.DataFrame,
    np.ndarray,
    pa.parquet.ParquetFile,
]
# A local dataset is either a single local table or a list of local tables.
LocalDataset = Union[LocalTable, List[LocalTable]]
# A distributed dataset is either a Ray Dataset or a Daft DataFrame.
DistributedDataset = Union[RayDataset, DaftDataFrame]
# Any dataset, local or distributed.
Dataset = Union[LocalDataset, DistributedDataset]
30
+
31
+
32
class StreamFormat(str, Enum):
    """String-valued enumeration of stream formats."""

    DELTACAT = "deltacat"
    ICEBERG = "iceberg"
    HIVE = "hive"
    HUDI = "hudi"
    DELTA_LAKE = "delta_lake"
    # NOTE(review): the only member whose value is upper-case; presumably
    # intentional - confirm before normalizing.
    SQLITE3 = "SQLITE3"  # used by tests
14
39
 
15
40
 
16
41
  class DeltaType(str, Enum):
@@ -19,7 +44,81 @@ class DeltaType(str, Enum):
19
44
  DELETE = "delete"
20
45
 
21
46
 
47
class TransactionOperationType(str, Enum):
    """
    Types of operations a transaction can perform, grouped into write
    operations and read operations.
    """

    # Write operations.
    CREATE = "create"
    UPDATE = "update"
    REPLACE = "replace"
    DELETE = "delete"

    # Read operations.
    READ_SIBLINGS = "read_siblings"
    READ_CHILDREN = "read_children"
    READ_LATEST = "read_latest"
    READ_EXISTS = "read_exists"

    @staticmethod
    def write_operations():
        """Return a new set containing every write operation type."""
        op_type = TransactionOperationType
        return {op_type.CREATE, op_type.UPDATE, op_type.REPLACE, op_type.DELETE}

    @staticmethod
    def read_operations():
        """Return a new set containing every read operation type."""
        op_type = TransactionOperationType
        return {
            op_type.READ_SIBLINGS,
            op_type.READ_CHILDREN,
            op_type.READ_LATEST,
            op_type.READ_EXISTS,
        }

    def is_write_operation(self) -> bool:
        """Return True if this operation type is one of the write operations."""
        write_ops = TransactionOperationType.write_operations()
        return self in write_ops

    def is_read_operation(self) -> bool:
        """Return True if this operation type is one of the read operations."""
        read_ops = TransactionOperationType.read_operations()
        return self in read_ops
81
+
82
+
83
class TransactionStatus(str, Enum):
    """
    User-facing transaction statuses. Each status corresponds to its own
    transaction log directory.
    """

    SUCCESS = "SUCCESS"
    RUNNING = "RUNNING"
    PAUSED = "PAUSED"
    FAILED = "FAILED"

    def dir_name(self) -> str:
        """Return the transaction log directory name for this status."""
        status_to_dir = {
            TransactionStatus.RUNNING: RUNNING_TXN_DIR_NAME,
            TransactionStatus.PAUSED: PAUSED_TXN_DIR_NAME,
            TransactionStatus.FAILED: FAILED_TXN_DIR_NAME,
            TransactionStatus.SUCCESS: SUCCESS_TXN_DIR_NAME,
        }
        # .get() yields None for an unmatched status, like falling off the
        # end of an if/elif chain.
        return status_to_dir.get(self)
103
+
104
+
105
class TransactionState(str, Enum):
    """
    Transaction system state types.

    Unlike transaction statuses, transaction states do not map to distinct
    transaction log directories. Instead, a transaction's state can be
    inferred from its presence in one or more directories. These states are
    used to infer whether to run system activities like transaction cleanup
    jobs.
    """

    FAILED = "FAILED"
    PURGED = "PURGED"
    TIMEOUT = "TIMEOUT"
    RUNNING = "RUNNING"
    SUCCESS = "SUCCESS"
    PAUSED = "PAUSED"
119
+
22
120
  class LifecycleState(str, Enum):
121
+ CREATED = "created"
23
122
  UNRELEASED = "unreleased"
24
123
  ACTIVE = "active"
25
124
  DEPRECATED = "deprecated"
@@ -35,22 +134,45 @@ class CommitState(str, Enum):
35
134
 
36
135
  class SchemaConsistencyType(str, Enum):
37
136
  """
38
- Schemas are optional for DeltaCAT tables and can be used to inform the data
39
- consistency checks run for each field. If a schema is present, it can be
40
- used to enforce the following column-level data consistency policies at
41
- table load time:
137
+ DeltaCAT table schemas can be used to inform the data consistency checks
138
+ run for each field. When present, the schema can be used to enforce the
139
+ following field-level data consistency policies at table load time:
42
140
 
43
- NONE: No consistency checks are run. May be mixed with the below two
44
- policies by specifying column names to pass through together with
45
- column names to coerce/validate.
141
+ NONE: No consistency checks are run.
46
142
 
47
- COERCE: Coerce fields to fit the schema whenever possible. An explicit
48
- subset of column names to coerce may optionally be specified.
143
+ COERCE: Coerce fields to fit the schema whenever possible.
49
144
 
50
- VALIDATE: Raise an error for any fields that don't fit the schema. An
51
- explicit subset of column names to validate may optionally be specified.
145
+ VALIDATE: Raise an error for any fields that don't fit the schema.
52
146
  """
53
147
 
54
148
  NONE = "none"
55
149
  COERCE = "coerce"
56
150
  VALIDATE = "validate"
151
+
152
+
153
class SortOrder(str, Enum):
    """
    Sort direction.

    In addition to the canonical values "ascending" and "descending", the
    pyiceberg SortDirection spellings "asc" and "desc" are accepted
    case-insensitively.
    """

    ASCENDING = "ascending"
    DESCENDING = "descending"

    @classmethod
    def _missing_(cls, value: object):
        """
        Resolve a value that does not exactly match any member value.

        Args:
            value: The value passed to ``SortOrder(...)``.

        Returns:
            The member matching a pyiceberg alias, or None so that Enum
            raises its usual ValueError for an invalid value.
        """
        # A non-string value cannot be an alias. Returning None here (rather
        # than calling .lower() on it) keeps the failure a ValueError instead
        # of leaking an AttributeError to the caller.
        if not isinstance(value, str):
            return None
        # pyiceberg.table.sorting.SortDirection mappings
        alias = value.lower()
        if alias == "asc":
            return SortOrder.ASCENDING
        if alias == "desc":
            return SortOrder.DESCENDING
        return None
165
+
166
+
167
class NullOrder(str, Enum):
    """
    Placement of null values when sorting.

    In addition to the canonical values "at_start" and "at_end", the
    pyiceberg NullOrder spellings "nulls-first" and "nulls-last" are
    accepted case-insensitively.
    """

    AT_START = "at_start"
    AT_END = "at_end"

    @classmethod
    def _missing_(cls, value: object):
        """
        Resolve a value that does not exactly match any member value.

        Args:
            value: The value passed to ``NullOrder(...)``.

        Returns:
            The member matching a pyiceberg alias, or None so that Enum
            raises its usual ValueError for an invalid value.
        """
        # A non-string value cannot be an alias. Returning None here (rather
        # than calling .lower() on it) keeps the failure a ValueError instead
        # of leaking an AttributeError to the caller.
        if not isinstance(value, str):
            return None
        # pyiceberg.table.sorting.NullOrder mappings
        alias = value.lower()
        if alias == "nulls-first":
            return NullOrder.AT_START
        if alias == "nulls-last":
            return NullOrder.AT_END
        return None
File without changes
@@ -0,0 +1,26 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+ from deltacat.storage.model.scan.push_down import Pushdown
5
+ from deltacat.storage.model.scan.scan_plan import ScanPlan
6
+
7
+
8
class ScanPlanner(ABC):
    """Abstract interface for objects that create scan plans for tables."""

    @abstractmethod
    def create_scan_plan(
        self,
        table_name: str,
        namespace: Optional[str] = None,
        pushdown: Optional[Pushdown] = None,
    ) -> ScanPlan:
        """Return a ScanPlan for a given DeltaCAT Table after applying pushdown predicates

        Args:
            table_name: Name of the table
            namespace: Optional namespace of the table. Uses default namespace if not specified.
            pushdown: Pushdown predicates used to filter partitions/data files

        Returns:
            a ScanPlan object containing list of ScanTasks
        """
        pass
@@ -0,0 +1 @@
1
+ # NOTE: this module was renamed because its previous name shadowed the stdlib io module when running tests in PyCharm
File without changes
File without changes
@@ -45,7 +45,11 @@ class TestCloudpickleBugFix(unittest.TestCase):
45
45
  def test_sanity(self):
46
46
  ray.init(local_mode=True, ignore_reinit_error=True)
47
47
 
48
- result = ray.get(calculate_pickled_length.remote(AnyObject()))
49
-
50
- self.assertTrue(result[0] < 1000)
51
- self.assertTrue(result[1] >= 5000000)
48
+ try:
49
+ result = ray.get(calculate_pickled_length.remote(AnyObject()))
50
+
51
+ self.assertTrue(result[0] < 1000)
52
+ self.assertTrue(result[1] >= 5000000)
53
+ finally:
54
+ if ray.is_initialized():
55
+ ray.shutdown()
@@ -2,9 +2,8 @@ import unittest
2
2
 
3
3
  import botocore
4
4
 
5
- from deltacat.aws.constants import RETRYABLE_TRANSIENT_ERRORS
6
- from deltacat.aws.s3u import UuidBlockWritePathProvider, CapturedBlockWritePaths
7
-
5
+ from deltacat.constants import RETRYABLE_TRANSIENT_ERRORS
6
+ from deltacat.types.tables import CapturedBlockWritePaths, UuidBlockWritePathProvider
8
7
 
9
8
  import os
10
9
  from unittest import mock
@@ -99,34 +98,6 @@ class TestDownloadUpload(unittest.TestCase):
99
98
 
100
99
  assert mock_s3.put_object.call_count > 3
101
100
 
102
- @patch("deltacat.aws.s3u.UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY", 1)
103
- @patch("deltacat.aws.s3u.ManifestEntry")
104
- @patch("deltacat.aws.s3u._get_metadata")
105
- @patch("deltacat.aws.s3u.CapturedBlockWritePaths")
106
- def test_upload_sliced_table_retry(
107
- self,
108
- mock_captured_block_write_paths,
109
- mock_get_metadata,
110
- mock_manifest_entry,
111
- ):
112
- mock_manifest_entry.from_s3_obj_url.side_effect = OSError(
113
- "Please reduce your request rate.."
114
- )
115
- mock_get_metadata.return_value = [mock.MagicMock()]
116
- cbwp = CapturedBlockWritePaths()
117
- cbwp._write_paths = ["s3_write_path"]
118
- cbwp._block_refs = [mock.MagicMock()]
119
- mock_captured_block_write_paths.return_value = cbwp
120
- with pytest.raises(RetryError):
121
- s3u.upload_sliced_table(
122
- mock.MagicMock(),
123
- "s3-prefix",
124
- mock.MagicMock(),
125
- mock.MagicMock(),
126
- mock.MagicMock(),
127
- mock.MagicMock(),
128
- )
129
-
130
101
  @patch("deltacat.aws.s3u.UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 1)
131
102
  @patch("deltacat.aws.s3u.s3_client_cache")
132
103
  def test_upload_transient_error_retry(self, mock_s3_client_cache):
File without changes
File without changes