deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,417 @@
1
+ # from deltacat.compute import index
2
+ import subprocess
3
+ import socket
4
+ import os
5
+ import time
6
+ import re
7
+
8
+ import deltacat as dc
9
+
10
+ from dataclasses import dataclass
11
+
12
+ from typing import Set, Optional, Dict, Any, Union
13
+
14
+ from ray.job_submission import JobSubmissionClient, JobStatus
15
+
16
+ from deltacat.utils.performance import timed_invocation
17
+
18
+
19
def _run_cmd(cmd: str) -> None:
    """Run a shell command and fail loudly if it exits unsuccessfully.

    Args:
        cmd: Command line handed verbatim to ``os.system``.

    Raises:
        AssertionError: If ``os.system`` reports a non-zero status.
    """
    exit_code = int(os.system(cmd))
    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed when Python runs with -O, which would let failed commands pass
    # silently. AssertionError is kept so existing handlers still match.
    if exit_code != 0:
        raise AssertionError(f"`{cmd}` failed. Exit code: {exit_code}")
22
+
23
+
24
def _ray_up(
    cluster_cfg: str, cluster_name_override: str = None, restart_only: bool = False
) -> None:
    """Shell out to `ray up` for the given cluster config.

    Args:
        cluster_cfg: Value interpolated into `ray up '<cluster_cfg>'`
            (presumably the path to a Ray cluster config file).
        cluster_name_override: When truthy, forwarded as `-n '<name>'`.
        restart_only: Selects `--restart-only` when True and `--no-restart`
            otherwise.
    """
    if restart_only:
        restart_switch = "--restart-only"
    else:
        restart_switch = "--no-restart"

    if cluster_name_override:
        name_switch = f"-n '{cluster_name_override}'"
    else:
        name_switch = ""

    print(f"Starting Ray cluster from '{cluster_cfg}'")
    ray_up_cmd = f"ray up '{cluster_cfg}' -y --no-config-cache {restart_switch} {name_switch} --disable-usage-stats"
    _run_cmd(ray_up_cmd)
    print(f"Started Ray cluster from '{cluster_cfg}'")
36
+
37
+
38
def _is_port_in_use(port: Union[int, str]) -> bool:
    """Report whether a TCP connection to localhost on `port` succeeds.

    Args:
        port: Port number, as an int or a string convertible with ``int``.

    Returns:
        True when ``connect_ex`` returns 0 (connection accepted), else False.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        connect_result = probe.connect_ex(("localhost", int(port)))
    finally:
        # always release the probe socket, even if the port value is invalid
        probe.close()
    return connect_result == 0
41
+
42
+
43
def _is_dashboard_running(port: Union[int, str]) -> bool:
    """Treat the dashboard as running when `port` accepts local connections.

    This is only a port probe via `_is_port_in_use`; it does not verify what
    is actually listening on the port.
    """
    port_is_open = _is_port_in_use(port)
    return port_is_open
45
+
46
+
47
def _ray_dashboard_up(
    cluster_cfg: str, port: Union[str, int], timeout_seconds=15
) -> None:
    """Start `ray dashboard` and wait for its local port to accept connections.

    Args:
        cluster_cfg: Value interpolated into `ray dashboard '<cluster_cfg>'`.
        port: Local port passed via `--port` and then polled.
        timeout_seconds: How long to keep polling before giving up.

    Raises:
        TimeoutError: If the port never accepts a connection within
            `timeout_seconds`.
    """
    print(f"Starting Ray Dashboard for Ray cluster '{cluster_cfg}'")
    # the trailing `&` backgrounds the command in a POSIX shell so that this
    # call returns while the dashboard process keeps running
    _run_cmd(f"ray dashboard '{cluster_cfg}' --port {port} &")

    began_at = time.monotonic()
    while time.monotonic() - began_at <= timeout_seconds:
        if _is_dashboard_running(port):
            break
        time.sleep(0.1)
    else:
        # the loop ran out of time without ever seeing the port open
        raise TimeoutError(
            f"Timed out after waiting {timeout_seconds} seconds for dashboard "
            f"to establish connection on port {port}."
        )
    print(f"Started Ray Dashboard for Ray cluster '{cluster_cfg}'")
65
+
66
+
67
def _get_head_node_ip(cluster_cfg: str) -> str:
    """Return the head node IPv4 address reported by `ray get-head-ip`.

    Args:
        cluster_cfg: Value interpolated into `ray get-head-ip '<cluster_cfg>'`.

    Returns:
        The last line of the command's stdout, which must look like a
        dotted-quad IPv4 address.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
        RuntimeError: If the command prints nothing, or its last stdout line
            is not an IPv4-looking address.
    """
    print(f"Getting Ray cluster head node IP for '{cluster_cfg}'")
    cmd = f"ray get-head-ip '{cluster_cfg}'"
    proc = subprocess.run(
        cmd,
        shell=True,
        capture_output=True,
        text=True,
        check=True,
    )
    # the head node IP should be the last line printed to stdout
    # TODO(pdames): add IPv6 support
    stdout_lines = proc.stdout.splitlines()
    # empty output would otherwise surface as a bare IndexError; route it
    # through the same "no IP found" failure path as malformed output
    head_node_ip = stdout_lines[-1] if stdout_lines else ""
    if not re.match(
        r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$",
        head_node_ip,
    ):
        print(
            f"Failed to find Ray Head Node IP Address in `{cmd}` "
            f"output: {proc.stdout}"
        )
        raise RuntimeError("No Ray Head Node IP Address Found")
    print(f"Ray cluster head node IP for '{cluster_cfg}': {head_node_ip}")
    return head_node_ip
91
+
92
+
93
def _ray_down_cmd(cluster_cfg: str) -> str:
    """Build the `ray down` command line for `cluster_cfg` without running it."""
    teardown_cmd = "ray down '{}' -y".format(cluster_cfg)
    return teardown_cmd
95
+
96
+
97
def _ray_down(cluster_cfg: str) -> None:
    """Run `ray down` for `cluster_cfg`, printing progress before and after."""
    print(f"Destroying Ray cluster for '{cluster_cfg}'")
    teardown_cmd = _ray_down_cmd(cluster_cfg)
    _run_cmd(teardown_cmd)
    print(f"Destroyed Ray cluster for '{cluster_cfg}'")
101
+
102
+
103
def _ray_cluster_running(cluster_cfg: str) -> bool:
    """Infer whether a cluster is up from a head node IP lookup.

    Any exception raised by `_get_head_node_ip` is printed and treated as
    "not running" rather than propagated.

    Returns:
        True if the head node IP lookup succeeded, False otherwise.
    """
    cluster_is_up = True
    try:
        _get_head_node_ip(cluster_cfg)
    except Exception as lookup_error:
        cluster_is_up = False
        print(f"Get Head Node IP Failed with Exception: {lookup_error}")
        print("Assuming Ray Cluster is Not Running")
    return cluster_is_up
111
+
112
+
113
@dataclass(frozen=True)
class DeltaCatJobRunResult:
    """Immutable record describing one job run."""

    # identifier of the job this result belongs to
    job_id: str
    # status the job had when the result was captured
    job_status: JobStatus
    # logs retrieved for the job; the concrete type is left open (Any)
    job_logs: Any
118
+
119
+
120
+ class DeltaCatJobClient(JobSubmissionClient):
121
@staticmethod
def of(
    cluster_cfg_file_path: str = "./deltacat.yaml",
    *,
    launch_cluster: bool = True,
    start_dashboard: bool = True,
    restart_ray: bool = False,
    head_node_ip: str = None,
    dashboard_wait_time_seconds: int = 30,
    port: Union[int, str] = "8265",
    cluster_name_override: str = None,
):
    """Create a job client, optionally launching the cluster and dashboard.

    Args:
        cluster_cfg_file_path: Cluster config handed to the `ray` CLI helpers.
            When falsy, no cluster or dashboard is started and the client
            connects to 127.0.0.1.
        launch_cluster: Run `ray up` if no running cluster is detected (or
            if `restart_ray` is set).
        start_dashboard: Run `ray dashboard` if `port` is not already
            accepting local connections.
        restart_ray: With `launch_cluster` False, restart Ray on an already
            running cluster via `ray up --restart-only`.
        head_node_ip: Address to connect to. When omitted, 127.0.0.1 is used
            if the dashboard port is open locally, otherwise the address
            reported by `ray get-head-ip`.
        dashboard_wait_time_seconds: Timeout for the dashboard port to open.
        port: Port used for both the dashboard and the client URL.
        cluster_name_override: Optional cluster name forwarded to `ray up`.

    Returns:
        A `JobSubmissionClient` instance re-classed as `DeltaCatJobClient`.

    Raises:
        RuntimeError: If `restart_ray` is requested without `launch_cluster`
            and no running cluster is found.
        Exception: Any other initialization error is printed with
            troubleshooting hints and re-raised unchanged.
    """
    job_submission_client_url = None
    try:
        # launch Ray cluster if necessary
        if cluster_cfg_file_path:
            if launch_cluster:
                # NOTE(review): this path always passes --no-restart, even
                # when restart_ray is True on a running cluster; confirm
                # that is intended.
                if not _ray_cluster_running(cluster_cfg_file_path) or restart_ray:
                    _ray_up(cluster_cfg_file_path, cluster_name_override)
            elif restart_ray:
                if _ray_cluster_running(cluster_cfg_file_path):
                    # keyword arguments are required here: _ray_up's second
                    # positional parameter is the cluster name, not the
                    # restart flag
                    _ray_up(
                        cluster_cfg_file_path,
                        cluster_name_override=cluster_name_override,
                        restart_only=restart_ray,
                    )
                else:
                    raise RuntimeError(
                        f"Cannot Restart Ray: Ray Cluster for "
                        f"`{cluster_cfg_file_path}` not found."
                    )
            dashboard_running = _is_dashboard_running(port)
            if not dashboard_running and start_dashboard:
                _ray_dashboard_up(
                    cluster_cfg=cluster_cfg_file_path,
                    port=port,
                    timeout_seconds=dashboard_wait_time_seconds,
                )
                dashboard_running = True
            if not head_node_ip:
                head_node_ip = (
                    "127.0.0.1"
                    # use dashboard port forwarding on localhost
                    if dashboard_running
                    # fetch the remote head node IP
                    else _get_head_node_ip(cluster_cfg_file_path)
                )
        else:
            head_node_ip = "127.0.0.1"
        job_submission_client_url = f"http://{head_node_ip}:{port}"
        print(
            f"Initializing Ray Job Submission Client with URL: "
            f"{job_submission_client_url}"
        )
        client = JobSubmissionClient(job_submission_client_url)
        # the below class change is safe as long as we only add new methods
        # to the wrapped JobSubmissionClient that don't alter its internal
        # state
        client.__class__ = DeltaCatJobClient
        return client
    except Exception as e:
        print(f"Unexpected error while initializing Ray Job Client: {e}")
        if job_submission_client_url:
            print(
                f"Please ensure that Ray was installed with a job server "
                f'enabled via `pip install -U "ray[default]"` and '
                f"that http://{head_node_ip}:{port} is accessible. You "
                f"can optionally run `ray dashboard` to forward the "
                f"remote Ray head node port to a local port (default 8265) "
                f'then run `ray_job_client("127.0.0.1", 8265)` '
                f"to connect via localhost."
            )
        if cluster_cfg_file_path:
            print(
                f"If you're done submitting jobs, ensure that the remote "
                f"Ray Cluster is shut down by running: "
                f"{_ray_down_cmd(cluster_cfg_file_path)}"
            )
        # bare raise re-raises the active exception with its traceback intact
        raise
198
+
199
def run_job(
    self,
    *,
    entrypoint: str,
    runtime_env: Optional[Dict[str, Any]] = None,
    timeout_seconds: int = 600,
    **kwargs,
) -> DeltaCatJobRunResult:
    """
    Submit a Ray job and block until it finishes.

    The job is submitted with `submit_job`, awaited with `await_job`, and its
    logs are fetched with `get_job_logs`. A result is returned only when the
    job's status is SUCCEEDED; otherwise the logs are printed and an error is
    raised.

    Args:
        entrypoint: The entry point for the job to be executed (module
            or script to run).
        runtime_env: Runtime environment configuration for the job.
            Some commonly used keys include `working_dir` (directory
            containing the job code), `pip` (list of pip packages to
            install), and `env_vars` (environment variables for the job).
        timeout_seconds: Maximum time in seconds to wait for job completion.
            Defaults to 600 seconds (10 minutes).
        kwargs: Additional keyword arguments forwarded to `submit_job`.

    Returns:
        A `DeltaCatJobRunResult` holding the job ID, status, and logs.

    Raises:
        RuntimeError: If the job ends with any status other than SUCCEEDED.
        TimeoutError: If the job doesn't complete within the specified
            timeout period.

    Example:
        >>> client = job_client()
        >>> result = client.run_job(
        ...     # Shell command to run job
        ...     entrypoint="my_script.py",
        ...     runtime_env={
        ...         # Path to the local directory containing my_script.py
        ...         "working_dir": "./",
        ...         # Pip dependencies to install
        ...         "pip": ["pandas", "numpy"],
        ...         # System environment variables to set
        ...         "env_vars": {"DATA_PATH": "/path/to/data"},
        ...     },
        ...     timeout_seconds=1200
        ... )
    """
    submitted_id = self.submit_job(
        entrypoint=entrypoint,
        runtime_env=runtime_env,
        **kwargs,
    )
    # timed_invocation yields the awaited status plus a latency that is not
    # used here
    final_status, _latency = timed_invocation(
        self.await_job,
        submitted_id,
        timeout_seconds=timeout_seconds,
    )
    captured_logs = self.get_job_logs(submitted_id)
    if final_status == JobStatus.SUCCEEDED:
        return DeltaCatJobRunResult(
            job_id=submitted_id,
            job_status=final_status,
            job_logs=captured_logs,
        )
    # surface the logs before failing so the cause is visible to the caller
    print(f"Job `{submitted_id}` logs: ")
    print(captured_logs)
    raise RuntimeError(f"Job `{submitted_id}` terminated with status: {final_status}")
270
+
271
def await_job(
    self,
    job_id: str,
    await_status: Set[JobStatus] = None,
    *,
    timeout_seconds: int = 600,
) -> JobStatus:
    """
    Polls a job's status until it matches an awaited status or times out.

    The job status is always checked at least once, even when
    `timeout_seconds` is zero or negative. If none of the awaited statuses
    is observed before the timeout expires, the job is stopped via
    `self.stop_job(job_id)` and a `TimeoutError` is raised.

    Args:
        job_id: The unique identifier of the job to monitor.
        await_status: Set of :class:`ray.job_submission.JobStatus` to wait
            for. The function returns as soon as the job reaches any of
            these states. Defaults to None, which waits for any of
            SUCCEEDED, STOPPED, or FAILED.
        timeout_seconds: Maximum time to wait in seconds.
            Defaults to 600 seconds (10 minutes).

    Returns:
        The first awaited status observed for the job.

    Raises:
        TimeoutError: If no awaited status is reached within the
            specified timeout period. The job is stopped before this
            error is raised.

    Example:
        >>> client = job_client()
        >>> job_id = client.submit_job(
        ...     # Shell command to run job
        ...     entrypoint=f"python copy.py --source '{source}' --dest '{dest}'",
        ...     # Path to the local directory containing copy.py
        ...     runtime_env={"working_dir": "./"},
        ... )
        >>> # wait for the job to reach a terminal state
        >>> client.await_job(job_id)
    """
    if await_status is None:
        # resolve the default lazily to avoid a shared mutable default argument
        await_status = {
            JobStatus.SUCCEEDED,
            JobStatus.STOPPED,
            JobStatus.FAILED,
        }
    poll_interval_seconds = 0.1
    deadline = time.monotonic() + timeout_seconds
    while True:
        # poll before checking the deadline so that `status` is always bound
        # and a job that is already done is reported even with a zero timeout
        status = self.get_job_status(job_id)
        if status in await_status:
            return status
        remaining_seconds = deadline - time.monotonic()
        if remaining_seconds <= 0:
            break
        # never sleep past the deadline
        time.sleep(min(poll_interval_seconds, remaining_seconds))
    # the job did not reach an awaited status in time: stop it before raising
    self.stop_job(job_id)
    raise TimeoutError(
        f"Timed out after waiting {timeout_seconds} seconds for job "
        f"`{job_id}` status: {status}"
    )
330
+
331
+
332
def local_job_client(*args, **kwargs) -> DeltaCatJobClient:
    """
    Create a DeltaCAT Job Client that can be used to submit jobs to a local Ray
    cluster. Initializes Ray if it's not already running.

    Args:
        *args: Positional arguments to pass to `deltacat.init()`.
        **kwargs: Keyword arguments to pass to `deltacat.init()`. The `force`
            keyword is always overridden to True.

    Returns:
        DeltaCatJobClient: A client instance that can be used to submit and
        manage local Ray jobs.

    Raises:
        RuntimeError: If no Ray context is returned by `deltacat.init()`, or
            if the context has no dashboard URL (i.e., a local Ray Job Server
            cannot be found).
    """
    # force reinitialization to ensure that we can get the Ray context
    kwargs["force"] = True
    context = dc.init(*args, **kwargs)
    if context is None:
        raise RuntimeError("Failed to retrieve Ray context.")
    if not context.dashboard_url:
        # the Ray Dashboard URL is also the Ray Job Server URL
        raise RuntimeError(
            "Ray Job Server not found! Please reinstall Ray using "
            "`pip install -U 'ray[default]'`"
        )
    # split on the last colon only, so a host that itself contains colons
    # (e.g., a bracketed IPv6 address) still yields exactly (host, port)
    head_node_ip, port = context.dashboard_url.rsplit(":", 1)
    return DeltaCatJobClient.of(
        None,
        launch_cluster=False,
        start_dashboard=False,
        head_node_ip=head_node_ip,
        port=port,
    )
367
+
368
+
369
def job_client(
    cluster_cfg_file_path: str = "./deltacat.yaml",
    *,
    launch_cluster: bool = True,
    start_dashboard: bool = True,
    restart_ray: bool = False,
    head_node_ip: str = None,
    dashboard_wait_time_seconds: int = 15,
    port: Union[str, int] = "8265",
    cluster_name_override: str = None,
) -> DeltaCatJobClient:
    """
    Build a DeltaCAT Job Client for submitting jobs to a remote Ray cluster.

    This is a thin convenience wrapper: every argument is forwarded
    unchanged to `DeltaCatJobClient.of`.

    Args:
        cluster_cfg_file_path: Path to the Ray Cluster Launcher config file.
            Defaults to "./deltacat.yaml".
        launch_cluster: Whether to launch a new Ray cluster.
            Defaults to True.
        start_dashboard: Whether to start the Ray dashboard.
            Defaults to True.
        restart_ray: Whether to restart Ray if it's already running.
            Defaults to False.
        head_node_ip: IP address of the Ray cluster head node. If None, the
            configuration from the cluster config file is used.
            Defaults to None.
        dashboard_wait_time_seconds: Time in seconds to wait for the Ray
            dashboard to start if `start_dashboard` is True.
            Defaults to 15.
        port: Port number for the Ray dashboard/job server.
            Defaults to "8265".
        cluster_name_override: Forwarded as-is to `DeltaCatJobClient.of`;
            presumably replaces the cluster name from the config file
            (confirm against `DeltaCatJobClient.of`). Defaults to None.

    Returns:
        DeltaCatJobClient: A client instance that can be used to submit and
        manage jobs on the Ray cluster.

    Raises:
        RuntimeError: If the Ray Job Server is not found (raised by the
            underlying `DeltaCatJobClient.of` call, not by this wrapper).
    """
    # gather the keyword-only options once, then hand them over in one call
    client_options = {
        "launch_cluster": launch_cluster,
        "start_dashboard": start_dashboard,
        "restart_ray": restart_ray,
        "head_node_ip": head_node_ip,
        "dashboard_wait_time_seconds": dashboard_wait_time_seconds,
        "port": port,
        "cluster_name_override": cluster_name_override,
    }
    return DeltaCatJobClient.of(cluster_cfg_file_path, **client_options)
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Optional, Dict, Any
2
+ from typing import Optional, Dict, Any, List
3
3
  from deltacat import logs
4
4
  from deltacat.storage import (
5
5
  Delta,
@@ -61,6 +61,7 @@ def _estimate_resources_required_to_process_delta_using_previous_inflation(
61
61
  def _estimate_resources_required_to_process_delta_using_type_params(
62
62
  delta: Delta,
63
63
  operation_type: OperationType,
64
+ all_column_names: List[str],
64
65
  estimate_resources_params: EstimateResourcesParams,
65
66
  deltacat_storage: unimplemented_deltacat_storage,
66
67
  deltacat_storage_kwargs: Dict[str, Any],
@@ -113,6 +114,7 @@ def _estimate_resources_required_to_process_delta_using_type_params(
113
114
  """
114
115
  appended = append_content_type_params(
115
116
  delta=delta,
117
+ all_column_names=all_column_names,
116
118
  deltacat_storage=deltacat_storage,
117
119
  deltacat_storage_kwargs=deltacat_storage_kwargs,
118
120
  file_reader_kwargs_provider=file_reader_kwargs_provider,
@@ -289,10 +291,18 @@ def estimate_resources_required_to_process_delta(
289
291
  estimate_resources_params.resource_estimation_method
290
292
  )
291
293
 
294
+ all_column_names = deltacat_storage.get_table_version_column_names(
295
+ delta.locator.namespace,
296
+ delta.locator.table_name,
297
+ delta.locator.table_version,
298
+ **deltacat_storage_kwargs,
299
+ )
300
+
292
301
  for func in functions:
293
302
  resources = func(
294
303
  delta=delta,
295
304
  operation_type=operation_type,
305
+ all_column_names=all_column_names,
296
306
  estimate_resources_params=estimate_resources_params,
297
307
  deltacat_storage=deltacat_storage,
298
308
  deltacat_storage_kwargs=deltacat_storage_kwargs,
deltacat/constants.py CHANGED
@@ -1,4 +1,10 @@
1
- from deltacat.utils.common import env_string
1
+ from __future__ import annotations
2
+
3
+ import botocore.exceptions
4
+
5
+ from daft.exceptions import DaftTransientError
6
+ from deltacat.utils.common import env_string, env_bool
7
+ from deltacat.utils.common import env_integer
2
8
 
3
9
  # Environment variables
4
10
  DELTACAT_SYS_LOG_LEVEL = env_string("DELTACAT_SYS_LOG_LEVEL", "DEBUG")
@@ -30,6 +36,25 @@ DELTACAT_APP_DEBUG_LOG_BASE_FILE_NAME = env_string(
30
36
  )
31
37
  # A json context which will be logged along with other context args.
32
38
  DELTACAT_LOGGER_CONTEXT = env_string("DELTACAT_LOGGER_CONTEXT", None)
39
+ DELTACAT_LOGGER_USE_SINGLE_HANDLER = env_bool(
40
+ "DELTACAT_LOGGER_USE_SINGLE_HANDLER",
41
+ False,
42
+ )
43
+ DELTACAT_ROOT = env_string(
44
+ "DELTACAT_ROOT",
45
+ "",
46
+ )
47
+
48
+ # CLI Args
49
# Names of the supported metafile serialization formats.
METAFILE_FORMAT_JSON = "json"
METAFILE_FORMAT_MSGPACK = "msgpack"
# Active metafile format: taken from METAFILE_FORMAT via env_string, with
# msgpack as the fallback value.
METAFILE_FORMAT = env_string("METAFILE_FORMAT", METAFILE_FORMAT_MSGPACK)
SUPPORTED_METAFILE_FORMATS = [METAFILE_FORMAT_JSON, METAFILE_FORMAT_MSGPACK]
# File extension for the active format. An unsupported METAFILE_FORMAT value
# fails here with a KeyError when this module is imported.
METAFILE_EXT = {
    METAFILE_FORMAT_JSON: ".json",
    METAFILE_FORMAT_MSGPACK: ".mpk",
}[METAFILE_FORMAT]
57
+
33
58
 
34
59
  # Byte Units
35
60
  BYTES_PER_KIBIBYTE = 2**10
@@ -41,6 +66,11 @@ BYTES_PER_PEBIBYTE = 2**50
41
66
  SIGNED_INT64_MIN_VALUE = -(2**63)
42
67
  SIGNED_INT64_MAX_VALUE = 2**63 - 1
43
68
 
69
+ # Time Units
70
+ NANOS_PER_SEC = 1_000_000_000
71
+ MICROS_PER_SEC = 1_000_000
72
+ MILLIS_PER_SEC = 1000
73
+
44
74
  # Inflation multiplier from snappy-compressed parquet to pyarrow.
45
75
  # This should be kept larger than actual average inflation multipliers.
46
76
  # Note that this is a very rough guess since actual observed pyarrow
@@ -58,3 +88,62 @@ MEMORY_TO_HASH_BUCKET_COUNT_RATIO = 0.0512 * BYTES_PER_TEBIBYTE
58
88
 
59
89
  # The number of bytes allocated to null values in string physical type in parquet
60
90
  NULL_SIZE_BYTES = 4
91
+
92
+ # Metastore Constants
93
+ REVISION_DIR_NAME: str = "rev"
94
+ TXN_DIR_NAME: str = "txn"
95
+ RUNNING_TXN_DIR_NAME: str = "running"
96
+ FAILED_TXN_DIR_NAME: str = "failed"
97
+ PAUSED_TXN_DIR_NAME: str = "paused"
98
+ SUCCESS_TXN_DIR_NAME: str = "success"
99
+ DATA_FILE_DIR_NAME: str = "data"
100
+ REV_DIR_NAME: str = "rev"
101
+ TXN_PART_SEPARATOR = "_"
102
+
103
+ # Storage interface defaults
104
+ # These defaults should be applied in catalog interface implementations
105
+ # Storage interface implementations should be agnostic to defaults and require full information
106
+ DEFAULT_CATALOG = "default"
107
+ DEFAULT_NAMESPACE = "default"
108
+ DEFAULT_TABLE_VERSION = "1"
109
+ DEFAULT_STREAM_ID = "stream"
110
+ DEFAULT_PARTITION_ID = "partition"
111
+ DEFAULT_PARTITION_VALUES = ["default"]
112
+
113
+ # Transaction Status constants
114
+ SUCCESSFULLY_CLEANED = "cleaned"
115
+ CURRENTLY_CLEANING = "cleaning"
116
+ TIMEOUT_TXN = "timedout"
117
+
118
+ # operation timeout constants
119
+ OPERATION_TIMEOUTS = {
120
+ "create": 5,
121
+ "update": 3,
122
+ "delete": 4,
123
+ "read_siblings": 2,
124
+ "read_children": 2,
125
+ "read_latest": 3,
126
+ "read_exists": 1,
127
+ }
128
+ # Upload/Download Retry Defaults
129
+ UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY = env_integer(
130
+ "UPLOAD_DOWNLOAD_RETRY_STOP_AFTER_DELAY", 10 * 60
131
+ )
132
+ UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY = env_integer(
133
+ "UPLOAD_SLICED_TABLE_RETRY_STOP_AFTER_DELAY", 30 * 60
134
+ )
135
+ DOWNLOAD_MANIFEST_ENTRY_RETRY_STOP_AFTER_DELAY = env_integer(
136
+ "DOWNLOAD_MANIFEST_ENTRY_RETRY_STOP_AFTER_DELAY", 30 * 60
137
+ )
138
+ DEFAULT_FILE_READ_TIMEOUT_MS = env_integer(
139
+ "DEFAULT_FILE_READ_TIMEOUT_MS", 300_000
140
+ ) # 5 mins
141
+ RETRYABLE_TRANSIENT_ERRORS = (
142
+ OSError,
143
+ botocore.exceptions.ConnectionError,
144
+ botocore.exceptions.HTTPClientError,
145
+ botocore.exceptions.NoCredentialsError,
146
+ botocore.exceptions.ConnectTimeoutError,
147
+ botocore.exceptions.ReadTimeoutError,
148
+ DaftTransientError,
149
+ )
File without changes
File without changes
File without changes
File without changes