deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
deltacat/aws/redshift/model/manifest.py (deleted in 2.0.0)
@@ -1,394 +0,0 @@
1
- # Allow classes to use self-referencing Type hints in Python 3.7.
2
- from __future__ import annotations
3
-
4
- import itertools
5
- import logging
6
- from typing import Any, Dict, List, Optional
7
- from uuid import uuid4
8
- from enum import Enum
9
-
10
- from deltacat import logs
11
-
12
- logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
13
-
14
-
15
- class EntryType(str, Enum):
16
- """
17
- Enum representing all possible content categories of an manifest entry file
18
- """
19
-
20
- DATA = "data"
21
- POSITIONAL_DELETE = "positional_delete"
22
- EQUALITY_DELETE = "equality_delete"
23
-
24
- @classmethod
25
- def get_default(cls):
26
- return EntryType.DATA
27
-
28
- @classmethod
29
- def list(cls):
30
- return [c.value for c in EntryType]
31
-
32
-
33
- class EntryFileParams(dict):
34
- """
35
- Represents parameters relevant to the underlying contents of manifest entry. Contains all parameters required to support DELETEs
36
- equality_column_names: List of column names that would be used to determine row equality for equality deletes. Relevant only to equality deletes
37
- position: Ordinal position of a deleted row in the target data file identified by uri, starting at 0. Relevant only to positional deletes
38
- """
39
-
40
- @staticmethod
41
- def of(
42
- equality_column_names: Optional[List[str]] = None,
43
- position: Optional[int] = None,
44
- ) -> EntryFileParams:
45
- entry_file_params = EntryFileParams()
46
- if equality_column_names is not None:
47
- entry_file_params["equality_column_names"] = equality_column_names
48
- if position is not None:
49
- entry_file_params["position"] = position
50
- return entry_file_params
51
-
52
- @property
53
- def equality_column_names(self) -> Optional[List[str]]:
54
- return self.get("equality_column_names")
55
-
56
- @property
57
- def url(self) -> Optional[str]:
58
- return self.get("url")
59
-
60
- @property
61
- def position(self) -> Optional[int]:
62
- return self.get("position")
63
-
64
-
65
- class Manifest(dict):
66
- @staticmethod
67
- def _build_manifest(
68
- meta: Optional[ManifestMeta],
69
- entries: Optional[ManifestEntryList],
70
- author: Optional[ManifestAuthor] = None,
71
- uuid: str = None,
72
- entry_type: Optional[EntryType] = None,
73
- ) -> Manifest:
74
- if not uuid:
75
- uuid = str(uuid4())
76
- manifest = Manifest()
77
- manifest["id"] = uuid
78
- if meta is not None:
79
- manifest["meta"] = meta
80
- if entries is not None:
81
- manifest["entries"] = entries
82
- if author is not None:
83
- manifest["author"] = author
84
- if entry_type is not None:
85
- manifest["entry_type"] = entry_type.value
86
- return manifest
87
-
88
- @staticmethod
89
- def of(
90
- entries: ManifestEntryList,
91
- author: Optional[ManifestAuthor] = None,
92
- uuid: str = None,
93
- entry_type: Optional[EntryType] = None,
94
- ) -> Manifest:
95
- if not uuid:
96
- uuid = str(uuid4())
97
- total_record_count = 0
98
- total_content_length = 0
99
- total_source_content_length = 0
100
- content_type = None
101
- content_encoding = None
102
- partition_values_set = set()
103
- partition_values = None
104
- if entries:
105
- content_type = entries[0].meta.content_type
106
- content_encoding = entries[0].meta.content_encoding
107
- for entry in entries:
108
- meta = entry.meta
109
- if meta.content_type != content_type:
110
- content_type = None
111
- if meta.content_encoding != content_encoding:
112
- content_encoding = None
113
- entry_content_type = meta.content_type
114
- if entry_content_type != content_type:
115
- msg = (
116
- f"Expected all manifest entries to have content "
117
- f"type '{content_type}' but found "
118
- f"'{entry_content_type}'"
119
- )
120
- raise ValueError(msg)
121
- entry_content_encoding = meta["content_encoding"]
122
- if entry_content_encoding != content_encoding:
123
- msg = (
124
- f"Expected all manifest entries to have content "
125
- f"encoding '{content_encoding}' but found "
126
- f"'{entry_content_encoding}'"
127
- )
128
- raise ValueError(msg)
129
- total_record_count += meta.record_count or 0
130
- total_content_length += meta.content_length or 0
131
- total_source_content_length += meta.source_content_length or 0
132
- if len(partition_values_set) <= 1:
133
- partition_values_set.add(entry.meta.partition_values)
134
-
135
- if len(partition_values_set) == 1:
136
- partition_values = partition_values_set.pop()
137
-
138
- meta = ManifestMeta.of(
139
- total_record_count,
140
- total_content_length,
141
- content_type,
142
- content_encoding,
143
- total_source_content_length,
144
- entry_type=entry_type,
145
- partition_values=partition_values,
146
- )
147
- manifest = Manifest._build_manifest(meta, entries, author, uuid, entry_type)
148
- return manifest
149
-
150
- @staticmethod
151
- def merge_manifests(
152
- manifests: List[Manifest], author: Optional[ManifestAuthor] = None
153
- ) -> Manifest:
154
- all_entries = ManifestEntryList(
155
- itertools.chain(*[m.entries for m in manifests])
156
- )
157
- merged_manifest = Manifest.of(all_entries, author)
158
- return merged_manifest
159
-
160
- @property
161
- def meta(self) -> Optional[ManifestMeta]:
162
- val: Dict[str, Any] = self.get("meta")
163
- if val is not None and not isinstance(val, ManifestMeta):
164
- self["meta"] = val = ManifestMeta(val)
165
- return val
166
-
167
- @property
168
- def entries(self) -> Optional[ManifestEntryList]:
169
- val: List[ManifestEntry] = self.get("entries")
170
- if val is not None and not isinstance(val, ManifestEntryList):
171
- self["entries"] = val = ManifestEntryList.of(val)
172
- return val
173
-
174
- @property
175
- def id(self) -> str:
176
- return self["id"]
177
-
178
- @property
179
- def author(self) -> Optional[ManifestAuthor]:
180
- val: Dict[str, Any] = self.get("author")
181
- if val is not None and not isinstance(val, ManifestAuthor):
182
- self["author"] = val = ManifestAuthor(val)
183
- return val
184
-
185
-
186
- class ManifestMeta(dict):
187
- @staticmethod
188
- def of(
189
- record_count: Optional[int],
190
- content_length: Optional[int],
191
- content_type: Optional[str],
192
- content_encoding: Optional[str],
193
- source_content_length: Optional[int] = None,
194
- credentials: Optional[Dict[str, str]] = None,
195
- content_type_parameters: Optional[List[Dict[str, str]]] = None,
196
- entry_type: Optional[EntryType] = None,
197
- partition_values: Optional[List[str]] = None,
198
- ) -> ManifestMeta:
199
- manifest_meta = ManifestMeta()
200
- if record_count is not None:
201
- manifest_meta["record_count"] = record_count
202
- if content_length is not None:
203
- manifest_meta["content_length"] = content_length
204
- if source_content_length is not None:
205
- manifest_meta["source_content_length"] = source_content_length
206
- if content_type is not None:
207
- manifest_meta["content_type"] = content_type
208
- if content_type_parameters is not None:
209
- manifest_meta["content_type_parameters"] = content_type_parameters
210
- if content_encoding is not None:
211
- manifest_meta["content_encoding"] = content_encoding
212
- if credentials is not None:
213
- manifest_meta["credentials"] = credentials
214
- if entry_type is not None:
215
- manifest_meta["entry_type"] = entry_type.value
216
- if partition_values is not None:
217
- manifest_meta["partition_values"] = partition_values
218
- return manifest_meta
219
-
220
- @property
221
- def record_count(self) -> Optional[int]:
222
- return self.get("record_count")
223
-
224
- @property
225
- def content_length(self) -> Optional[int]:
226
- return self.get("content_length")
227
-
228
- @property
229
- def content_type(self) -> Optional[str]:
230
- return self.get("content_type")
231
-
232
- @property
233
- def content_encoding(self) -> Optional[str]:
234
- return self.get("content_encoding")
235
-
236
- @property
237
- def source_content_length(self) -> Optional[int]:
238
- return self.get("source_content_length")
239
-
240
- @property
241
- def content_type_parameters(self) -> Optional[List[Dict[str, str]]]:
242
- return self.get("content_type_parameters")
243
-
244
- @content_type_parameters.setter
245
- def content_type_parameters(self, params: List[Dict[str, str]]) -> None:
246
- self["content_type_parameters"] = params
247
-
248
- @property
249
- def credentials(self) -> Optional[Dict[str, str]]:
250
- return self.get("credentials")
251
-
252
- @property
253
- def entry_type(self) -> Optional[EntryType]:
254
- val = self.get("entry_type")
255
- if val is not None:
256
- return EntryType(self["entry_type"])
257
- return val
258
-
259
- @property
260
- def partition_values(self) -> Optional[List[str]]:
261
- return self.get("partition_values")
262
-
263
-
264
class ManifestAuthor(dict):
    """Dict-backed record holding an author's optional ``name`` and ``version``."""

    @staticmethod
    def of(name: Optional[str], version: Optional[str]) -> ManifestAuthor:
        """Build an author, storing only the arguments that are not ``None``."""
        candidates = (("name", name), ("version", version))
        return ManifestAuthor(
            {key: value for key, value in candidates if value is not None}
        )

    @property
    def name(self) -> Optional[str]:
        """Return the value stored under ``"name"``, or ``None`` if unset."""
        author_name = self.get("name")
        return author_name

    @property
    def version(self) -> Optional[str]:
        """Return the value stored under ``"version"``, or ``None`` if unset."""
        author_version = self.get("version")
        return author_version
281
-
282
-
283
class ManifestEntry(dict):
    """Dict-backed description of a single manifest entry.

    ``of`` always writes matching ``"url"`` and ``"uri"`` keys and optionally
    writes ``"meta"``, ``"mandatory"``, ``"id"``, ``"entry_type"`` and
    ``"entry_file_params"``. The properties read those keys back.
    """

    @staticmethod
    def of(
        url: Optional[str],
        meta: Optional[ManifestMeta],
        mandatory: bool = True,
        uri: Optional[str] = None,
        uuid: Optional[str] = None,
        entry_type: Optional[EntryType] = None,
        entry_file_params: Optional[EntryFileParams] = None,
    ) -> ManifestEntry:
        """Create a manifest entry from a location and optional attributes.

        Args:
            url: Location of the entry contents. May be omitted if ``uri`` is
                given.
            meta: Optional metadata stored under ``"meta"``.
            mandatory: Stored under ``"mandatory"`` unless it is ``None``.
            uri: Alternative spelling of ``url``. If both are given they must
                be equal.
            uuid: Optional identifier stored under ``"id"``.
            entry_type: Optional entry type; its ``.value`` is stored.
            entry_file_params: Optional mapping whose ``"url"`` must equal the
                entry's own URL.

        Returns:
            The populated ``ManifestEntry``.

        Raises:
            ValueError: If neither ``url`` nor ``uri`` is given, if they
                differ, or if ``entry_file_params`` names a different URL.
        """
        if not (uri or url):
            raise ValueError("No URI or URL specified for manifest entry contents.")
        if (uri and url) and (uri != url):
            raise ValueError(f"Manifest entry URI ({uri}) != URL ({url})")

        manifest_entry = ManifestEntry()
        # At least one of the two is truthy here, and they agree when both are.
        manifest_entry["url"] = manifest_entry["uri"] = url or uri
        if meta is not None:
            manifest_entry["meta"] = meta
        if mandatory is not None:
            manifest_entry["mandatory"] = mandatory
        if uuid is not None:
            manifest_entry["id"] = uuid
        if entry_type is not None:
            manifest_entry["entry_type"] = entry_type.value
        if entry_file_params is not None:
            # Read the URL by key for both the comparison and the message, so
            # a plain mapping without a ``url`` attribute still produces the
            # intended ValueError and not an AttributeError.
            params_url = entry_file_params.get("url")
            if params_url != manifest_entry.get("url"):
                msg = (
                    f"Expected manifest entry url: {manifest_entry.url}"
                    f" and entry_file_params: '{params_url}' to match"
                )
                raise ValueError(msg)
            manifest_entry["entry_file_params"] = entry_file_params
        return manifest_entry

    @staticmethod
    def from_s3_obj_url(
        url: str,
        record_count: int,
        source_content_length: Optional[int] = None,
        **s3_client_kwargs,
    ) -> ManifestEntry:
        """Create a manifest entry from the object found at ``url``.

        Args:
            url: Location passed to ``s3_utils.get_object_at_url``.
            record_count: Record count forwarded to ``ManifestMeta.of``.
            source_content_length: Optional source length forwarded to
                ``ManifestMeta.of``.
            **s3_client_kwargs: Forwarded to ``s3_utils.get_object_at_url``.

        Returns:
            A ``ManifestEntry`` for ``url`` whose meta is built from the
            object's ``ContentLength``, ``ContentType`` and ``ContentEncoding``.
        """
        # Imported inside the function, as in the original implementation.
        from deltacat.aws import s3u as s3_utils

        s3_obj = s3_utils.get_object_at_url(url, **s3_client_kwargs)
        logger.debug(f"Building manifest entry from {url}: {s3_obj}")
        manifest_entry_meta = ManifestMeta.of(
            record_count,
            s3_obj["ContentLength"],
            s3_obj["ContentType"],
            # NOTE(review): presumably s3_obj is a boto3 get_object response,
            # which may lack "ContentEncoding" for objects stored without one.
            # Use .get() so a missing key yields None, not a KeyError.
            s3_obj.get("ContentEncoding"),
            source_content_length,
        )
        return ManifestEntry.of(url, manifest_entry_meta)

    @property
    def uri(self) -> Optional[str]:
        """Return the value stored under ``"uri"``, or ``None`` if unset."""
        return self.get("uri")

    @property
    def url(self) -> Optional[str]:
        """Return the value stored under ``"url"``, or ``None`` if unset."""
        return self.get("url")

    @property
    def meta(self) -> Optional[ManifestMeta]:
        """Return the stored ``"meta"`` value as a ``ManifestMeta``.

        A value that is not already a ``ManifestMeta`` is wrapped once and the
        wrapped object is written back under ``"meta"``.
        """
        val: Dict[str, Any] = self.get("meta")
        if val is not None and not isinstance(val, ManifestMeta):
            self["meta"] = val = ManifestMeta(val)
        return val

    @property
    def mandatory(self) -> bool:
        """Return the value stored under ``"mandatory"``.

        Raises:
            KeyError: If the key is absent.
        """
        return self["mandatory"]

    @property
    def id(self) -> Optional[str]:
        """Return the value stored under ``"id"``, or ``None`` if unset."""
        return self.get("id")

    @property
    def entry_type(self) -> Optional[EntryType]:
        """Return the stored ``"entry_type"`` value as an ``EntryType``, or ``None``."""
        val = self.get("entry_type")
        return None if val is None else EntryType(val)

    @property
    def entry_file_params(self) -> Optional[EntryFileParams]:
        """Return the stored ``"entry_file_params"`` value as ``EntryFileParams``.

        A value that is not already an ``EntryFileParams`` is wrapped once and
        the wrapped object is written back under ``"entry_file_params"``.
        """
        val: Dict[str, Any] = self.get("entry_file_params")
        if val is not None and not isinstance(val, EntryFileParams):
            self["entry_file_params"] = val = EntryFileParams(val)
        return val
378
-
379
-
380
- class ManifestEntryList(List[ManifestEntry]):
381
@staticmethod
def of(entries: List[ManifestEntry]) -> ManifestEntryList:
    """Build a ``ManifestEntryList`` from ``entries``.

    ``None`` items and existing ``ManifestEntry`` items are kept as they are;
    every other item is wrapped in a ``ManifestEntry``.
    """
    wrapped = ManifestEntryList()
    wrapped.extend(
        item
        if item is None or isinstance(item, ManifestEntry)
        else ManifestEntry(item)
        for item in entries
    )
    return wrapped
389
-
390
def __getitem__(self, item):
    """Return the element(s) at ``item``, coercing raw values to ``ManifestEntry``.

    For an integer index, a stored value that is neither ``None`` nor a
    ``ManifestEntry`` is wrapped in a ``ManifestEntry`` and the wrapped object
    is written back into the list before being returned.

    For a slice, the selected elements are returned as a new
    ``ManifestEntryList`` built with ``ManifestEntryList.of``; this list is
    left unmodified.
    """
    if isinstance(item, slice):
        # A slice produces a list of entries, not a single entry. Wrapping
        # that list in ManifestEntry (a dict) and assigning it back over the
        # slice would fail or corrupt the list, so handle it separately.
        return ManifestEntryList.of(super().__getitem__(item))
    val = super().__getitem__(item)
    if val is not None and not isinstance(val, ManifestEntry):
        self[item] = val = ManifestEntry(val)
    return val