deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,53 +1,48 @@
1
- from typing import Any, Dict, List, Optional, Set, Union
1
+ from typing import Any, Dict, List, Optional, Union
2
2
 
3
- import pyarrow as pa
4
- import ray
5
-
6
- from deltacat.catalog.model.catalog import Catalog, all_catalogs
3
+ from deltacat.catalog.model.catalog import get_catalog
7
4
  from deltacat.catalog.model.table_definition import TableDefinition
8
- from deltacat.storage.model.sort_key import SortKey
5
+ from deltacat.storage.model.partition import (
6
+ Partition,
7
+ PartitionLocator,
8
+ PartitionScheme,
9
+ )
10
+ from deltacat.storage.model.sort_key import SortScheme
9
11
  from deltacat.storage.model.list_result import ListResult
10
- from deltacat.storage.model.namespace import Namespace
12
+ from deltacat.storage.model.namespace import Namespace, NamespaceProperties
13
+ from deltacat.storage.model.schema import (
14
+ Schema,
15
+ SchemaUpdateOperations,
16
+ )
17
+ from deltacat.storage.model.table import TableProperties
18
+ from deltacat.storage.model.table_version import TableVersionProperties
11
19
  from deltacat.storage.model.types import (
12
- DistributedDataset,
20
+ Dataset,
13
21
  LifecycleState,
14
- LocalDataset,
15
- LocalTable,
16
- SchemaConsistencyType,
22
+ StreamFormat,
23
+ )
24
+ from deltacat.storage.model.transaction import (
25
+ Transaction,
26
+ get_current_transaction,
17
27
  )
18
28
  from deltacat.types.media import ContentType
19
- from deltacat.types.tables import TableWriteMode
20
-
21
-
22
- def _get_catalog(name: Optional[str] = None) -> Catalog:
23
- if not all_catalogs:
24
- raise ValueError(
25
- "No catalogs available! Call "
26
- "`deltacat.init(catalogs={...})` to register one or more "
27
- "catalogs then retry."
28
- )
29
- catalog = (
30
- ray.get(all_catalogs.get.remote(name))
31
- if name
32
- else ray.get(all_catalogs.default.remote())
33
- )
34
- if not catalog:
35
- available_catalogs = ray.get(all_catalogs.all.remote()).values()
36
- raise ValueError(
37
- f"Catalog '{name}' not found. Available catalogs: " f"{available_catalogs}."
38
- )
39
- return catalog
29
+ from deltacat.types.tables import (
30
+ DatasetType,
31
+ TableWriteMode,
32
+ )
40
33
 
41
34
 
42
35
  # table functions
43
36
  def write_to_table(
44
- data: Union[LocalTable, LocalDataset, DistributedDataset],
37
+ data: Dataset,
45
38
  table: str,
39
+ *args,
46
40
  namespace: Optional[str] = None,
47
- catalog: Optional[str] = None,
41
+ table_version: Optional[str] = None,
48
42
  mode: TableWriteMode = TableWriteMode.AUTO,
49
43
  content_type: ContentType = ContentType.PARQUET,
50
- *args,
44
+ transaction: Optional[Transaction] = None,
45
+ catalog: Optional[str] = None,
51
46
  **kwargs,
52
47
  ) -> None:
53
48
  """Write local or distributed data to a table. Raises an error if the
@@ -56,229 +51,709 @@ def write_to_table(
56
51
  When creating a table, all `create_table` parameters may be optionally
57
52
  specified as additional keyword arguments. When appending to, or replacing,
58
53
  an existing table, all `alter_table` parameters may be optionally specified
59
- as additional keyword arguments."""
60
- _get_catalog(catalog).impl.write_to_table(
61
- data, table, namespace, mode, content_type, *args, **kwargs
54
+ as additional keyword arguments.
55
+
56
+ Args:
57
+ data: Local or distributed data to write to the table.
58
+ table: Name of the table to write to.
59
+ namespace: Optional namespace for the table. Uses default if not specified.
60
+ table_version: Optional version of the table to write to. If specified,
61
+ will create this version if it doesn't exist (in CREATE mode) or
62
+ get this version if it exists (in other modes). If not specified,
63
+ uses the latest version.
64
+ mode: Write mode (AUTO, CREATE, APPEND, REPLACE, MERGE, DELETE).
65
+ content_type: Content type used to write the data files. Defaults to PARQUET.
66
+ transaction: Optional transaction to append write operations to instead of
67
+ creating and committing a new transaction.
68
+ **kwargs: Additional keyword arguments.
69
+ """
70
+ if (transaction or get_current_transaction()) and catalog:
71
+ raise ValueError(
72
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
73
+ )
74
+ catalog_obj = get_catalog(catalog)
75
+ catalog_obj.impl.write_to_table(
76
+ data,
77
+ table,
78
+ *args,
79
+ namespace=namespace,
80
+ table_version=table_version,
81
+ mode=mode,
82
+ content_type=content_type,
83
+ transaction=transaction,
84
+ inner=catalog_obj.inner,
85
+ **kwargs,
62
86
  )
63
87
 
64
88
 
65
89
  def read_table(
66
90
  table: str,
91
+ *args,
67
92
  namespace: Optional[str] = None,
93
+ table_version: Optional[str] = None,
94
+ read_as: DatasetType = DatasetType.DAFT,
95
+ partition_filter: Optional[List[Union[Partition, PartitionLocator]]] = None,
96
+ max_parallelism: Optional[int] = None,
97
+ columns: Optional[List[str]] = None,
98
+ file_path_column: Optional[str] = None,
99
+ transaction: Optional[Transaction] = None,
68
100
  catalog: Optional[str] = None,
69
- *args,
70
101
  **kwargs,
71
- ) -> DistributedDataset:
72
- """Read a table into a distributed dataset."""
73
- return _get_catalog(catalog).impl.read_table(table, namespace, *args, **kwargs)
102
+ ) -> Dataset:
103
+ """Read a table into a dataset.
104
+
105
+ Args:
106
+ table: Name of the table to read.
107
+ namespace: Optional namespace of the table. Uses default if not specified.
108
+ table_version: Optional specific version of the table to read.
109
+ read_as: Dataset type to use for reading table files. Defaults to DatasetType.DAFT.
110
+ partition_filter: Optional list of partitions to read from.
111
+ max_parallelism: Optional maximum parallelism for data download. Defaults to the number of
112
+ available CPU cores for local dataset type reads (i.e., members of DatasetType.local())
113
+ and 100 for distributed dataset type reads (i.e., members of DatasetType.distributed()).
114
+ columns: Optional list of columns to include in the result.
115
+ file_path_column: Optional column name to add file paths to the result.
116
+ transaction: Optional transaction to chain this read operation to. If provided, uncommitted
117
+ changes from the transaction will be visible to this read operation.
118
+ **kwargs: Additional keyword arguments.
119
+
120
+ Returns:
121
+ Dataset containing the table data.
122
+ """
123
+ if (transaction or get_current_transaction()) and catalog:
124
+ raise ValueError(
125
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
126
+ )
127
+ catalog_obj = get_catalog(catalog)
128
+ return catalog_obj.impl.read_table(
129
+ table,
130
+ *args,
131
+ namespace=namespace,
132
+ table_version=table_version,
133
+ read_as=read_as,
134
+ partition_filter=partition_filter,
135
+ max_parallelism=max_parallelism,
136
+ columns=columns,
137
+ file_path_column=file_path_column,
138
+ transaction=transaction,
139
+ inner=catalog_obj.inner,
140
+ **kwargs,
141
+ )
74
142
 
75
143
 
76
144
  def alter_table(
77
145
  table: str,
146
+ *args,
78
147
  namespace: Optional[str] = None,
79
- catalog: Optional[str] = None,
148
+ table_version: Optional[str] = None,
80
149
  lifecycle_state: Optional[LifecycleState] = None,
81
- schema_updates: Optional[Dict[str, Any]] = None,
150
+ schema_updates: Optional[SchemaUpdateOperations] = None,
82
151
  partition_updates: Optional[Dict[str, Any]] = None,
83
- primary_keys: Optional[Set[str]] = None,
84
- sort_keys: Optional[List[SortKey]] = None,
85
- description: Optional[str] = None,
86
- properties: Optional[Dict[str, str]] = None,
87
- *args,
152
+ sort_scheme: Optional[SortScheme] = None,
153
+ table_description: Optional[str] = None,
154
+ table_version_description: Optional[str] = None,
155
+ table_properties: Optional[TableProperties] = None,
156
+ table_version_properties: Optional[TableVersionProperties] = None,
157
+ transaction: Optional[Transaction] = None,
158
+ catalog: Optional[str] = None,
88
159
  **kwargs,
89
160
  ) -> None:
90
- """Alter table definition."""
91
- _get_catalog(catalog).impl.alter_table(
161
+ """Alter deltacat table/table_version definition.
162
+
163
+ Modifies various aspects of a table's metadata including lifecycle state,
164
+ schema, partitioning, sort keys, description, and properties.
165
+
166
+ Args:
167
+ table: Name of the table to alter.
168
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
169
+ table_version: Optional specific version of the table to alter. Defaults to the latest active version.
170
+ lifecycle_state: New lifecycle state for the table.
171
+ schema_updates: Schema updates to apply.
172
+ partition_updates: Partition scheme updates to apply.
173
+ sort_scheme: New sort scheme.
174
+ table_description: New description for the table.
175
+ table_version_description: New description for the table version. Defaults to `table_description` if not specified.
176
+ table_properties: New table properties.
177
+ table_version_properties: New table version properties. Defaults to the current parent table properties if not specified.
178
+ transaction: Optional transaction to use. If None, creates a new transaction.
179
+
180
+ Returns:
181
+ None
182
+
183
+ Raises:
184
+ TableNotFoundError: If the table does not already exist.
185
+ TableVersionNotFoundError: If the specified table version or active table version does not exist.
186
+ """
187
+ if (transaction or get_current_transaction()) and catalog:
188
+ raise ValueError(
189
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
190
+ )
191
+ catalog_obj = get_catalog(catalog)
192
+ catalog_obj.impl.alter_table(
92
193
  table,
93
- namespace,
94
- lifecycle_state,
95
- schema_updates,
96
- partition_updates,
97
- primary_keys,
98
- sort_keys,
99
- description,
100
- properties,
101
194
  *args,
195
+ namespace=namespace,
196
+ table_version=table_version,
197
+ lifecycle_state=lifecycle_state,
198
+ schema_updates=schema_updates,
199
+ partition_updates=partition_updates,
200
+ sort_scheme=sort_scheme,
201
+ table_description=table_description,
202
+ table_version_description=table_version_description,
203
+ table_properties=table_properties,
204
+ table_version_properties=table_version_properties,
205
+ transaction=transaction,
206
+ inner=catalog_obj.inner,
102
207
  **kwargs,
103
208
  )
104
209
 
105
210
 
106
211
  def create_table(
107
212
  table: str,
213
+ *args,
108
214
  namespace: Optional[str] = None,
109
- catalog: Optional[str] = None,
110
- lifecycle_state: Optional[LifecycleState] = None,
111
- schema: Optional[Union[pa.Schema, str, bytes]] = None,
112
- schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
113
- partition_keys: Optional[List[Dict[str, Any]]] = None,
114
- primary_keys: Optional[Set[str]] = None,
115
- sort_keys: Optional[List[SortKey]] = None,
116
- description: Optional[str] = None,
117
- properties: Optional[Dict[str, str]] = None,
118
- permissions: Optional[Dict[str, Any]] = None,
215
+ table_version: Optional[str] = None,
216
+ lifecycle_state: Optional[LifecycleState] = LifecycleState.ACTIVE,
217
+ schema: Optional[Schema] = None,
218
+ partition_scheme: Optional[PartitionScheme] = None,
219
+ sort_keys: Optional[SortScheme] = None,
220
+ table_description: Optional[str] = None,
221
+ table_version_description: Optional[str] = None,
222
+ table_properties: Optional[TableProperties] = None,
223
+ table_version_properties: Optional[TableVersionProperties] = None,
224
+ namespace_properties: Optional[NamespaceProperties] = None,
119
225
  content_types: Optional[List[ContentType]] = None,
120
- replace_existing_table: bool = False,
121
- *args,
226
+ fail_if_exists: bool = True,
227
+ transaction: Optional[Transaction] = None,
228
+ catalog: Optional[str] = None,
122
229
  **kwargs,
123
230
  ) -> TableDefinition:
124
- """Create an empty table. Raises an error if the table already exists and
125
- `replace_existing_table` is False."""
126
- return _get_catalog(catalog).impl.create_table(
231
+ """Create an empty table in the catalog.
232
+
233
+ If a namespace isn't provided, the table will be created within the default deltacat namespace.
234
+ Additionally, if the provided namespace does not exist, it will be created for you.
235
+
236
+ Args:
237
+ table: Name of the table to create.
238
+ namespace: Optional namespace for the table. Uses default namespace if not specified.
239
+ table_version: Optional version identifier for the table.
240
+ lifecycle_state: Lifecycle state of the new table. Defaults to ACTIVE.
241
+ schema: Schema definition for the table.
242
+ partition_scheme: Optional partitioning scheme for the table.
243
+ sort_keys: Optional sort keys for the table.
244
+ table_description: Optional description of the table.
245
+ table_version_description: Optional description for the table version.
246
+ table_properties: Optional properties for the table.
247
+ table_version_properties: Optional properties for the table version. Defaults to the current parent table properties if not specified.
248
+ namespace_properties: Optional properties for the namespace if it needs to be created.
249
+ content_types: Optional list of allowed content types for the table.
250
+ fail_if_exists: If True, raises an error if the table already exists. If False, returns the existing table.
251
+ transaction: Optional transaction to use. If None, creates a new transaction.
252
+
253
+ Returns:
254
+ TableDefinition object for the created or existing table.
255
+
256
+ Raises:
257
+ TableAlreadyExistsError: If the table already exists and fail_if_exists is True.
258
+ NamespaceNotFoundError: If the provided namespace does not exist.
259
+ """
260
+ if (transaction or get_current_transaction()) and catalog:
261
+ raise ValueError(
262
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
263
+ )
264
+ catalog_obj = get_catalog(catalog)
265
+ return catalog_obj.impl.create_table(
127
266
  table,
128
- namespace,
129
- lifecycle_state,
130
- schema,
131
- schema_consistency,
132
- partition_keys,
133
- primary_keys,
134
- sort_keys,
135
- description,
136
- properties,
137
- permissions,
138
- content_types,
139
- replace_existing_table,
140
267
  *args,
268
+ namespace=namespace,
269
+ table_version=table_version,
270
+ lifecycle_state=lifecycle_state,
271
+ schema=schema,
272
+ partition_scheme=partition_scheme,
273
+ sort_keys=sort_keys,
274
+ table_description=table_description,
275
+ table_version_description=table_version_description,
276
+ table_version_properties=table_version_properties,
277
+ table_properties=table_properties,
278
+ namespace_properties=namespace_properties,
279
+ content_types=content_types,
280
+ fail_if_exists=fail_if_exists,
281
+ transaction=transaction,
282
+ inner=catalog_obj.inner,
141
283
  **kwargs,
142
284
  )
143
285
 
144
286
 
145
287
  def drop_table(
146
288
  table: str,
289
+ *args,
147
290
  namespace: Optional[str] = None,
148
- catalog: Optional[str] = None,
291
+ table_version: Optional[str] = None,
149
292
  purge: bool = False,
150
- *args,
293
+ transaction: Optional[Transaction] = None,
294
+ catalog: Optional[str] = None,
151
295
  **kwargs,
152
296
  ) -> None:
153
- """Drop a table from the catalog and optionally purge it. Raises an error
154
- if the table does not exist."""
155
- _get_catalog(catalog).impl.drop_table(table, namespace, purge, *args, **kwargs)
297
+ """Drop a table from the catalog and optionally purge underlying data.
298
+
299
+ Args:
300
+ table: Name of the table to drop.
301
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
302
+ table_version: Optional specific version of the table to drop. Defaults to the latest active version.
303
+ purge: If True, permanently delete the table data. If False, only remove from catalog.
304
+ transaction: Optional transaction to use. If None, creates a new transaction.
305
+
306
+ Returns:
307
+ None
308
+
309
+ Raises:
310
+ TableNotFoundError: If the table does not exist.
311
+ TableVersionNotFoundError: If the table version does not exist.
312
+ """
313
+ if (transaction or get_current_transaction()) and catalog:
314
+ raise ValueError(
315
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
316
+ )
317
+ catalog_obj = get_catalog(catalog)
318
+ catalog_obj.impl.drop_table(
319
+ table,
320
+ *args,
321
+ namespace=namespace,
322
+ table_version=table_version,
323
+ purge=purge,
324
+ transaction=transaction,
325
+ inner=catalog_obj.inner,
326
+ **kwargs,
327
+ )
156
328
 
157
329
 
158
330
  def refresh_table(
159
331
  table: str,
332
+ *args,
160
333
  namespace: Optional[str] = None,
334
+ table_version: Optional[str] = None,
335
+ transaction: Optional[Transaction] = None,
161
336
  catalog: Optional[str] = None,
162
- *args,
163
337
  **kwargs,
164
338
  ) -> None:
165
- """Refresh metadata cached on the Ray cluster for the given table."""
166
- _get_catalog(catalog).impl.refresh_table(table, namespace, *args, **kwargs)
339
+ """Refresh metadata cached on the Ray cluster for the given table.
340
+
341
+ Args:
342
+ table: Name of the table to refresh.
343
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
344
+ table_version: Optional specific version of the table to refresh. Defaults to the latest active version.
345
+ transaction: Optional transaction to use. If None, creates a new transaction.
346
+
347
+ Returns:
348
+ None
349
+ """
350
+ if (transaction or get_current_transaction()) and catalog:
351
+ raise ValueError(
352
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
353
+ )
354
+ catalog_obj = get_catalog(catalog)
355
+ catalog_obj.impl.refresh_table(
356
+ table,
357
+ *args,
358
+ namespace=namespace,
359
+ table_version=table_version,
360
+ transaction=transaction,
361
+ inner=catalog_obj.inner,
362
+ **kwargs,
363
+ )
167
364
 
168
365
 
169
366
  def list_tables(
170
- namespace: Optional[str] = None, catalog: Optional[str] = None, *args, **kwargs
367
+ *args,
368
+ namespace: Optional[str] = None,
369
+ table: Optional[str] = None,
370
+ transaction: Optional[Transaction] = None,
371
+ catalog: Optional[str] = None,
372
+ **kwargs,
171
373
  ) -> ListResult[TableDefinition]:
172
- """List a page of table definitions. Raises an error if the given namespace
173
- does not exist."""
174
- return _get_catalog(catalog).impl.list_tables(namespace, *args, **kwargs)
374
+ """List a page of table definitions.
375
+
376
+ Args:
377
+ namespace: Optional namespace to list tables from. Uses default namespace if not specified.
378
+ table: Optional table whose table versions should be listed. If not specified, lists the latest active version of each table in the namespace.
379
+ transaction: Optional transaction to use. If None, creates a new transaction.
380
+
381
+ Returns:
382
+ ListResult containing TableDefinition objects for tables in the namespace.
383
+ """
384
+ if (transaction or get_current_transaction()) and catalog:
385
+ raise ValueError(
386
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
387
+ )
388
+ catalog_obj = get_catalog(catalog)
389
+ return catalog_obj.impl.list_tables(
390
+ *args,
391
+ namespace=namespace,
392
+ table=table,
393
+ transaction=transaction,
394
+ inner=catalog_obj.inner,
395
+ **kwargs,
396
+ )
175
397
 
176
398
 
177
399
  def get_table(
178
400
  table: str,
401
+ *args,
179
402
  namespace: Optional[str] = None,
403
+ table_version: Optional[str] = None,
404
+ stream_format: StreamFormat = StreamFormat.DELTACAT,
405
+ transaction: Optional[Transaction] = None,
180
406
  catalog: Optional[str] = None,
181
- *args,
182
407
  **kwargs,
183
408
  ) -> Optional[TableDefinition]:
184
- """Get table definition metadata. Returns None if the given table does not
185
- exist."""
186
- return _get_catalog(catalog).impl.get_table(table, namespace, *args, **kwargs)
409
+ """Get table definition metadata.
410
+
411
+ Args:
412
+ table: Name of the table to retrieve.
413
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
414
+ table_version: Optional specific version of the table to retrieve. Defaults to the latest active version.
415
+ stream_format: Optional stream format to retrieve. Defaults to DELTACAT.
416
+ transaction: Optional transaction to use. If None, creates a new transaction.
417
+
418
+ Returns:
419
+ Deltacat TableDefinition if the table exists, None otherwise. The table definition's table version will be
420
+ None if the requested version is not found. The table definition's stream will be None if the requested stream
421
+ format is not found.
422
+ """
423
+ if (transaction or get_current_transaction()) and catalog:
424
+ raise ValueError(
425
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
426
+ )
427
+ catalog_obj = get_catalog(catalog)
428
+ return catalog_obj.impl.get_table(
429
+ table,
430
+ *args,
431
+ namespace=namespace,
432
+ table_version=table_version,
433
+ stream_format=stream_format,
434
+ transaction=transaction,
435
+ inner=catalog_obj.inner,
436
+ **kwargs,
437
+ )
187
438
 
188
439
 
189
440
  def truncate_table(
190
441
  table: str,
442
+ *args,
191
443
  namespace: Optional[str] = None,
444
+ table_version: Optional[str] = None,
445
+ transaction: Optional[Transaction] = None,
192
446
  catalog: Optional[str] = None,
193
- *args,
194
447
  **kwargs,
195
448
  ) -> None:
196
- """Truncate table data. Raises an error if the table does not exist."""
197
- _get_catalog(catalog).impl.truncate_table(table, namespace, *args, **kwargs)
449
+ """Truncate table data.
450
+
451
+ Args:
452
+ table: Name of the table to truncate.
453
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
454
+ table_version: Optional specific version of the table to truncate. Defaults to the latest active version.
455
+ transaction: Optional transaction to use. If None, creates a new transaction.
456
+
457
+ Returns:
458
+ None
459
+ """
460
+ if (transaction or get_current_transaction()) and catalog:
461
+ raise ValueError(
462
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
463
+ )
464
+ catalog_obj = get_catalog(catalog)
465
+ catalog_obj.impl.truncate_table(
466
+ table,
467
+ *args,
468
+ namespace=namespace,
469
+ table_version=table_version,
470
+ transaction=transaction,
471
+ inner=catalog_obj.inner,
472
+ **kwargs,
473
+ )
198
474
 
199
475
 
200
476
  def rename_table(
201
477
  table: str,
202
478
  new_name: str,
479
+ *args,
203
480
  namespace: Optional[str] = None,
481
+ transaction: Optional[Transaction] = None,
204
482
  catalog: Optional[str] = None,
205
- *args,
206
483
  **kwargs,
207
484
  ) -> None:
208
- """Rename a table."""
209
- _get_catalog(catalog).impl.rename_table(table, new_name, namespace, *args, **kwargs)
485
+ """Rename an existing table.
486
+
487
+ Args:
488
+ table: Current name of the table.
489
+ new_name: New name for the table.
490
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
491
+ transaction: Optional transaction to use. If None, creates a new transaction.
492
+
493
+ Returns:
494
+ None
495
+
496
+ Raises:
497
+ TableNotFoundError: If the table does not exist.
498
+ """
499
+ if (transaction or get_current_transaction()) and catalog:
500
+ raise ValueError(
501
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
502
+ )
503
+ catalog_obj = get_catalog(catalog)
504
+ catalog_obj.impl.rename_table(
505
+ table,
506
+ new_name,
507
+ *args,
508
+ namespace=namespace,
509
+ transaction=transaction,
510
+ inner=catalog_obj.inner,
511
+ **kwargs,
512
+ )
210
513
 
211
514
 
212
515
  def table_exists(
213
516
  table: str,
517
+ *args,
214
518
  namespace: Optional[str] = None,
519
+ table_version: Optional[str] = None,
520
+ stream_format: StreamFormat = StreamFormat.DELTACAT,
521
+ transaction: Optional[Transaction] = None,
215
522
  catalog: Optional[str] = None,
216
- *args,
217
523
  **kwargs,
218
524
  ) -> bool:
219
- """Returns True if the given table exists, False if not."""
220
- return _get_catalog(catalog).impl.table_exists(table, namespace, *args, **kwargs)
525
+ """Check if a table exists in the catalog.
526
+
527
+ Args:
528
+ table: Name of the table to check.
529
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
530
+ table_version: Optional specific version of the table to check. Defaults to the latest active version.
531
+ stream_format: Optional stream format to check. Defaults to DELTACAT.
532
+ transaction: Optional transaction to use. If None, creates a new transaction.
533
+
534
+ Returns:
535
+ True if the table exists, False otherwise.
536
+ """
537
+ if (transaction or get_current_transaction()) and catalog:
538
+ raise ValueError(
539
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
540
+ )
541
+ catalog_obj = get_catalog(catalog)
542
+ return catalog_obj.impl.table_exists(
543
+ table,
544
+ *args,
545
+ namespace=namespace,
546
+ table_version=table_version,
547
+ stream_format=stream_format,
548
+ transaction=transaction,
549
+ inner=catalog_obj.inner,
550
+ **kwargs,
551
+ )
221
552
 
222
553
 
223
554
  # namespace functions
224
555
  def list_namespaces(
225
- catalog: Optional[str] = None, *args, **kwargs
556
+ *args,
557
+ transaction: Optional[Transaction] = None,
558
+ catalog: Optional[str] = None,
559
+ **kwargs,
226
560
  ) -> ListResult[Namespace]:
227
- """List a page of table namespaces."""
228
- return _get_catalog(catalog).impl.list_namespaces(*args, **kwargs)
561
+ """List a page of table namespaces.
562
+
563
+ Args:
564
+ transaction: Optional transaction to use. If None, creates a new transaction.
565
+
566
+ Returns:
567
+ ListResult containing Namespace objects.
568
+ """
569
+ if (transaction or get_current_transaction()) and catalog:
570
+ raise ValueError(
571
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
572
+ )
573
+ catalog_obj = get_catalog(catalog)
574
+ return catalog_obj.impl.list_namespaces(
575
+ *args,
576
+ transaction=transaction,
577
+ inner=catalog_obj.inner,
578
+ **kwargs,
579
+ )
229
580
 
230
581
 
231
582
  def get_namespace(
232
- namespace: str, catalog: Optional[str] = None, *args, **kwargs
583
+ namespace: str,
584
+ *args,
585
+ transaction: Optional[Transaction] = None,
586
+ catalog: Optional[str] = None,
587
+ **kwargs,
233
588
  ) -> Optional[Namespace]:
234
- """Get table namespace metadata for the specified table namespace. Returns
235
- None if the given namespace does not exist."""
236
- return _get_catalog(catalog).impl.get_namespace(namespace, *args, **kwargs)
589
+ """Get metadata for a specific table namespace.
590
+
591
+ Args:
592
+ namespace: Name of the namespace to retrieve.
593
+ transaction: Optional transaction to use. If None, creates a new transaction.
594
+
595
+ Returns:
596
+ Namespace object if the namespace exists, None otherwise.
597
+ """
598
+ if (transaction or get_current_transaction()) and catalog:
599
+ raise ValueError(
600
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
601
+ )
602
+ catalog_obj = get_catalog(catalog)
603
+ return catalog_obj.impl.get_namespace(
604
+ namespace,
605
+ *args,
606
+ transaction=transaction,
607
+ inner=catalog_obj.inner,
608
+ **kwargs,
609
+ )
237
610
 
238
611
 
239
612
  def namespace_exists(
240
- namespace: str, catalog: Optional[str] = None, *args, **kwargs
613
+ namespace: str,
614
+ *args,
615
+ transaction: Optional[Transaction] = None,
616
+ catalog: Optional[str] = None,
617
+ **kwargs,
241
618
  ) -> bool:
242
- """Returns True if the given table namespace exists, False if not."""
243
- return _get_catalog(catalog).impl.namespace_exists(namespace, *args, **kwargs)
619
+ """Check if a namespace exists.
620
+
621
+ Args:
622
+ namespace: Name of the namespace to check.
623
+ transaction: Optional transaction to use. If None, creates a new transaction.
624
+
625
+ Returns:
626
+ True if the namespace exists, False otherwise.
627
+ """
628
+ if (transaction or get_current_transaction()) and catalog:
629
+ raise ValueError(
630
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
631
+ )
632
+ catalog_obj = get_catalog(catalog)
633
+ return catalog_obj.impl.namespace_exists(
634
+ namespace,
635
+ *args,
636
+ transaction=transaction,
637
+ inner=catalog_obj.inner,
638
+ **kwargs,
639
+ )
244
640
 
245
641
 
246
642
  def create_namespace(
247
643
  namespace: str,
248
- permissions: Dict[str, Any],
249
- catalog: Optional[str] = None,
250
644
  *args,
645
+ properties: Optional[NamespaceProperties] = None,
646
+ transaction: Optional[Transaction] = None,
647
+ catalog: Optional[str] = None,
251
648
  **kwargs,
252
649
  ) -> Namespace:
253
- """Creates a table namespace with the given name and permissions. Returns
254
- the created namespace. Raises an error if the namespace already exists."""
255
- return _get_catalog(catalog).impl.create_namespace(
256
- namespace, permissions, *args, **kwargs
650
+ """Create a new namespace.
651
+
652
+ Args:
653
+ namespace: Name of the namespace to create.
654
+ properties: Optional properties for the namespace.
655
+ transaction: Optional transaction to use. If None, creates a new transaction.
656
+
657
+ Returns:
658
+ Created Namespace object.
659
+
660
+ Raises:
661
+ NamespaceAlreadyExistsError: If the namespace already exists.
662
+ """
663
+ if (transaction or get_current_transaction()) and catalog:
664
+ raise ValueError(
665
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
666
+ )
667
+ catalog_obj = get_catalog(catalog)
668
+ return catalog_obj.impl.create_namespace(
669
+ namespace,
670
+ *args,
671
+ properties=properties,
672
+ transaction=transaction,
673
+ inner=catalog_obj.inner,
674
+ **kwargs,
257
675
  )
258
676
 
259
677
 
260
678
  def alter_namespace(
261
679
  namespace: str,
262
- catalog: Optional[str] = None,
263
- permissions: Optional[Dict[str, Any]] = None,
264
- new_namespace: Optional[str] = None,
265
680
  *args,
681
+ properties: Optional[NamespaceProperties] = None,
682
+ new_namespace: Optional[str] = None,
683
+ transaction: Optional[Transaction] = None,
684
+ catalog: Optional[str] = None,
266
685
  **kwargs,
267
686
  ) -> None:
268
- """Alter table namespace definition."""
269
- _get_catalog(catalog).impl.alter_namespace(
270
- namespace, permissions, new_namespace, *args, **kwargs
687
+ """Alter a namespace definition.
688
+
689
+ Args:
690
+ namespace: Name of the namespace to alter.
691
+ properties: Optional new properties for the namespace.
692
+ new_namespace: Optional new name for the namespace.
693
+ transaction: Optional transaction to use. If None, creates a new transaction.
694
+
695
+ Returns:
696
+ None
697
+ """
698
+ if (transaction or get_current_transaction()) and catalog:
699
+ raise ValueError(
700
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
701
+ )
702
+ catalog_obj = get_catalog(catalog)
703
+ catalog_obj.impl.alter_namespace(
704
+ namespace,
705
+ *args,
706
+ properties=properties,
707
+ new_namespace=new_namespace,
708
+ transaction=transaction,
709
+ inner=catalog_obj.inner,
710
+ **kwargs,
271
711
  )
272
712
 
273
713
 
274
714
  def drop_namespace(
275
- namespace: str, catalog: Optional[str] = None, purge: bool = False, *args, **kwargs
715
+ namespace: str,
716
+ *args,
717
+ purge: bool = False,
718
+ transaction: Optional[Transaction] = None,
719
+ catalog: Optional[str] = None,
720
+ **kwargs,
276
721
  ) -> None:
277
- """Drop the given namespace and all of its tables from the catalog,
278
- optionally purging them."""
279
- _get_catalog(catalog).impl.drop_namespace(namespace, purge, *args, **kwargs)
722
+ """Drop a namespace and all of its tables from the catalog.
723
+
724
+ Args:
725
+ namespace: Name of the namespace to drop.
726
+ purge: If True, permanently delete all table data in the namespace.
727
+ If False, only removes the namespace from the catalog.
728
+ transaction: Optional transaction to use. If None, creates a new transaction.
729
+
730
+ Returns:
731
+ None
732
+ """
733
+ if (transaction or get_current_transaction()) and catalog:
734
+ raise ValueError(
735
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
736
+ )
737
+ catalog_obj = get_catalog(catalog)
738
+ catalog_obj.impl.drop_namespace(
739
+ namespace,
740
+ *args,
741
+ purge=purge,
742
+ transaction=transaction,
743
+ inner=catalog_obj.inner,
744
+ **kwargs,
745
+ )
280
746
 
281
747
 
282
- def default_namespace(catalog: Optional[str] = None) -> str:
283
- """Returns the default namespace for the catalog."""
284
- return _get_catalog(catalog).impl.default_namespace()
748
+ def default_namespace(
749
+ *args,
750
+ catalog: Optional[str] = None,
751
+ **kwargs,
752
+ ) -> str:
753
+ """Return the default namespace for the catalog.
754
+
755
+ Returns:
756
+ Name of the default namespace.
757
+ """
758
+ catalog_obj = get_catalog(catalog)
759
+ return catalog_obj.impl.default_namespace(*args, inner=catalog_obj.inner, **kwargs)