deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,61 @@
1
+ import logging
2
+ import json
3
+ import pyarrow as pa
4
+ import pyarrow.parquet
5
+ import pyarrow.feather
6
+ from typing import Callable, Dict
7
+
8
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
9
+ QueryExpression,
10
+ )
11
+ from deltacat import logs
12
+
13
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
14
+
15
+
16
def export_parquet(dataset, file_uri: str, query: "QueryExpression" = None):
    """
    Export the records selected by `query` to a Parquet file.

    Args:
        dataset: The dataset to export. It must provide `scan(query)`, whose
            result exposes `to_arrow()` yielding Arrow record batches.
        file_uri: Destination passed to `pyarrow.parquet.write_table`.
        query: QueryExpression to filter the dataset before exporting. When
            omitted (or None), a new empty QueryExpression is created for
            this call.
    """
    # Build the default per call. A `QueryExpression()` default written in the
    # signature is evaluated once at import time and then shared (and
    # potentially mutated) across every caller.
    if query is None:
        query = QueryExpression()
    records = dataset.scan(query).to_arrow()
    table = pa.Table.from_batches(records)
    pyarrow.parquet.write_table(table, file_uri)
20
+
21
+
22
def export_feather(dataset, file_uri: str, query: "QueryExpression" = None):
    """
    Export the records selected by `query` to a Feather file.

    Args:
        dataset: The dataset to export. It must provide `scan(query)`, whose
            result exposes `to_arrow()` yielding Arrow record batches.
        file_uri: Destination passed to `pyarrow.feather.write_feather`.
        query: QueryExpression to filter the dataset before exporting. When
            omitted (or None), a new empty QueryExpression is created for
            this call.
    """
    # Build the default per call. A `QueryExpression()` default written in the
    # signature is evaluated once at import time and then shared (and
    # potentially mutated) across every caller.
    if query is None:
        query = QueryExpression()
    records = dataset.scan(query).to_arrow()
    table = pa.Table.from_batches(records)
    pyarrow.feather.write_feather(table, file_uri)
26
+
27
+
28
def export_json(dataset, file_uri: str, query: "QueryExpression" = None):
    """
    Export the records selected by `query` to a text file of JSON documents.

    Each batch returned by `dataset.scan(query).to_pydict()` is serialized
    with `json.dump(..., indent=2)` and followed by a newline, so the output
    is a sequence of JSON documents rather than a single JSON value.

    Args:
        dataset: The dataset to export. It must provide `scan(query)`, whose
            result exposes `to_pydict()` yielding JSON-serializable batches.
        file_uri: Local path opened for writing with the built-in `open`.
        query: QueryExpression to filter the dataset before exporting. When
            omitted (or None), a new empty QueryExpression is created for
            this call.
    """
    # Build the default per call. A `QueryExpression()` default written in the
    # signature is evaluated once at import time and then shared (and
    # potentially mutated) across every caller.
    if query is None:
        query = QueryExpression()
    with open(file_uri, "w") as f:
        for batch in dataset.scan(query).to_pydict():
            json.dump(batch, f, indent=2)
            f.write("\n")
33
+
34
+
35
def export_dataset(dataset, file_uri: str, format: str = "parquet", query=None):
    """
    Write `dataset` to `file_uri` using the exporter registered for `format`.

    TODO: Make this pluggable for custom formats.

    Args:
        dataset: The dataset to export.
        file_uri: The URI to write the dataset to.
        format: Name of the output format; one of "parquet", "feather" or
            "json".
        query: QueryExpression to filter the dataset before exporting. A
            falsy value is replaced with a new `QueryExpression()`.

    Raises:
        ValueError: If `format` has no registered exporter.
    """
    # Exporters keyed by format name.
    exporters: Dict[str, Callable] = dict(
        parquet=export_parquet,
        feather=export_feather,
        json=export_json,
    )

    exporter = exporters.get(format)
    if exporter is None:
        supported = list(exporters.keys())
        raise ValueError(
            f"Unsupported format: {format}. Supported formats are {supported}"
        )

    effective_query = query or QueryExpression()
    exporter(dataset, file_uri, effective_query)

    logger.info(f"Dataset exported to {file_uri} in {format} format.")
@@ -0,0 +1,450 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from typing import Optional, Tuple, Union, List
5
+ from datetime import timedelta
6
+ from enum import Enum
7
+
8
+ import sys
9
+ import urllib
10
+ import pathlib
11
+
12
+ import pyarrow as pa
13
+ from pyarrow.fs import (
14
+ _resolve_filesystem_and_path,
15
+ FileSelector,
16
+ FileInfo,
17
+ FileType,
18
+ FileSystem,
19
+ FSSpecHandler,
20
+ PyFileSystem,
21
+ GcsFileSystem,
22
+ LocalFileSystem,
23
+ S3FileSystem,
24
+ AzureFileSystem,
25
+ HadoopFileSystem,
26
+ )
27
+
28
+ _LOCAL_SCHEME = "local"
29
+
30
+
31
class FilesystemType(str, Enum):
    """
    String-valued names for the PyArrow filesystem implementations this
    module distinguishes, plus UNKNOWN for everything else.
    """

    LOCAL = "local"
    S3 = "s3"
    GCS = "gcs"
    AZURE = "azure"
    HADOOP = "hadoop"
    UNKNOWN = "unknown"

    @classmethod
    def from_filesystem(cls, filesystem: FileSystem) -> FilesystemType:
        """
        Classify a filesystem instance by its PyArrow implementation class.

        Returns:
            The member matching the first listed class that `filesystem` is
            an instance of, or UNKNOWN when none match.
        """
        # Checked in order; the first isinstance match wins.
        implementations = (
            (LocalFileSystem, cls.LOCAL),
            (S3FileSystem, cls.S3),
            (GcsFileSystem, cls.GCS),
            (AzureFileSystem, cls.AZURE),
            (HadoopFileSystem, cls.HADOOP),
        )
        for implementation, member in implementations:
            if isinstance(filesystem, implementation):
                return member
        return cls.UNKNOWN

    @classmethod
    def to_filesystem(cls, filesystem_type: FilesystemType) -> FileSystem:
        """
        Construct a filesystem of the requested type with no arguments.

        Raises:
            ValueError: If `filesystem_type` equals none of LOCAL, S3, GCS,
                AZURE or HADOOP (this includes UNKNOWN).
        """
        # Matching uses `==` rather than a dict lookup so plain strings such
        # as "s3" keep comparing equal to the str-mixin members.
        # NOTE(review): every factory is called with zero arguments; PyArrow
        # documents required parameters for AzureFileSystem (account_name)
        # and HadoopFileSystem (host) - confirm those two branches can work.
        factories = (
            (cls.LOCAL, LocalFileSystem),
            (cls.S3, S3FileSystem),
            (cls.GCS, GcsFileSystem),
            (cls.AZURE, AzureFileSystem),
            (cls.HADOOP, HadoopFileSystem),
        )
        for member, factory in factories:
            if filesystem_type == member:
                return factory()
        raise ValueError(f"Unsupported filesystem type: {filesystem_type}")
68
+
69
+
70
def resolve_paths_and_filesystem(
    paths: Union[str, pathlib.Path, List[str]],
    filesystem: Optional[FileSystem] = None,
) -> Tuple[List[str], FileSystem]:
    """
    Resolves and normalizes all provided paths, infers a filesystem from the
    paths or validates the provided filesystem against the paths and ensures
    that all paths use the same filesystem.

    Args:
        paths: A single file/directory path (`str` or `pathlib.Path`) or a
            list of `str` file/directory paths. A list of paths can contain
            both files and directories.
        filesystem: The filesystem implementation that should be used for
            reading these files. If None, a filesystem will be inferred. If not
            None, the provided filesystem will still be validated against all
            filesystems inferred from the provided paths to ensure
            compatibility. An fsspec filesystem is accepted and wrapped in a
            `PyFileSystem`.

    Returns:
        A tuple of (normalized paths, filesystem). The filesystem is the one
        provided (wrapped if it was an fsspec filesystem) or, when none was
        provided, the one resolved from the first path.

    Raises:
        ValueError: If `paths` is not a `str`, `pathlib.Path` or `list[str]`,
            or is an empty list.
        TypeError: If `filesystem` is neither a pyarrow nor an fsspec
            filesystem.
        ImportError: If an HTTP(S) path must be resolved and fsspec is not
            installed.
    """
    # A single chain: scalars are wrapped, everything else is validated.
    if isinstance(paths, str):
        paths = [paths]
    elif isinstance(paths, pathlib.Path):
        paths = [str(paths)]
    elif not isinstance(paths, list) or any(not isinstance(p, str) for p in paths):
        raise ValueError(
            "Expected `paths` to be a `str`, `pathlib.Path`, or `list[str]`, but got "
            f"`{paths}`."
        )
    elif len(paths) == 0:
        raise ValueError("Must provide at least one path.")

    need_unwrap_path_protocol = True
    if filesystem and not isinstance(filesystem, FileSystem):
        err_msg = (
            f"The filesystem passed must either conform to "
            f"pyarrow.fs.FileSystem, or "
            f"fsspec.spec.AbstractFileSystem. The provided "
            f"filesystem was: {filesystem}"
        )
        try:
            import fsspec
            from fsspec.implementations.http import HTTPFileSystem
        except ModuleNotFoundError:
            # If filesystem is not a pyarrow filesystem and fsspec isn't
            # installed, then filesystem is neither a pyarrow filesystem nor
            # an fsspec filesystem, so we raise a TypeError.
            raise TypeError(err_msg) from None
        if not isinstance(filesystem, fsspec.spec.AbstractFileSystem):
            raise TypeError(err_msg) from None
        if isinstance(filesystem, HTTPFileSystem):
            # If filesystem is fsspec HTTPFileSystem, the protocol/scheme of paths
            # should not be unwrapped/removed, because HTTPFileSystem expects full file
            # paths including protocol/scheme. This is different behavior compared to
            # file systems implementation in pyarrow.fs.FileSystem.
            need_unwrap_path_protocol = False

        filesystem = PyFileSystem(FSSpecHandler(filesystem))

    resolved_paths = []
    for path in paths:
        path = _resolve_custom_scheme(path)
        try:
            resolved_filesystem, resolved_path = _resolve_filesystem_and_path(
                path, filesystem
            )
        except pa.lib.ArrowInvalid as e:
            if "Cannot parse URI" in str(e):
                # Retry with the URL-encoded path, then decode the result.
                resolved_filesystem, resolved_path = _resolve_filesystem_and_path(
                    _encode_url(path), filesystem
                )
                resolved_path = _decode_url(resolved_path)
            elif "Unrecognized filesystem type in URI" in str(e):
                scheme = urllib.parse.urlparse(path, allow_fragments=False).scheme
                if scheme in ["http", "https"]:
                    # If scheme of path is HTTP and filesystem is not resolved,
                    # try to use fsspec HTTPFileSystem. This expects fsspec is
                    # installed.
                    try:
                        from fsspec.implementations.http import HTTPFileSystem
                    except ModuleNotFoundError:
                        raise ImportError(
                            "Please install fsspec to read files from HTTP."
                        ) from None

                    resolved_filesystem = PyFileSystem(FSSpecHandler(HTTPFileSystem()))
                    resolved_path = path
                    need_unwrap_path_protocol = False
                else:
                    raise
            else:
                raise
        if filesystem is None:
            if isinstance(resolved_filesystem, GcsFileSystem):
                # Configure a retry time limit for GcsFileSystem so that it
                # doesn't hang forever trying to get file info (e.g., when
                # trying to get a public file w/o anonymous=True).
                filesystem = GcsFileSystem(
                    retry_time_limit=timedelta(seconds=60),
                )
            else:
                filesystem = resolved_filesystem
        elif need_unwrap_path_protocol:
            resolved_path = _unwrap_protocol(resolved_path)
        resolved_path = filesystem.normalize_path(resolved_path)
        resolved_paths.append(resolved_path)
    return resolved_paths, filesystem
175
+
176
+
177
def resolve_path_and_filesystem(
    path: str,
    filesystem: Optional[FileSystem] = None,
) -> Tuple[str, FileSystem]:
    """
    Single-path convenience wrapper around `resolve_paths_and_filesystem`.

    Args:
        path: A single file/directory path.
        filesystem: The filesystem implementation to use, forwarded
            unchanged. If None, the callee infers a filesystem from the path.

    Returns:
        A tuple of (resolved path, filesystem) taken from the callee's
        result.
    """
    resolved, resolved_fs = resolve_paths_and_filesystem(
        paths=path,
        filesystem=filesystem,
    )
    # One path in must produce exactly one path out.
    assert len(resolved) == 1, len(resolved)
    return resolved[0], resolved_fs
199
+
200
+
201
def list_directory(
    path: str,
    filesystem: FileSystem,
    exclude_prefixes: Optional[List[str]] = None,
    ignore_missing_path: bool = False,
    recursive: bool = False,
) -> List[Tuple[str, int]]:
    """
    Expand the provided directory path to a list of file paths.

    Args:
        path: The directory path to expand.
        filesystem: The filesystem implementation that should be used for
            reading these files.
        exclude_prefixes: The file relative path prefixes that should be
            excluded from the returned file set. Default excluded prefixes are
            "." and "_". The prefixes are compared against each entry's path
            relative to `path`, with any leading "/" separator removed.
        ignore_missing_path: Forwarded to the file selector as
            `allow_not_found`.
        recursive: Whether to expand subdirectories or not.

    Returns:
        A list of (file_path, file_size) tuples, sorted to guarantee a stable
        order. No filtering by entry type is applied here, so whatever the
        filesystem reports for the selector is returned.
    """
    if exclude_prefixes is None:
        exclude_prefixes = [".", "_"]
    # str.startswith accepts a tuple of candidates, so build it once.
    excluded = tuple(exclude_prefixes)

    selector = FileSelector(
        base_dir=path,
        recursive=recursive,
        allow_not_found=ignore_missing_path,
    )
    try:
        files = filesystem.get_file_info(selector)
    except FileNotFoundError:
        # A missing directory is reported as an empty listing.
        # NOTE(review): this happens even when ignore_missing_path is False;
        # existing behavior is kept as-is -- confirm it is intended.
        files = []
    except OSError as e:
        # Always raises (either a friendlier OSError or `e` itself).
        _handle_read_os_error(e, path)
    base_path = selector.base_dir
    out = []
    for file_ in files:
        file_path = file_.path
        if not file_path.startswith(base_path):
            continue
        # Drop the separator left over when `base_path` has no trailing
        # slash; otherwise the relative path starts with "/" and can never
        # match a prefix such as "." or "_".
        relative = file_path[len(base_path) :].lstrip("/")
        if relative.startswith(excluded):
            continue
        out.append((file_path, file_.size))
    # We sort the paths to guarantee a stable order.
    return sorted(out)
250
+
251
+
252
def get_file_info(
    path: str,
    filesystem: FileSystem,
    ignore_missing_path: bool = False,
) -> FileInfo:
    """
    Look up the file info for a single path.

    Args:
        path: The path to inspect.
        filesystem: The filesystem implementation used for the lookup.
        ignore_missing_path: If True, a "not found" file info is returned to
            the caller instead of raising.

    Returns:
        The file info reported by the filesystem for `path`.

    Raises:
        FileNotFoundError: If the path is reported as not found and
            `ignore_missing_path` is False.
        OSError: If the lookup itself fails (re-raised through
            `_handle_read_os_error`).
    """
    try:
        info = filesystem.get_file_info(path)
    except OSError as os_error:
        # Always raises, so `info` is bound whenever execution continues.
        _handle_read_os_error(os_error, path)
    if info.type != FileType.NotFound or ignore_missing_path:
        return info
    raise FileNotFoundError(path)
266
+
267
+
268
def write_file(
    path: str,
    data: Union[str, bytes],
    filesystem: Optional[FileSystem] = None,
) -> None:
    """
    Write `data` to `path` through an output stream of the resolved filesystem.

    Args:
        path: The file path to write to.
        data: The payload to write. Strings are encoded as UTF-8 first; bytes
            are written unchanged.
        filesystem: The filesystem implementation to use. If None, will be
            inferred from the path.
    """
    target_path, target_filesystem = resolve_path_and_filesystem(
        path=path,
        filesystem=filesystem,
    )
    # The output stream takes bytes, so text is encoded up front.
    payload = data.encode("utf-8") if isinstance(data, str) else data
    with target_filesystem.open_output_stream(target_path) as stream:
        stream.write(payload)
292
+
293
+
294
def read_file(
    path: str,
    filesystem: Optional[FileSystem] = None,
    fail_if_not_found: bool = True,
) -> Optional[bytes]:
    """
    Read the full contents of `path` through an input stream of the resolved
    filesystem.

    Args:
        path: The file path to read from.
        filesystem: The filesystem implementation to use. If None, will be
            inferred from the path.
        fail_if_not_found: Whether a FileNotFoundError should propagate to the
            caller (True) or be turned into a None result (False).

    Returns:
        The file data as bytes, or None if a FileNotFoundError was raised and
        `fail_if_not_found` is False.
    """
    try:
        source_path, source_filesystem = resolve_path_and_filesystem(
            path=path,
            filesystem=filesystem,
        )
        with source_filesystem.open_input_stream(source_path) as stream:
            return stream.read()
    except FileNotFoundError:
        # Covers both path resolution and opening/reading the stream.
        if not fail_if_not_found:
            return None
        raise
322
+
323
+
324
+ def _handle_read_os_error(
325
+ error: OSError,
326
+ paths: Union[str, List[str]],
327
+ ) -> str:
328
+ # NOTE: this is not comprehensive yet, and should be extended as more errors arise.
329
+ # NOTE: The latter patterns are raised in Arrow 10+, while the former is raised in
330
+ # Arrow < 10.
331
+ aws_error_pattern = (
332
+ r"^(?:(.*)AWS Error \[code \d+\]: No response body\.(.*))|"
333
+ r"(?:(.*)AWS Error UNKNOWN \(HTTP status 400\) during HeadObject operation: "
334
+ r"No response body\.(.*))|"
335
+ r"(?:(.*)AWS Error ACCESS_DENIED during HeadObject operation: No response "
336
+ r"body\.(.*))$"
337
+ )
338
+ gcp_error_pattern = (
339
+ r"^(?:(.*)google::cloud::Status\(UNAVAILABLE:(.*?)Couldn't resolve host name)"
340
+ )
341
+ if re.match(aws_error_pattern, str(error)):
342
+ # Specially handle AWS error when reading files, to give a clearer error
343
+ # message to avoid confusing users. The real issue is most likely that the AWS
344
+ # S3 file credentials have not been properly configured yet.
345
+ if isinstance(paths, str):
346
+ # Quote to highlight single file path in error message for better
347
+ # readability. List of file paths will be shown up as ['foo', 'boo'],
348
+ # so only quote single file path here.
349
+ paths = f'"{paths}"'
350
+ raise OSError(
351
+ (
352
+ f"Failing to read AWS S3 file(s): {paths}. "
353
+ "Please check that file exists and has properly configured access. "
354
+ "You can also run AWS CLI command to get more detailed error message "
355
+ "(e.g., aws s3 ls <file-name>). "
356
+ "See https://awscli.amazonaws.com/v2/documentation/api/latest/reference/s3/index.html " # noqa
357
+ "and https://arrow.apache.org/docs/python/generated/pyarrow.fs.S3FileSystem.html "
358
+ "for more information."
359
+ )
360
+ )
361
+ elif re.match(gcp_error_pattern, str(error)):
362
+ # Special handling for GCP errors (e.g., handling the special case of
363
+ # requiring the filesystem to be instantiated with anonymous access to
364
+ # read public files).
365
+ if isinstance(paths, str):
366
+ paths = f'"{paths}"'
367
+ raise OSError(
368
+ (
369
+ f"Failing to read GCP GS file(s): {paths}. "
370
+ "Please check that file exists and has properly configured access. "
371
+ "If this is a public file, please instantiate a filesystem with "
372
+ "anonymous access via `pyarrow.fs.GcsFileSystem(anonymous=True)` "
373
+ "to read it. See https://google.aip.dev/auth/4110 and "
374
+ "https://arrow.apache.org/docs/python/generated/pyarrow.fs.GcsFileSystem.html" # noqa
375
+ "for more information."
376
+ )
377
+ )
378
+
379
+ else:
380
+ raise error
381
+
382
+
383
+ def _is_local_windows_path(path: str) -> bool:
384
+ """Determines if path is a Windows file-system location."""
385
+ if sys.platform != "win32":
386
+ return False
387
+
388
+ if len(path) >= 1 and path[0] == "\\":
389
+ return True
390
+ if (
391
+ len(path) >= 3
392
+ and path[1] == ":"
393
+ and (path[2] == "/" or path[2] == "\\")
394
+ and path[0].isalpha()
395
+ ):
396
+ return True
397
+ return False
398
+
399
+
400
+ def _unwrap_protocol(path):
401
+ """
402
+ Slice off any protocol prefixes on path.
403
+ """
404
+ if sys.platform == "win32" and _is_local_windows_path(path):
405
+ # Represent as posix path such that downstream functions properly handle it.
406
+ # This is executed when 'file://' is NOT included in the path.
407
+ return pathlib.Path(path).as_posix()
408
+
409
+ parsed = urllib.parse.urlparse(path, allow_fragments=False) # support '#' in path
410
+ query = "?" + parsed.query if parsed.query else "" # support '?' in path
411
+ netloc = parsed.netloc
412
+ if parsed.scheme == "s3" and "@" in parsed.netloc:
413
+ # If the path contains an @, it is assumed to be an anonymous
414
+ # credentialed path, and we need to strip off the credentials.
415
+ netloc = parsed.netloc.split("@")[-1]
416
+
417
+ parsed_path = parsed.path
418
+ # urlparse prepends the path with a '/'. This does not work on Windows
419
+ # so if this is the case strip the leading slash.
420
+ if (
421
+ sys.platform == "win32"
422
+ and not netloc
423
+ and len(parsed_path) >= 3
424
+ and parsed_path[0] == "/" # The problematic leading slash
425
+ and parsed_path[1].isalpha() # Ensure it is a drive letter.
426
+ and parsed_path[2:4] in (":", ":/")
427
+ ):
428
+ parsed_path = parsed_path[1:]
429
+
430
+ return netloc + parsed_path + query
431
+
432
+
433
+ def _encode_url(path):
434
+ return urllib.parse.quote(path, safe="/:")
435
+
436
+
437
+ def _decode_url(path):
438
+ return urllib.parse.unquote(path)
439
+
440
+
441
def _resolve_custom_scheme(path: str) -> str:
    """Strip the custom local scheme from `path`, if it has one.

    When the URI scheme of `path` equals `_LOCAL_SCHEME`, the scheme is
    removed and the network location and path components are returned joined
    together. Any other path is returned unchanged.
    """
    uri = urllib.parse.urlparse(path)
    if uri.scheme != _LOCAL_SCHEME:
        return path
    return uri.netloc + uri.path
@@ -0,0 +1,74 @@
1
+ import posixpath
2
+ import pyarrow.fs
3
+
4
+ from deltacat.constants import REV_DIR_NAME
5
+ from deltacat.storage.model.partition import PartitionLocator
6
+ from deltacat.utils.filesystem import resolve_path_and_filesystem
7
+
8
+ """
9
+ Helper functions to work with deltacat metadata paths.
10
+ TODO: Replace with direct calls to Deltacat storage interface.
11
+ """
12
+
13
+
14
def _find_first_child_with_rev(
    parent_path: str, filesystem: pyarrow.fs.FileSystem
) -> str:
    """
    Walks the filesystem to find the first child directory with a `rev/` folder.

    Children are examined in ascending path order, so the result does not
    depend on the order in which the filesystem happens to list entries.

    This is a temporary solution to locate the first Namespace and Table directories.
    The Deltacat Storage interface will provide a more robust way to locate these directories.

    param: parent_path: The parent directory to search for a child with a `rev/` folder.
    param: filesystem: The filesystem to search for the child directory.
    returns: The name of the first child directory with a `rev/` folder.
    raises: ValueError if no child directory contains a `rev/` folder. A
        missing parent directory is listed as empty (allow_not_found=True).
    """
    children = filesystem.get_file_info(
        pyarrow.fs.FileSelector(parent_path, allow_not_found=True)
    )
    # Sort so that "first" is deterministic when several children qualify.
    for child in sorted(children, key=lambda info: info.path):
        if child.type != pyarrow.fs.FileType.Directory:
            continue
        rev_path = posixpath.join(child.path, REV_DIR_NAME)
        if filesystem.get_file_info(rev_path).type == pyarrow.fs.FileType.Directory:
            return child.base_name
    raise ValueError(f"No directory with 'rev/' found under {parent_path}")
36
+
37
+
38
def _find_table_path(root_path: str, filesystem: pyarrow.fs.FileSystem):
    """
    Locates a table directory laid out as: root/namespace_id/table_id

    Both ids are discovered with _find_first_child_with_rev: first under the
    root to get the namespace, then under that namespace to get the table.

    param: root_path: The root directory to search for the namespace and table directories.
    param: filesystem: The filesystem to search for the namespace and table directories.
    returns: The path to the table directory.
    raises: ValueError if the namespace or table directories are not found.
    """
    try:
        # Namespace: first directory under root with rev/
        namespace_path = posixpath.join(
            root_path,
            _find_first_child_with_rev(root_path, filesystem),
        )
        # Table: first directory under namespace with rev/
        table_path = posixpath.join(
            namespace_path,
            _find_first_child_with_rev(namespace_path, filesystem),
        )
    except ValueError as e:
        raise ValueError(f"Failed to locate Namespace or Table: {e}") from e
    return table_path
59
+
60
+
61
def _find_partition_path(root_path: str, locator: PartitionLocator) -> str:
    """
    Builds the path `<table path>/<table_version>/<stream_id>` for the locator.

    The table path is discovered under the resolved root with _find_table_path.
    NOTE(review): no partition-specific component is appended after the
    stream id -- confirm this matches the intended directory layout.

    param: root_path: The root path of the dataset; it is resolved to a
        normalized path and filesystem before searching.
    param: locator: The PartitionLocator supplying `table_version` and `stream_id`.
    returns: The table path joined with the locator's table version and stream id.
    """
    resolved_root, filesystem = resolve_path_and_filesystem(root_path)
    table_path = _find_table_path(resolved_root, filesystem)
    return posixpath.join(table_path, locator.table_version, locator.stream_id)