deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,46 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
- from typing import Any, Dict, Optional
4
+ import posixpath
5
+ from typing import Any, Dict, Optional, List
5
6
 
6
- from deltacat.storage.model.locator import Locator
7
- from deltacat.storage.model.namespace import NamespaceLocator
7
+ import pyarrow
8
8
 
9
+ from deltacat.storage.model.locator import Locator, LocatorName
10
+ from deltacat.storage.model.namespace import (
11
+ NamespaceLocator,
12
+ Namespace,
13
+ )
14
+ from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
15
+ from deltacat.constants import TXN_DIR_NAME
16
+ from deltacat.types.tables import TableProperty
17
+
18
+ TableProperties = Dict[str, Any]
19
+
20
+
21
+ class Table(Metafile):
22
+ """
23
+ Tables store properties common to every table version including the
24
+ table's name, a high-level description of all table versions, and
25
+ properties shared by all table versions.
26
+ """
9
27
 
10
- class Table(dict):
11
28
  @staticmethod
12
29
  def of(
13
30
  locator: Optional[TableLocator],
14
- permissions: Optional[Dict[str, Any]] = None,
15
31
  description: Optional[str] = None,
16
- properties: Optional[Dict[str, str]] = None,
32
+ properties: Optional[TableProperties] = None,
33
+ latest_active_table_version: Optional[str] = None,
34
+ latest_table_version: Optional[str] = None,
35
+ native_object: Optional[Any] = None,
17
36
  ) -> Table:
18
37
  table = Table()
19
38
  table.locator = locator
20
- table.permissions = permissions
21
39
  table.description = description
22
40
  table.properties = properties
41
+ table.latest_active_table_version = latest_active_table_version
42
+ table.latest_table_version = latest_table_version
43
+ table.native_object = native_object
23
44
  return table
24
45
 
25
46
  @property
@@ -33,14 +54,6 @@ class Table(dict):
33
54
  def locator(self, table_locator: Optional[TableLocator]) -> None:
34
55
  self["tableLocator"] = table_locator
35
56
 
36
- @property
37
- def permissions(self) -> Optional[Dict[str, Any]]:
38
- return self.get("permissions")
39
-
40
- @permissions.setter
41
- def permissions(self, permissions: Optional[Dict[str, Any]]) -> None:
42
- self["permissions"] = permissions
43
-
44
57
  @property
45
58
  def description(self) -> Optional[str]:
46
59
  return self.get("description")
@@ -50,13 +63,43 @@ class Table(dict):
50
63
  self["description"] = description
51
64
 
52
65
  @property
53
- def properties(self) -> Optional[Dict[str, str]]:
66
+ def properties(self) -> Optional[TableProperties]:
54
67
  return self.get("properties")
55
68
 
56
69
  @properties.setter
57
- def properties(self, properties: Optional[Dict[str, str]]) -> None:
70
+ def properties(self, properties: Optional[TableProperties]) -> None:
58
71
  self["properties"] = properties
59
72
 
73
+ @property
74
+ def latest_active_table_version(self) -> Optional[str]:
75
+ return self.get("latest_active_table_version")
76
+
77
+ @latest_active_table_version.setter
78
+ def latest_active_table_version(
79
+ self,
80
+ latest_active_table_version: Optional[str],
81
+ ) -> None:
82
+ self["latest_active_table_version"] = latest_active_table_version
83
+
84
+ @property
85
+ def latest_table_version(self) -> Optional[str]:
86
+ return self.get("latest_table_version")
87
+
88
+ @latest_table_version.setter
89
+ def latest_table_version(
90
+ self,
91
+ latest_table_version: Optional[str],
92
+ ) -> None:
93
+ self["latest_table_version"] = latest_table_version
94
+
95
+ @property
96
+ def native_object(self) -> Optional[Any]:
97
+ return self.get("nativeObject")
98
+
99
+ @native_object.setter
100
+ def native_object(self, native_object: Optional[Any]) -> None:
101
+ self["nativeObject"] = native_object
102
+
60
103
  @property
61
104
  def namespace_locator(self) -> Optional[NamespaceLocator]:
62
105
  table_locator = self.locator
@@ -78,6 +121,70 @@ class Table(dict):
78
121
  return table_locator.table_name
79
122
  return None
80
123
 
124
+ @table_name.setter
125
+ def table_name(self, table_name: Optional[str]) -> None:
126
+ table_locator = self.locator
127
+ if table_locator:
128
+ table_locator.table_name = table_name
129
+
130
+ def url(self, catalog_name: Optional[str] = None) -> str:
131
+ return (
132
+ f"dc://{catalog_name}/{self.namespace}/{self.table_name}/"
133
+ if catalog_name
134
+ else f"table://{self.namespace}/{self.table_name}/"
135
+ )
136
+
137
+ def read_table_property(self, property: TableProperty) -> Any:
138
+ return TableProperty.read_table_property(self, property)
139
+
140
+ def to_serializable(self) -> Table:
141
+ serializable = self
142
+ if serializable.namespace_locator:
143
+ serializable: Table = Table.update_for(self)
144
+ # remove the mutable namespace locator
145
+ serializable.locator.namespace_locator = NamespaceLocator.of(self.id)
146
+ return serializable
147
+
148
+ def from_serializable(
149
+ self,
150
+ path: str,
151
+ filesystem: Optional[pyarrow.fs.FileSystem] = None,
152
+ ) -> Table:
153
+ # restore the namespace locator from its mapped immutable metafile ID
154
+ if self.namespace_locator and self.namespace_locator.namespace == self.id:
155
+ parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
156
+ base_metafile_path=path,
157
+ parent_number=1,
158
+ )
159
+ txn_log_dir = posixpath.join(
160
+ posixpath.dirname(
161
+ posixpath.dirname(parent_rev_dir_path),
162
+ ),
163
+ TXN_DIR_NAME,
164
+ )
165
+ namespace = Namespace.read(
166
+ MetafileRevisionInfo.latest_revision(
167
+ revision_dir_path=parent_rev_dir_path,
168
+ filesystem=filesystem,
169
+ success_txn_log_dir=txn_log_dir,
170
+ ).path,
171
+ filesystem,
172
+ )
173
+ self.locator.namespace_locator = namespace.locator
174
+ return self
175
+
176
+
177
+ class TableLocatorName(LocatorName):
178
+ def __init__(self, locator: TableLocator):
179
+ self.locator = locator
180
+
181
+ @property
182
+ def immutable_id(self) -> Optional[str]:
183
+ return None
184
+
185
+ def parts(self) -> List[str]:
186
+ return [self.locator.table_name]
187
+
81
188
 
82
189
  class TableLocator(Locator, dict):
83
190
  @staticmethod
@@ -91,11 +198,19 @@ class TableLocator(Locator, dict):
91
198
 
92
199
  @staticmethod
93
200
  def at(namespace: Optional[str], table_name: Optional[str]) -> TableLocator:
94
- namespace_locator = NamespaceLocator.of(namespace)
201
+ namespace_locator = NamespaceLocator.of(namespace) if namespace else None
95
202
  return TableLocator.of(namespace_locator, table_name)
96
203
 
97
204
  @property
98
- def namespace_locator(self) -> NamespaceLocator:
205
+ def name(self) -> TableLocatorName:
206
+ return TableLocatorName(self)
207
+
208
+ @property
209
+ def parent(self) -> Optional[NamespaceLocator]:
210
+ return self.namespace_locator
211
+
212
+ @property
213
+ def namespace_locator(self) -> Optional[NamespaceLocator]:
99
214
  val: Dict[str, Any] = self.get("namespaceLocator")
100
215
  if val is not None and not isinstance(val, NamespaceLocator):
101
216
  self.namespace_locator = val = NamespaceLocator(val)
@@ -119,13 +234,3 @@ class TableLocator(Locator, dict):
119
234
  if namespace_locator:
120
235
  return namespace_locator.namespace
121
236
  return None
122
-
123
- def canonical_string(self) -> str:
124
- """
125
- Returns a unique string for the given locator that can be used
126
- for equality checks (i.e. two locators are equal if they have
127
- the same canonical string).
128
- """
129
- nl_hexdigest = self.namespace_locator.hexdigest()
130
- table_name = self.table_name
131
- return f"{nl_hexdigest}|{table_name}"
@@ -1,38 +1,77 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
- from typing import Any, Dict, List, Optional, Union
4
+ import base64
5
+ import re
6
+ import posixpath
7
+ from typing import Any, Dict, List, Optional, Tuple
5
8
 
9
+ import pyarrow
6
10
  import pyarrow as pa
7
11
 
8
- from deltacat.storage.model.locator import Locator
12
+ import deltacat.storage.model.partition as partition
13
+
14
+ from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
15
+ from deltacat.constants import (
16
+ METAFILE_FORMAT,
17
+ METAFILE_FORMAT_JSON,
18
+ TXN_DIR_NAME,
19
+ BYTES_PER_KIBIBYTE,
20
+ )
21
+ from deltacat.storage.model.schema import (
22
+ Schema,
23
+ SchemaList,
24
+ )
25
+ from deltacat.storage.model.locator import (
26
+ Locator,
27
+ LocatorName,
28
+ )
9
29
  from deltacat.storage.model.namespace import NamespaceLocator
10
- from deltacat.storage.model.table import TableLocator
30
+ from deltacat.storage.model.table import (
31
+ TableLocator,
32
+ Table,
33
+ )
11
34
  from deltacat.types.media import ContentType
12
- from deltacat.storage.model.sort_key import SortKey
35
+ from deltacat.storage.model.sort_key import SortScheme, SortSchemeList
36
+ from deltacat.storage.model.types import LifecycleState
37
+ from deltacat.types.tables import TableProperty
13
38
 
39
+ TableVersionProperties = Dict[str, Any]
14
40
 
15
- class TableVersion(dict):
41
+
42
+ class TableVersion(Metafile):
16
43
@staticmethod
def of(
    locator: Optional[TableVersionLocator],
    schema: Optional[Schema],
    partition_scheme: Optional[partition.PartitionScheme] = None,
    description: Optional[str] = None,
    properties: Optional[TableVersionProperties] = None,
    content_types: Optional[List[ContentType]] = None,
    sort_scheme: Optional[SortScheme] = None,
    watermark: Optional[int] = None,
    lifecycle_state: Optional[LifecycleState] = None,
    schemas: Optional[SchemaList] = None,
    partition_schemes: Optional[partition.PartitionSchemeList] = None,
    sort_schemes: Optional[SortSchemeList] = None,
    previous_table_version: Optional[str] = None,
    native_object: Optional[Any] = None,
) -> TableVersion:
    """
    Create a new table version and populate it from the given arguments.

    Every argument is assigned through the matching property setter, so all
    values (including None) are stored on the returned object. Note that
    `lifecycle_state` is stored under the `state` property.
    """
    created = TableVersion()
    # The assignment order below is kept fixed: each setter writes a key
    # into the underlying mapping, so reordering would reorder its keys.
    created.locator = locator
    created.schema = schema
    created.partition_scheme = partition_scheme
    created.description = description
    created.properties = properties
    created.content_types = content_types
    created.sort_scheme = sort_scheme
    created.watermark = watermark
    created.state = lifecycle_state
    created.schemas = schemas
    created.partition_schemes = partition_schemes
    created.sort_schemes = sort_schemes
    created.previous_table_version = previous_table_version
    created.native_object = native_object
    return created
37
76
 
38
77
  @property
@@ -47,36 +86,91 @@ class TableVersion(dict):
47
86
  self["tableVersionLocator"] = table_version_locator
48
87
 
49
88
  @property
50
- def schema(self) -> Optional[Union[pa.Schema, str, bytes]]:
51
- return self.get("schema")
89
+ def schema(self) -> Optional[Schema]:
90
+ val: Dict[str, Any] = self.get("schema")
91
+ if val is not None and not isinstance(val, Schema):
92
+ self.schema = val = Schema(val)
93
+ return val
52
94
 
53
95
  @schema.setter
54
- def schema(self, schema: Optional[Union[pa.Schema, str, bytes]]) -> None:
96
+ def schema(self, schema: Optional[Schema]) -> None:
55
97
  self["schema"] = schema
56
98
 
57
99
  @property
58
- def sort_keys(self) -> Optional[List[SortKey]]:
59
- return self.get("sortKeys")
100
+ def schemas(self) -> Optional[SchemaList]:
101
+ val: Optional[SchemaList] = self.get("schemas")
102
+ if val is not None and not isinstance(val, SchemaList):
103
+ self["schemas"] = val = SchemaList.of(val)
104
+ return val
105
+
106
+ @schemas.setter
107
+ def schemas(self, schemas: Optional[SchemaList]) -> None:
108
+ self["schemas"] = schemas
109
+
110
@property
def sort_scheme(self) -> Optional[SortScheme]:
    """
    Return this table version's sort scheme, or None if unset.

    A stored value that is not yet a `SortScheme` is wrapped in one and
    written back before being returned.
    """
    stored = self.get("sortScheme")
    if stored is None or isinstance(stored, SortScheme):
        return stored
    wrapped = SortScheme(stored)
    self["sortScheme"] = wrapped
    return wrapped

@sort_scheme.setter
def sort_scheme(self, sort_scheme: Optional[SortScheme]) -> None:
    """Store the sort scheme under the `sortScheme` key."""
    self["sortScheme"] = sort_scheme
120
+
121
@property
def sort_schemes(self) -> Optional[SortSchemeList]:
    """
    Return the list of sort schemes stored on this table version, or None.

    A stored value that is not yet a `SortSchemeList` is converted with
    `SortSchemeList.of` and written back before being returned.
    """
    stored = self.get("sortSchemes")
    if stored is None or isinstance(stored, SortSchemeList):
        return stored
    converted = SortSchemeList.of(stored)
    self["sortSchemes"] = converted
    return converted

@sort_schemes.setter
def sort_schemes(self, sort_schemes: Optional[SortSchemeList]) -> None:
    """Store the sort scheme list under the `sortSchemes` key."""
    self["sortSchemes"] = sort_schemes
131
+
132
@property
def watermark(self) -> Optional[int]:
    """Return the value stored under the `watermark` key, or None."""
    stored = self.get("watermark")
    return stored

@watermark.setter
def watermark(self, watermark: Optional[int]) -> None:
    """Store the watermark under the `watermark` key."""
    self["watermark"] = watermark
64
139
 
65
140
@property
def state(self) -> Optional[LifecycleState]:
    """
    Return the lifecycle state of this table version, or None if unset.

    The stored value is passed through `LifecycleState(...)` on every read;
    the coerced value is not written back.
    """
    stored = self.get("state")
    if stored is None:
        return None
    return LifecycleState(stored)

@state.setter
def state(self, state: Optional[LifecycleState]) -> None:
    """Store the lifecycle state under the `state` key."""
    self["state"] = state
72
148
 
73
149
@property
def partition_scheme(self) -> Optional[partition.PartitionScheme]:
    """
    Return this table version's partition scheme, or None if unset.

    A stored value that is not yet a `PartitionScheme` is wrapped in one
    and written back before being returned.
    """
    stored = self.get("partitionScheme")
    if stored is None or isinstance(stored, partition.PartitionScheme):
        return stored
    wrapped = partition.PartitionScheme(stored)
    self["partitionScheme"] = wrapped
    return wrapped

@partition_scheme.setter
def partition_scheme(
    self, partition_scheme: Optional[partition.PartitionScheme]
) -> None:
    """Store the partition scheme under the `partitionScheme` key."""
    self["partitionScheme"] = partition_scheme
161
+
162
@property
def partition_schemes(self) -> Optional[partition.PartitionSchemeList]:
    """
    Return the list of partition schemes on this table version, or None.

    A stored value that is not yet a `PartitionSchemeList` is converted
    with `PartitionSchemeList.of` and written back before being returned.
    """
    stored = self.get("partitionSchemes")
    if stored is None or isinstance(stored, partition.PartitionSchemeList):
        return stored
    converted = partition.PartitionSchemeList.of(stored)
    self["partitionSchemes"] = converted
    return converted

@partition_schemes.setter
def partition_schemes(
    self, partition_schemes: Optional[partition.PartitionSchemeList]
) -> None:
    """Store the partition scheme list under the `partitionSchemes` key."""
    self["partitionSchemes"] = partition_schemes
80
174
 
81
175
  @property
82
176
  def description(self) -> Optional[str]:
@@ -87,11 +181,19 @@ class TableVersion(dict):
87
181
  self["description"] = description
88
182
 
89
183
@property
def previous_table_version(self) -> Optional[str]:
    """
    Return the previous table version identifier, or None if unset.

    NOTE(review): the storage key is the snake_case string
    `previous_table_version`, unlike the camelCase keys used by the other
    properties here. It is kept as-is because it is part of the stored data.
    """
    stored = self.get("previous_table_version")
    return stored

@previous_table_version.setter
def previous_table_version(self, previous_table_version: Optional[str]) -> None:
    """Store the previous table version identifier."""
    self["previous_table_version"] = previous_table_version
190
+
191
@property
def properties(self) -> Optional[TableVersionProperties]:
    """Return the value stored under the `properties` key, or None."""
    stored = self.get("properties")
    return stored

@properties.setter
def properties(self, properties: Optional[TableVersionProperties]) -> None:
    """Store the table version properties under the `properties` key."""
    self["properties"] = properties
96
198
 
97
199
  @property
@@ -107,6 +209,14 @@ class TableVersion(dict):
107
209
  def content_types(self, content_types: Optional[List[ContentType]]) -> None:
108
210
  self["contentTypes"] = content_types
109
211
 
212
@property
def native_object(self) -> Optional[Any]:
    """Return the value stored under the `nativeObject` key, or None."""
    stored = self.get("nativeObject")
    return stored

@native_object.setter
def native_object(self, native_object: Optional[Any]) -> None:
    """Store an arbitrary object under the `nativeObject` key."""
    self["nativeObject"] = native_object
219
+
110
220
  @property
111
221
  def namespace_locator(self) -> Optional[NamespaceLocator]:
112
222
  table_version_locator = self.locator
@@ -142,17 +252,190 @@ class TableVersion(dict):
142
252
  return table_version_locator.table_version
143
253
  return None
144
254
 
255
def url(self, catalog_name: Optional[str] = None) -> str:
    """
    Return a URL string identifying this table version.

    With a truthy `catalog_name` the URL uses the `dc://` scheme and starts
    with the catalog name; otherwise it uses the `table://` scheme. The
    namespace, table name and table version are interpolated as-is, so an
    unset component is rendered as the text `None`.
    """
    if catalog_name:
        return f"dc://{catalog_name}/{self.namespace}/{self.table_name}/{self.table_version}/"
    return f"table://{self.namespace}/{self.table_name}/{self.table_version}/"
261
+
145
262
def is_supported_content_type(self, content_type: ContentType):
    """
    Return True if `content_type` may be used with this table version.

    When no content types are configured (None or empty), every content
    type is treated as supported.
    """
    allowed = self.content_types
    if not allowed:
        return True
    return content_type in allowed
150
267
 
268
def to_serializable(self) -> TableVersion:
    """
    Return a form of this table version that is ready to be written out.

    Works on the object returned by `TableVersion.update_for(self)`
    (presumably a copy of this table version; confirm against `Metafile`):

    * `schema` and each entry of `schemas` are replaced by their serialized
      bytes, base64-encoded to text when the metafile format is JSON.
    * If a table locator is present, it is replaced by a placeholder
      locator whose namespace and table name are both `self.id`.
    """
    encode_as_text = METAFILE_FORMAT == METAFILE_FORMAT_JSON

    def _dump(schema):
        # Serialize one schema; JSON metafiles cannot hold raw bytes, so
        # they get base64 text instead.
        raw = schema.serialize().to_pybytes()
        if encode_as_text:
            return base64.b64encode(raw).decode("utf-8")
        return raw

    serializable: TableVersion = TableVersion.update_for(self)
    if serializable.schema:
        serializable.schema = _dump(serializable.schema)

    if serializable.schemas:
        serializable.schemas = [_dump(item) for item in serializable.schemas]

    if serializable.table_locator:
        # remove the mutable table locator
        placeholder = TableLocator.at(
            namespace=self.id,
            table_name=self.id,
        )
        serializable.locator.table_locator = placeholder
    return serializable
292
+
293
def from_serializable(
    self,
    path: str,
    filesystem: Optional[pyarrow.fs.FileSystem] = None,
) -> TableVersion:
    """
    Restore this table version in place from its serialized form.

    Steps performed:

    * `schema` and every entry of `schemas` are decoded back into `Schema`
      objects (base64-decoding first when the metafile format is JSON). A
      missing or empty value is reset to None.
    * Sort keys of `sort_scheme` and of every entry in `sort_schemes` are
      touched so that their property getters normalize the stored values.
    * If the table locator is the placeholder written at serialization time
      (its table name equals `self.id`), the parent table metafile is read
      and its locator is used instead.

    Args:
        path: Path of the metafile this table version was read from.
        filesystem: Filesystem used to read the parent table metafile.

    Returns:
        This table version (`self`).
    """
    # Use the same format check as `to_serializable` and the `schemas`
    # branch, so encoding and decoding can never disagree.
    is_json = METAFILE_FORMAT == METAFILE_FORMAT_JSON

    def _decode_schema(schema_data: Any) -> Schema:
        # Inverse of the per-schema encoding applied at serialization time.
        schema_bytes = base64.b64decode(schema_data) if is_json else schema_data
        return Schema.deserialize(pa.py_buffer(schema_bytes))

    schema_data = self.get("schema")
    self["schema"] = _decode_schema(schema_data) if schema_data else None

    schemas_data = self.get("schemas")
    self.schemas = (
        [_decode_schema(schema) for schema in schemas_data]
        if schemas_data
        else None
    )

    if self.sort_scheme:
        # force list-to-tuple conversion of sort keys via property invocation
        self.sort_scheme.keys
        # `sort_schemes` is optional independently of `sort_scheme`, so it
        # may be None here; iterating it unguarded raised TypeError.
        for sort_scheme in self.sort_schemes or ():
            sort_scheme.keys

    # restore the table locator from its mapped immutable metafile ID
    if self.table_locator and self.table_locator.table_name == self.id:
        parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
            base_metafile_path=path,
            parent_number=1,
        )
        # The transaction log directory sits three directory levels above
        # the parent's revision directory.
        txn_log_dir = posixpath.join(
            posixpath.dirname(
                posixpath.dirname(
                    posixpath.dirname(parent_rev_dir_path),
                )
            ),
            TXN_DIR_NAME,
        )
        table = Table.read(
            MetafileRevisionInfo.latest_revision(
                revision_dir_path=parent_rev_dir_path,
                filesystem=filesystem,
                success_txn_log_dir=txn_log_dir,
            ).path,
            filesystem,
        )
        self.locator.table_locator = table.locator
    return self
351
+
352
def current_version_number(self) -> Optional[int]:
    """
    Return the numeric part of this table version as an integer.

    Returns None if no table version has been assigned yet. Raises
    ValueError (from `parse_table_version`) if the assigned version string
    is not a valid table version.
    """
    if self.table_version is None:
        return None
    # Only the number is needed; the optional prefix is discarded.
    _, number = TableVersion.parse_table_version(self.table_version)
    if number is None:
        return None
    return int(number)
365
+
366
def read_table_property(self, property: TableProperty) -> Any:
    """Read `property` for this table version via `TableProperty.read_table_property`."""
    value = TableProperty.read_table_property(self, property)
    return value
368
+
369
@staticmethod
def next_version(previous_version: Optional[str] = None) -> str:
    """
    Return the table version string that follows `previous_version`.

    The version number is incremented by one and any prefix of the previous
    version is kept. Returns "1" when `previous_version` is None. Raises
    ValueError (from `parse_table_version`) when `previous_version` is not
    a valid table version, including the empty string.
    """
    if previous_version is None:
        prefix, previous_number = None, None
    else:
        prefix, previous_number = TableVersion.parse_table_version(
            previous_version,
        )
    if previous_number is None:
        next_number = 1
    else:
        next_number = int(previous_number) + 1
    kept_prefix = "" if prefix is None else prefix
    return f"{kept_prefix}{next_number}"
390
+
391
@staticmethod
def parse_table_version(table_version: str) -> Tuple[Optional[str], int]:
    """
    Split a table version string into its prefix and version number.

    Valid versions are either digits only (`1`) or a word-character prefix
    ending in a dot followed by digits (`TableVersionName.1`).

    Returns:
        A `(prefix, version_number)` tuple. The prefix includes its trailing
        dot and is None when the version has no prefix.

    Raises:
        ValueError: If the version is empty or None, is longer than
            `BYTES_PER_KIBIBYTE` characters, or does not match the
            expected form.
    """
    if not table_version:
        raise ValueError("Table version to parse is undefined.")
    if len(table_version) > BYTES_PER_KIBIBYTE:
        raise ValueError(
            f"Invalid table version {table_version}. Table version "
            f"identifier cannot be greater than {BYTES_PER_KIBIBYTE} "
            f"characters."
        )
    matched = re.match(r"^(\w*\.)?(\d+)$", table_version)
    if not matched:
        raise ValueError(
            f"Invalid table version {table_version}. Valid table versions "
            f"are of the form `TableVersionName.1` or simply `1`.",
        )
    prefix, digits = matched.groups()
    return prefix, int(digits)
416
+
417
+
418
class TableVersionLocatorName(LocatorName):
    """Locator name for a table version, backed by the wrapped locator."""

    def __init__(self, locator: TableVersionLocator):
        # Hold a reference to the locator; reads and writes go through it.
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """Return the locator's table version, which serves as the immutable ID."""
        table_version = self.locator.table_version
        return table_version

    @immutable_id.setter
    def immutable_id(self, immutable_id: Optional[str]):
        """Set the immutable ID by assigning the locator's table version."""
        self.locator.table_version = immutable_id

    def parts(self) -> List[str]:
        """Return a single-element list holding the locator's table version."""
        table_version = self.locator.table_version
        return [table_version]
432
+
151
433
 
152
434
  class TableVersionLocator(Locator, dict):
153
435
  @staticmethod
154
436
  def of(
155
- table_locator: Optional[TableLocator], table_version: Optional[str]
437
+ table_locator: Optional[TableLocator],
438
+ table_version: Optional[str],
156
439
  ) -> TableVersionLocator:
157
440
  table_version_locator = TableVersionLocator()
158
441
  table_version_locator.table_locator = table_locator
@@ -165,9 +448,17 @@ class TableVersionLocator(Locator, dict):
165
448
  table_name: Optional[str],
166
449
  table_version: Optional[str],
167
450
  ) -> TableVersionLocator:
168
- table_locator = TableLocator.at(namespace, table_name)
451
+ table_locator = TableLocator.at(namespace, table_name) if table_name else None
169
452
  return TableVersionLocator.of(table_locator, table_version)
170
453
 
454
@property
def name(self):
    """Return a `TableVersionLocatorName` wrapping this locator."""
    locator_name = TableVersionLocatorName(self)
    return locator_name
457
+
458
@property
def parent(self) -> Optional[TableLocator]:
    """Return the parent locator, i.e. this table version's table locator."""
    table_locator = self.table_locator
    return table_locator
461
+
171
462
  @property
172
463
  def table_locator(self) -> Optional[TableLocator]:
173
464
  val: Dict[str, Any] = self.get("tableLocator")
@@ -185,7 +476,13 @@ class TableVersionLocator(Locator, dict):
185
476
 
186
477
@table_version.setter
def table_version(self, table_version: Optional[str]) -> None:
    """
    Validate the given table version and store it in canonical form.

    The version is parsed with `TableVersion.parse_table_version` and
    rebuilt from the parsed prefix and integer version number, so leading
    zeros are dropped (e.g. "MyVersion.0001" is saved as "MyVersion.1").

    Raises:
        ValueError: If the version is invalid. NOTE(review): despite the
            `Optional` type hint this includes None and the empty string,
            since the parser rejects any falsy value.
    """
    # ensure that the table version is valid
    prefix, number = TableVersion.parse_table_version(table_version)
    # restate the table version number in its canonical form
    if prefix:
        canonical = f"{prefix}{number}"
    else:
        canonical = str(number)
    self["tableVersion"] = canonical
189
486
 
190
487
  @property
191
488
  def namespace_locator(self) -> Optional[NamespaceLocator]:
@@ -207,13 +504,3 @@ class TableVersionLocator(Locator, dict):
207
504
  if table_locator:
208
505
  return table_locator.table_name
209
506
  return None
210
-
211
- def canonical_string(self) -> str:
212
- """
213
- Returns a unique string for the given locator that can be used
214
- for equality checks (i.e. two locators are equal if they have
215
- the same canonical string).
216
- """
217
- tl_hexdigest = self.table_locator.hexdigest()
218
- table_version = self.table_version
219
- return f"{tl_hexdigest}|{table_version}"