deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,643 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import itertools
5
+
6
+ from enum import Enum
7
+ from typing import Optional, List, Dict, Any, TYPE_CHECKING
8
+ from uuid import uuid4
9
+
10
+ if TYPE_CHECKING:
11
+ from deltacat.storage.model.schema import FieldLocator
12
+
13
+ from deltacat import logs
14
+
15
+ from deltacat.types.media import (
16
+ ContentType,
17
+ ContentEncoding,
18
+ EXT_TO_CONTENT_TYPE,
19
+ EXT_TO_CONTENT_ENCODING,
20
+ )
21
+
22
+ import json
23
+ import pyarrow as pa
24
+ import posixpath
25
+
26
+ from deltacat.utils.filesystem import get_file_info
27
+
28
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
29
+
30
+
31
class EntryType(str, Enum):
    """
    Content categories that a manifest entry file can belong to.

    DATA: Fully qualified records, compliant with the parent table's schema,
        that should be inserted and/or updated. For upsert Deltas, the
        entry's parameters identify the fields used to match records to
        update; when no entry parameters are given, the parent table's
        primary keys are used instead. Only records from entries in Deltas
        with lower stream positions than this entry are targeted for update.

    POSITIONAL_DELETE: Pointers to records held in other entries that should
        be deleted. Deleted records are filtered from query results at
        runtime.

    EQUALITY_DELETE: A subset of field values from table records used to find
        and delete records. The full record of any matching data entry in a
        Delta with a lower stream position than this entry's Delta is
        deleted. The entry's parameters control which fields are used for
        record discovery; when no entry parameters are given, discovery
        fields are linked to the parent table's merge keys. Additional
        fields not used for discovery may be present and are ignored.
        Deleted records are filtered from query results at runtime.
    """

    DATA = "data"
    POSITIONAL_DELETE = "positional_delete"
    EQUALITY_DELETE = "equality_delete"

    @classmethod
    def get_default(cls):
        """Return the default entry type, which is DATA."""
        return cls.DATA

    @classmethod
    def list(cls):
        """Return the string value of every member, in declaration order."""
        return [member.value for member in cls]
67
+
68
+
69
class EntryParams(dict):
    """
    Dict-backed parameters that control how a manifest entry is interpreted.

    For EQUALITY_DELETE manifest entry types, the parameters include the
    equality field identifiers, stored under "equality_field_locators".
    """

    @staticmethod
    def of(
        equality_field_locators: Optional[List["FieldLocator"]] = None,
    ) -> EntryParams:
        """
        Build entry parameters.

        The "equality_field_locators" key is only written when a non-None
        value is supplied, so calling this with no arguments yields an empty
        mapping.
        """
        if equality_field_locators is None:
            return EntryParams()
        return EntryParams({"equality_field_locators": equality_field_locators})

    @property
    def equality_field_locators(self) -> Optional[List["FieldLocator"]]:
        """Equality field locators, or None when the key was never set."""
        locators = self.get("equality_field_locators")
        return locators
89
+
90
+
91
+ class Manifest(dict):
92
+ """
93
+ A DeltaCAT manifest contains metadata common to multiple manifest formats
94
+ like Amazon Redshift and Apache Iceberg to simplify dataset import/export.
95
+ """
96
+
97
+ @staticmethod
98
+ def _build_manifest(
99
+ meta: Optional[ManifestMeta],
100
+ entries: Optional[ManifestEntryList],
101
+ author: Optional[ManifestAuthor] = None,
102
+ uuid: str = None,
103
+ ) -> Manifest:
104
+ if not uuid:
105
+ uuid = str(uuid4())
106
+ manifest = Manifest()
107
+ manifest["id"] = uuid
108
+ if meta is not None:
109
+ manifest["meta"] = meta
110
+ if entries is not None:
111
+ manifest["entries"] = entries
112
+ if author is not None:
113
+ manifest["author"] = author
114
+ return manifest
115
+
116
+ @staticmethod
117
+ def of(
118
+ entries: ManifestEntryList,
119
+ author: Optional[ManifestAuthor] = None,
120
+ uuid: str = None,
121
+ entry_type: Optional[EntryType] = None,
122
+ entry_params: Optional[EntryParams] = None,
123
+ ) -> Manifest:
124
+ if not uuid:
125
+ uuid = str(uuid4())
126
+ total_record_count = 0
127
+ total_content_length = 0
128
+ total_source_content_length = 0
129
+ content_type = None
130
+ content_encoding = None
131
+ credentials = None
132
+ content_type_params = None
133
+ schema_id = None
134
+ sort_scheme_id = None
135
+ if entries:
136
+ content_type = entries[0].meta.content_type
137
+ content_encoding = entries[0].meta.content_encoding
138
+ credentials = entries[0].meta.credentials
139
+ content_type_params = entries[0].meta.content_type_parameters
140
+
141
+ # Keep the latest schema ID
142
+ # Schema IDs are >= 0, and schema evolution always increments the last schema ID
143
+ entry_schema_ids = [
144
+ entry.meta.schema_id if entry.meta.schema_id is not None else -1
145
+ for entry in entries
146
+ ]
147
+ max_schema_id = max(entry_schema_ids) if entry_schema_ids else -1
148
+ schema_id = max_schema_id if max_schema_id >= 0 else None
149
+
150
+ # Handle sort_scheme_id: set to None if entries have multiple different sort_scheme_ids
151
+ entry_sort_scheme_ids = set(
152
+ entry.meta.sort_scheme_id
153
+ for entry in entries
154
+ if entry.meta.sort_scheme_id is not None
155
+ )
156
+ sort_scheme_id = (
157
+ list(entry_sort_scheme_ids)[0]
158
+ if len(entry_sort_scheme_ids) == 1
159
+ else None
160
+ )
161
+
162
+ for entry in entries:
163
+ meta = entry.meta
164
+ if meta.content_type != content_type:
165
+ content_type = None
166
+ if meta.content_encoding != content_encoding:
167
+ content_encoding = None
168
+ entry_content_type = meta.content_type
169
+ if content_type and entry_content_type != content_type:
170
+ msg = (
171
+ f"Expected all manifest entries to have content "
172
+ f"type '{content_type}' but found "
173
+ f"'{entry_content_type}'"
174
+ )
175
+ raise ValueError(msg)
176
+ entry_content_encoding = meta.get("content_encoding", None)
177
+ if content_encoding and entry_content_encoding != content_encoding:
178
+ msg = (
179
+ f"Expected all manifest entries to have content "
180
+ f"encoding '{content_encoding}' but found "
181
+ f"'{entry_content_encoding}'"
182
+ )
183
+ raise ValueError(msg)
184
+ actual_entry_type = meta.entry_type
185
+ if entry_type and (actual_entry_type != entry_type):
186
+ msg = (
187
+ f"Expected all manifest entries to have type "
188
+ f"'{entry_type}' but found '{actual_entry_type}'"
189
+ )
190
+ raise ValueError(msg)
191
+ actual_entry_params = meta.entry_params
192
+ if entry_params and (actual_entry_params != entry_params):
193
+ msg = (
194
+ f"Expected all manifest entries to have params "
195
+ f"'{entry_params}' but found '{actual_entry_params}'"
196
+ )
197
+ raise ValueError(msg)
198
+ actual_credentials = meta.credentials
199
+ if credentials and (actual_credentials != credentials):
200
+ msg = (
201
+ f"Expected all manifest entries to have credentials "
202
+ f"'{credentials}' but found '{actual_credentials}'"
203
+ )
204
+ raise ValueError(msg)
205
+ actual_content_type_params = meta.content_type_parameters
206
+ if content_type_params and (
207
+ actual_content_type_params != content_type_params
208
+ ):
209
+ msg = (
210
+ f"Expected all manifest entries to have content type params "
211
+ f"'{content_type_params}' but found '{actual_content_type_params}'"
212
+ )
213
+ raise ValueError(msg)
214
+
215
+ total_record_count += meta.record_count or 0
216
+ total_content_length += meta.content_length or 0
217
+ total_source_content_length += meta.source_content_length or 0
218
+
219
+ meta = ManifestMeta.of(
220
+ record_count=total_record_count,
221
+ content_length=total_content_length,
222
+ content_type=content_type,
223
+ content_encoding=content_encoding,
224
+ source_content_length=total_source_content_length,
225
+ credentials=credentials,
226
+ content_type_parameters=content_type_params,
227
+ entry_type=entry_type,
228
+ entry_params=entry_params,
229
+ schema_id=schema_id,
230
+ sort_scheme_id=sort_scheme_id,
231
+ )
232
+ manifest = Manifest._build_manifest(meta, entries, author, uuid)
233
+ return manifest
234
+
235
+ @staticmethod
236
+ def from_json(json_string: str) -> Manifest:
237
+ parsed_dict = json.loads(json_string)
238
+ return Manifest.of(
239
+ entries=ManifestEntryList.of(
240
+ [
241
+ ManifestEntry.from_dict(entry)
242
+ for entry in parsed_dict.get("entries", [])
243
+ ]
244
+ ),
245
+ author=ManifestAuthor.from_dict(parsed_dict.get("author")),
246
+ uuid=parsed_dict.get("id"),
247
+ )
248
+
249
+ @staticmethod
250
+ def merge_manifests(
251
+ manifests: List[Manifest], author: Optional[ManifestAuthor] = None
252
+ ) -> Manifest:
253
+ all_entries = ManifestEntryList(
254
+ itertools.chain(*[m.entries for m in manifests])
255
+ )
256
+ merged_manifest = Manifest.of(all_entries, author)
257
+ return merged_manifest
258
+
259
@property
def meta(self) -> Optional[ManifestMeta]:
    """
    Returns the value stored under "meta", or None when nothing is stored.

    A stored value that is not yet a `ManifestMeta` is wrapped in one, and
    the wrapped object is written back under "meta" so that later reads
    return the same instance.
    """
    stored = self.get("meta")
    if stored is None or isinstance(stored, ManifestMeta):
        return stored
    wrapped = ManifestMeta(stored)
    self["meta"] = wrapped
    return wrapped
265
+
266
@property
def entries(self) -> Optional[ManifestEntryList]:
    """
    Returns the value stored under "entries", or None when nothing is stored.

    A stored value that is not yet a `ManifestEntryList` is converted with
    `ManifestEntryList.of`, and the converted list is written back under
    "entries" so that later reads return the same instance.
    """
    stored = self.get("entries")
    if stored is None or isinstance(stored, ManifestEntryList):
        return stored
    converted = ManifestEntryList.of(stored)
    self["entries"] = converted
    return converted
272
+
273
@property
def id(self) -> str:
    """
    Returns the value stored under "id".

    Item access is used rather than `get`, so a missing "id" key is an
    error instead of yielding None.
    """
    manifest_id = self["id"]
    return manifest_id
276
+
277
@property
def author(self) -> Optional[ManifestAuthor]:
    """
    Returns the value stored under "author", or None when nothing is stored.

    A stored value that is not yet a `ManifestAuthor` is wrapped in one,
    and the wrapped object is written back under "author" so that later
    reads return the same instance.
    """
    stored = self.get("author")
    if stored is None or isinstance(stored, ManifestAuthor):
        return stored
    wrapped = ManifestAuthor(stored)
    self["author"] = wrapped
    return wrapped
283
+
284
+
285
class ManifestMeta(dict):
    """
    Dictionary-backed metadata record.

    Each field lives under a key of the same name. Fields that were never
    set are simply absent from the dictionary, and the read-only properties
    return None for them.
    """

    @staticmethod
    def of(
        record_count: Optional[int],
        content_length: Optional[int],
        content_type: Optional[str],
        content_encoding: Optional[str],
        source_content_length: Optional[int] = None,
        credentials: Optional[Dict[str, str]] = None,
        content_type_parameters: Optional[List[Dict[str, str]]] = None,
        entry_type: Optional[EntryType] = None,
        entry_params: Optional[EntryParams] = None,
        schema_id: Optional[int] = None,
        sort_scheme_id: Optional[str] = None,
    ) -> ManifestMeta:
        """
        Creates a metadata record holding every argument that is not None.

        An `EntryType` passed as `entry_type` is stored as its `value`; any
        other non-None `entry_type` is stored unchanged.

        Returns:
            A new ManifestMeta containing only the provided fields.
        """
        # The tuple order is the key insertion order of the result.
        candidates = (
            ("record_count", record_count),
            ("content_length", content_length),
            ("source_content_length", source_content_length),
            ("content_type", content_type),
            ("content_type_parameters", content_type_parameters),
            ("content_encoding", content_encoding),
            ("credentials", credentials),
            ("entry_type", entry_type),
            ("entry_params", entry_params),
            ("schema_id", schema_id),
            ("sort_scheme_id", sort_scheme_id),
        )
        provided = {key: value for key, value in candidates if value is not None}
        if "entry_type" in provided and isinstance(provided["entry_type"], EntryType):
            # Replacing the value in place keeps the key's position.
            provided["entry_type"] = provided["entry_type"].value
        manifest_meta = ManifestMeta()
        manifest_meta.update(provided)
        return manifest_meta

    @staticmethod
    def from_dict(obj: dict) -> Optional[ManifestMeta]:
        """
        Rebuilds a metadata record from a plain dictionary.

        Only the keys that `of` accepts are read; any other keys in `obj`
        are ignored.

        Returns:
            None when `obj` is None, otherwise the record created by `of`.
        """
        if obj is None:
            return None

        # Every dictionary key matches the `of` parameter of the same name.
        field_names = (
            "record_count",
            "content_length",
            "content_type",
            "content_encoding",
            "source_content_length",
            "credentials",
            "content_type_parameters",
            "entry_type",
            "entry_params",
            "schema_id",
            "sort_scheme_id",
        )
        return ManifestMeta.of(**{name: obj.get(name) for name in field_names})

    @property
    def record_count(self) -> Optional[int]:
        """Value stored under "record_count", or None."""
        return self.get("record_count")

    @property
    def content_length(self) -> Optional[int]:
        """Value stored under "content_length", or None."""
        return self.get("content_length")

    @property
    def content_type(self) -> Optional[str]:
        """Value stored under "content_type", or None."""
        return self.get("content_type")

    @property
    def content_encoding(self) -> Optional[str]:
        """Value stored under "content_encoding", or None."""
        return self.get("content_encoding")

    @property
    def source_content_length(self) -> Optional[int]:
        """Value stored under "source_content_length", or None."""
        return self.get("source_content_length")

    @property
    def content_type_parameters(self) -> Optional[List[Dict[str, str]]]:
        """Value stored under "content_type_parameters", or None."""
        return self.get("content_type_parameters")

    @content_type_parameters.setter
    def content_type_parameters(self, params: List[Dict[str, str]]) -> None:
        """Stores `params` under "content_type_parameters"."""
        self["content_type_parameters"] = params

    @property
    def credentials(self) -> Optional[Dict[str, str]]:
        """Value stored under "credentials", or None."""
        return self.get("credentials")

    @property
    def entry_type(self) -> Optional[EntryType]:
        """Stored "entry_type" converted to an `EntryType`, or None."""
        raw = self.get("entry_type")
        return None if raw is None else EntryType(raw)

    @property
    def entry_params(self) -> Optional[EntryParams]:
        """
        Value stored under "entry_params", or None.

        A stored value that is not yet an `EntryParams` is wrapped in one
        and written back so that later reads return the same instance.
        """
        stored = self.get("entry_params")
        if stored is None or isinstance(stored, EntryParams):
            return stored
        wrapped = EntryParams(stored)
        self["entry_params"] = wrapped
        return wrapped

    @property
    def schema_id(self) -> Optional[int]:
        """Value stored under "schema_id", or None."""
        return self.get("schema_id")

    @property
    def sort_scheme_id(self) -> Optional[str]:
        """Value stored under "sort_scheme_id", or None."""
        return self.get("sort_scheme_id")
399
+
400
+
401
class ManifestEntry(dict):
    """
    Dictionary-backed description of a single manifest entry.

    The location is stored under both "url" and "uri". The optional "meta",
    "mandatory" and "id" keys hold the entry's metadata, mandatory flag and
    identifier.
    """

    @staticmethod
    def of(
        url: Optional[str],
        meta: Optional[ManifestMeta],
        mandatory: bool = True,
        uri: Optional[str] = None,
        uuid: Optional[str] = None,
    ) -> ManifestEntry:
        """
        Creates a manifest entry for the given location.

        Args:
            url: Location of the entry contents. May be omitted if `uri` is
                given.
            meta: Optional metadata; stored under "meta" when not None.
            mandatory: Stored under "mandatory" when not None.
            uri: Alternative way to give the location. Must equal `url` if
                both are provided.
            uuid: Optional identifier; stored under "id" when not None.

        Returns:
            The new manifest entry.

        Raises:
            ValueError: If neither `url` nor `uri` is given, or if both are
                given and differ.
        """
        if not uri and not url:
            raise ValueError("No URI or URL specified for manifest entry contents.")
        if uri and url and uri != url:
            raise ValueError(f"Manifest entry URI ({uri}) != URL ({url})")

        # Whichever location was supplied is mirrored under both keys.
        location = url if url else uri
        manifest_entry = ManifestEntry()
        manifest_entry["url"] = location
        manifest_entry["uri"] = location
        optional_fields = (("meta", meta), ("mandatory", mandatory), ("id", uuid))
        for key, value in optional_fields:
            if value is not None:
                manifest_entry[key] = value
        return manifest_entry

    @staticmethod
    def from_s3_obj_url(
        url: str,
        record_count: int,
        source_content_length: Optional[int] = None,
        credentials: Optional[Dict[str, str]] = None,
        content_type_parameters: Optional[List[Dict[str, str]]] = None,
        entry_type: Optional[EntryType] = None,
        entry_params: Optional[EntryParams] = None,
        **s3_client_kwargs,
    ) -> ManifestEntry:
        """
        Creates a manifest entry for the object found at `url`.

        The object is fetched with `s3_utils.get_object_at_url`, and its
        "ContentLength", "ContentType" and "ContentEncoding" members supply
        the content length, type and encoding of the entry metadata.

        Args:
            url: URL of the object.
            record_count: Number of records to record in the metadata.
            source_content_length: Optional source content length.
            credentials: Optional credentials to record in the metadata.
            content_type_parameters: Optional content type parameters.
            entry_type: Optional entry type.
            entry_params: Optional entry type parameters.
            **s3_client_kwargs: Forwarded to `get_object_at_url`.

        Returns:
            A manifest entry for `url` carrying the assembled metadata.
        """
        from deltacat.aws import s3u as s3_utils

        s3_obj = s3_utils.get_object_at_url(url, **s3_client_kwargs)
        logger.debug(f"Building manifest entry from {url}: {s3_obj}")
        entry_meta = ManifestMeta.of(
            record_count=record_count,
            content_length=s3_obj["ContentLength"],
            content_type=s3_obj["ContentType"],
            content_encoding=s3_obj["ContentEncoding"],
            source_content_length=source_content_length,
            credentials=credentials,
            content_type_parameters=content_type_parameters,
            entry_type=entry_type,
            entry_params=entry_params,
        )
        return ManifestEntry.of(url, entry_meta)

    @staticmethod
    def from_dict(obj: dict) -> ManifestEntry:
        """
        Rebuilds a manifest entry from a plain dictionary.

        Reads the "url", "uri", "meta", "mandatory" and "id" keys. A missing
        "mandatory" key is treated as True.
        """
        entry_meta = ManifestMeta.from_dict(obj.get("meta"))
        return ManifestEntry.of(
            url=obj.get("url"),
            uri=obj.get("uri"),
            meta=entry_meta,
            mandatory=obj.get("mandatory", True),
            uuid=obj.get("id"),
        )

    @staticmethod
    def from_path(
        path: str,
        filesystem: pa.fs.FileSystem,
        record_count: int,
        source_content_length: Optional[int] = None,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        credentials: Optional[Dict[str, str]] = None,
        content_type_parameters: Optional[List[Dict[str, str]]] = None,
        entry_type: Optional[EntryType] = None,
        entry_params: Optional[EntryParams] = None,
        schema_id: Optional[int] = None,
        sort_scheme_id: Optional[str] = None,
    ) -> ManifestEntry:
        """
        Creates a manifest entry for a file reachable through a pyarrow
        filesystem.

        The content length comes from the file's info. Unless overridden,
        content type and encoding are derived from the path's extensions:
        the last extension is first tried as an encoding, and if it is one,
        the extension before it is tried as a content type; otherwise the
        last extension itself is tried as a content type. Unrecognized or
        missing extensions fall back to binary content with identity
        encoding.

        Args:
            path: Path to the file.
            filesystem: PyArrow filesystem used to look up the file.
            record_count: Number of records in the file.
            source_content_length: Optional original in-memory content
                length before writing to disk.
            content_type: Optional content type override.
            content_encoding: Optional content encoding override.
            credentials: Optional credentials required to read this entry.
            content_type_parameters: Optional content type parameters.
            entry_type: Optional entry type of this manifest entry.
            entry_params: Optional entry type parameters.
            schema_id: Schema ID used to write this manifest entry.
            sort_scheme_id: Sort scheme ID used to write this manifest entry.

        Returns:
            A ManifestEntry instance.

        Raises:
            FileNotFoundError: If `path` does not point to a file.
        """
        file_info = get_file_info(path, filesystem)
        if file_info.type != pa.fs.FileType.File:
            raise FileNotFoundError(f"Path does not point to a file: {path}")

        # Peel extensions off from the right; the last one may be an encoding.
        stem, last_ext = posixpath.splitext(path)

        # Defaults used when the path carries no recognizable extension.
        derived_content_type = ContentType.BINARY
        derived_content_encoding = ContentEncoding.IDENTITY

        if last_ext:
            derived_content_encoding = EXT_TO_CONTENT_ENCODING.get(
                last_ext,
                ContentEncoding.IDENTITY,
            )
            if derived_content_encoding != ContentEncoding.IDENTITY:
                # The last extension was an encoding, so the content type
                # (if any) is named by the extension before it.
                type_ext = posixpath.splitext(stem)[1]
            else:
                # Not an encoding: the last extension may name the type.
                type_ext = last_ext
            if type_ext:
                derived_content_type = EXT_TO_CONTENT_TYPE.get(
                    type_ext,
                    ContentType.BINARY,
                )

        encoding_without_type = (
            derived_content_type == ContentType.BINARY
            and derived_content_encoding != ContentEncoding.IDENTITY
        )
        if encoding_without_type:
            logger.debug(
                f"Found encoding {derived_content_encoding.value} but no "
                f"content type for {path}, assuming binary"
            )

        # Explicit overrides win over anything derived from the path.
        if content_type is not None:
            final_content_type = content_type
        else:
            final_content_type = derived_content_type.value
        if content_encoding is not None:
            final_content_encoding = content_encoding
        else:
            final_content_encoding = derived_content_encoding.value

        entry_meta = ManifestMeta.of(
            record_count=record_count,
            content_length=file_info.size,
            content_type=final_content_type,
            content_encoding=final_content_encoding,
            source_content_length=source_content_length,
            credentials=credentials,
            content_type_parameters=content_type_parameters,
            entry_type=entry_type,
            entry_params=entry_params,
            schema_id=schema_id,
            sort_scheme_id=sort_scheme_id,
        )
        return ManifestEntry.of(path, entry_meta)

    @property
    def uri(self) -> Optional[str]:
        """Value stored under "uri", or None."""
        return self.get("uri")

    @property
    def url(self) -> Optional[str]:
        """Value stored under "url", or None."""
        return self.get("url")

    @property
    def meta(self) -> Optional[ManifestMeta]:
        """
        Value stored under "meta", or None.

        A stored value that is not yet a `ManifestMeta` is wrapped in one
        and written back so that later reads return the same instance.
        """
        stored = self.get("meta")
        if stored is None or isinstance(stored, ManifestMeta):
            return stored
        wrapped = ManifestMeta(stored)
        self["meta"] = wrapped
        return wrapped

    @property
    def mandatory(self) -> bool:
        """Value stored under "mandatory"; the key must be present."""
        return self["mandatory"]

    @property
    def id(self) -> Optional[str]:
        """Value stored under "id", or None."""
        return self.get("id")
598
+
599
+
600
class ManifestAuthor(dict):
    """
    Dictionary-backed author record with optional "name" and "version" keys.
    """

    @staticmethod
    def of(name: Optional[str], version: Optional[str]) -> ManifestAuthor:
        """
        Creates an author record holding whichever of `name` and `version`
        is not None.
        """
        candidates = (("name", name), ("version", version))
        manifest_author = ManifestAuthor()
        manifest_author.update(
            {key: value for key, value in candidates if value is not None}
        )
        return manifest_author

    @staticmethod
    def from_dict(obj: dict) -> Optional[ManifestAuthor]:
        """
        Rebuilds an author record from a plain dictionary.

        Returns:
            None when `obj` is None, otherwise a record built from the
            "name" and "version" keys of `obj`.
        """
        if obj is None:
            return None
        author_name = obj.get("name")
        author_version = obj.get("version")
        return ManifestAuthor.of(author_name, author_version)

    @property
    def name(self) -> Optional[str]:
        """Value stored under "name", or None."""
        return self.get("name")

    @property
    def version(self) -> Optional[str]:
        """Value stored under "version", or None."""
        return self.get("version")
623
+
624
+
625
class ManifestEntryList(List[ManifestEntry]):
    """
    List of manifest entries that wraps its members on access.

    Members that are not None and not yet `ManifestEntry` instances are
    wrapped in `ManifestEntry` the first time they are read by index, and
    the wrapped object replaces the original member in the list.
    """

    @staticmethod
    def of(entries: List[ManifestEntry]) -> ManifestEntryList:
        """
        Builds a ManifestEntryList from an iterable of entries.

        Members that are neither None nor `ManifestEntry` instances are
        wrapped in `ManifestEntry`; None members are kept as None.
        """
        manifest_entries = ManifestEntryList()
        for entry in entries:
            if entry is not None and not isinstance(entry, ManifestEntry):
                entry = ManifestEntry(entry)
            manifest_entries.append(entry)
        return manifest_entries

    def __getitem__(self, item):
        """
        Returns the member(s) selected by `item`, wrapping them if needed.

        Args:
            item: An integer index or a slice.

        Returns:
            For an integer index, the member at that position (wrapped in
            place on first access). For a slice, a new ManifestEntryList
            holding the selected members.
        """
        if isinstance(item, slice):
            # A slice yields a list, which must not be fed to ManifestEntry()
            # or assigned back into the slice. Resolve each selected position
            # through the integer path so members are wrapped in place and
            # the result shares objects with this list.
            positions = range(*item.indices(len(self)))
            return ManifestEntryList(self[position] for position in positions)
        val = super().__getitem__(item)
        if val is not None and not isinstance(val, ManifestEntry):
            self[item] = val = ManifestEntry(val)
        return val

    def __iter__(self):
        """Yields members one by one, wrapping each through `__getitem__`."""
        # Index-based on purpose: plain list iteration would bypass
        # __getitem__ and hand out members that were never wrapped.
        for i in range(len(self)):
            yield self[i]  # This triggers __getitem__ conversion