deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,327 @@
1
+ import pytest
2
+ import pyarrow as pa
3
+
4
+ from deltacat.storage.model.expression import (
5
+ Reference,
6
+ Literal,
7
+ Equal,
8
+ NotEqual,
9
+ GreaterThan,
10
+ LessThan,
11
+ GreaterThanEqual,
12
+ LessThanEqual,
13
+ And,
14
+ Or,
15
+ Not,
16
+ In,
17
+ Between,
18
+ Like,
19
+ IsNull,
20
+ )
21
+ from deltacat.storage.model.expression.visitor import DisplayVisitor, ExpressionVisitor
22
+
23
+
24
@pytest.fixture
def field_ref():
    """Provide a ``Reference`` whose field is ``"field1"``."""
    reference = Reference("field1")
    return reference
27
+
28
+
29
@pytest.fixture
def field_ref2():
    """Provide a second ``Reference``, this one for ``"field2"``."""
    reference = Reference("field2")
    return reference
32
+
33
+
34
@pytest.fixture
def literal_int():
    """Provide a ``Literal`` wrapping the PyArrow scalar ``42``."""
    scalar = pa.scalar(42)
    return Literal(scalar)
37
+
38
+
39
@pytest.fixture
def literal_str():
    """Provide a ``Literal`` wrapping the PyArrow scalar ``"test"``."""
    scalar = pa.scalar("test")
    return Literal(scalar)
42
+
43
+
44
@pytest.fixture
def display_visitor():
    """Provide a fresh ``DisplayVisitor`` for rendering expressions as text."""
    visitor = DisplayVisitor()
    return visitor
47
+
48
+
49
class TestExpressionLibrary:
    """Checks for the expression model: builders, helper methods and visitors."""

    # ------------------------------------------------------------------
    # Direct construction
    # ------------------------------------------------------------------

    def test_reference_creation(self):
        """A Reference built from a name only has that field and no index."""
        name = "field1"
        reference = Reference(name)
        assert reference.field == name
        assert reference.index is None

    def test_reference_with_index(self):
        """A Reference built with a positional index exposes both values."""
        reference = Reference("field1", 0)
        assert reference.field == "field1"
        assert reference.index == 0

    def test_literal_creation(self):
        """A Literal keeps the PyArrow scalar it was constructed with."""
        scalar = pa.scalar(42)
        literal = Literal(scalar)
        assert literal.value.as_py() == 42

    # ------------------------------------------------------------------
    # ``.of`` factory methods
    # ------------------------------------------------------------------

    def test_factory_methods(self):
        """Each ``.of`` factory wraps plain Python values in expression nodes."""
        # Reference.of
        reference = Reference.of("field1")
        assert reference.field == "field1"

        # Literal.of
        literal = Literal.of(42)
        assert literal.value.as_py() == 42

        # Equal.of with mixed operand types: both sides become Literals.
        equality = Equal.of("field1", 42)
        assert isinstance(equality.left, Literal)
        assert isinstance(equality.right, Literal)
        assert equality.left.value.as_py() == "field1"
        assert equality.right.value.as_py() == 42

        # Not.of
        negation = Not.of(Equal.of("field1", 42))
        assert isinstance(negation.operand, Equal)

        # In.of
        membership = In.of("field1", [1, 2, 3])
        assert isinstance(membership.value, Literal)
        assert len(membership.values) == 3
        for member in membership.values:
            assert isinstance(member, Literal)

        # Between.of
        range_check = Between.of("field1", 10, 20)
        assert isinstance(range_check.value, Literal)
        assert range_check.lower.value.as_py() == 10
        assert range_check.upper.value.as_py() == 20

        # Like.of
        pattern_check = Like.of("field1", "%test%")
        assert isinstance(pattern_check.value, Literal)
        assert pattern_check.pattern.value.as_py() == "%test%"

    # ------------------------------------------------------------------
    # Helper methods on Reference and on boolean expressions
    # ------------------------------------------------------------------

    def test_reference_comparison_helpers(self, field_ref):
        """eq/ne/gt/lt/ge/le on a Reference yield the matching node type."""
        equality = field_ref.eq(42)
        assert isinstance(equality, Equal)
        assert equality.left == field_ref
        assert equality.right.value.as_py() == 42

        # The remaining helpers are only checked for the node type they build.
        remaining_helpers = (
            ("ne", NotEqual),
            ("gt", GreaterThan),
            ("lt", LessThan),
            ("ge", GreaterThanEqual),
            ("le", LessThanEqual),
        )
        for helper_name, expected_type in remaining_helpers:
            built = getattr(field_ref, helper_name)(42)
            assert isinstance(built, expected_type)

    def test_reference_special_helpers(self, field_ref):
        """is_null/in_/between/like on a Reference build the expected nodes."""
        null_check = field_ref.is_null()
        assert isinstance(null_check, IsNull)
        assert null_check.operand == field_ref

        membership = field_ref.in_([1, 2, 3])
        assert isinstance(membership, In)
        assert membership.value == field_ref
        assert len(membership.values) == 3
        first_member = membership.values[0]
        assert first_member.value.as_py() == 1

        range_check = field_ref.between(10, 20)
        assert isinstance(range_check, Between)
        assert range_check.value == field_ref
        assert range_check.lower.value.as_py() == 10
        assert range_check.upper.value.as_py() == 20

        pattern_check = field_ref.like("%test%")
        assert isinstance(pattern_check, Like)
        assert pattern_check.value == field_ref
        assert pattern_check.pattern.value.as_py() == "%test%"

    def test_boolean_expression_helpers(self, field_ref):
        """and_/or_/not_ combine existing expressions without altering them."""
        first = field_ref.eq(42)
        second = field_ref.gt(10)

        conjunction = first.and_(second)
        assert isinstance(conjunction, And)
        assert conjunction.left == first
        assert conjunction.right == second

        disjunction = first.or_(second)
        assert isinstance(disjunction, Or)
        assert disjunction.left == first
        assert disjunction.right == second

        negation = first.not_()
        assert isinstance(negation, Not)
        assert negation.operand == first

    def test_complex_expression_building(self, field_ref, field_ref2):
        """Chained helpers nest as NOT(OR(AND(...), IS NULL))."""
        conjunction = field_ref.eq(42).and_(field_ref2.gt(10))
        disjunction = conjunction.or_(field_ref.is_null())
        negated = disjunction.not_()

        assert isinstance(negated, Not)
        inner = negated.operand
        assert isinstance(inner, Or)
        assert isinstance(inner.left, And)
        assert isinstance(inner.right, IsNull)

    # ------------------------------------------------------------------
    # DisplayVisitor rendering
    # ------------------------------------------------------------------

    def test_reference_display(self, field_ref, display_visitor):
        """A Reference renders as its field name."""
        rendered = display_visitor.visit(field_ref)
        assert rendered == "field1"

    def test_literal_display(self, literal_int, literal_str, display_visitor):
        """Literals render as the plain text of their value."""
        rendered_int = display_visitor.visit(literal_int)
        assert rendered_int == "42"
        rendered_str = display_visitor.visit(literal_str)
        assert rendered_str == "test"

    def test_comparison_display(self, field_ref, literal_int, display_visitor):
        """Each comparison node renders with its own infix operator."""
        expectations = (
            (Equal, "field1 = 42"),
            (NotEqual, "field1 <> 42"),
            (GreaterThan, "field1 > 42"),
            (LessThan, "field1 < 42"),
            (GreaterThanEqual, "field1 >= 42"),
            (LessThanEqual, "field1 <= 42"),
        )
        for node_type, expected_text in expectations:
            comparison = node_type(field_ref, literal_int)
            assert display_visitor.visit(comparison) == expected_text

    def test_logical_operator_display(self, field_ref, literal_int, display_visitor):
        """AND/OR are parenthesised as a pair; NOT parenthesises its operand."""
        equality = Equal(field_ref, literal_int)
        greater = GreaterThan(field_ref, literal_int)

        rendered_and = display_visitor.visit(And(equality, greater))
        assert rendered_and == "(field1 = 42 AND field1 > 42)"

        rendered_or = display_visitor.visit(Or(equality, greater))
        assert rendered_or == "(field1 = 42 OR field1 > 42)"

        rendered_not = display_visitor.visit(Not(equality))
        assert rendered_not == "NOT (field1 = 42)"

    def test_special_operator_display(self, field_ref, display_visitor):
        """IS NULL, IN, BETWEEN and LIKE each have a dedicated rendering."""
        rendered_null = display_visitor.visit(IsNull(field_ref))
        assert rendered_null == "(field1) IS NULL"

        members = [Literal(pa.scalar(number)) for number in (1, 2, 3)]
        rendered_in = display_visitor.visit(In(field_ref, members))
        assert rendered_in == "field1 IN (1, 2, 3)"

        low = Literal(pa.scalar(10))
        high = Literal(pa.scalar(20))
        rendered_between = display_visitor.visit(Between(field_ref, low, high))
        assert rendered_between == "field1 BETWEEN 10 AND 20"

        wildcard = Literal(pa.scalar("%test%"))
        rendered_like = display_visitor.visit(Like(field_ref, wildcard))
        assert rendered_like == "field1 LIKE %test%"

    def test_complex_expression_display(self, field_ref, field_ref2, display_visitor):
        """A nested expression renders with every level of parentheses."""
        conjunction = field_ref.eq(42).and_(field_ref2.gt(10))
        disjunction = conjunction.or_(field_ref.is_null())
        negated = disjunction.not_()

        rendered = display_visitor.visit(negated)
        assert rendered == "NOT (((field1 = 42 AND field2 > 10) OR (field1) IS NULL))"

    # ------------------------------------------------------------------
    # BinaryExpression copy helpers and string conversion
    # ------------------------------------------------------------------

    def test_binary_expression_with_methods(self, field_ref, field_ref2, literal_int):
        """with_left/with_right swap one operand and keep the other."""
        original = Equal(field_ref, literal_int)

        # with_left replaces only the left operand and keeps the node type.
        left_swapped = original.with_left(field_ref2)
        assert isinstance(left_swapped, Equal)
        assert left_swapped.left == field_ref2
        assert left_swapped.right == literal_int

        # with_right replaces only the right operand.
        replacement = Literal(pa.scalar(100))
        right_swapped = original.with_right(replacement)
        assert right_swapped.left == field_ref
        assert right_swapped.right == replacement

    def test_expression_str_method(self, field_ref, literal_int):
        """str() on an expression gives the same text as the display form."""
        equality = Equal(field_ref, literal_int)
        assert str(equality) == "field1 = 42"

    def test_nested_parentheses(self, field_ref, field_ref2, display_visitor):
        """(field1 = 1 AND field2 = 2) OR field2 = 3 keeps its grouping."""
        first, second, third = (
            Equal(field_ref, Literal(pa.scalar(1))),
            Equal(field_ref2, Literal(pa.scalar(2))),
            Equal(field_ref2, Literal(pa.scalar(3))),
        )

        grouped = Or(And(first, second), third)

        rendered = display_visitor.visit(grouped)
        assert rendered == "((field1 = 1 AND field2 = 2) OR field2 = 3)"

    def test_literal_comparison_methods(self, literal_int):
        """eq/ne on a Literal keep it on the left and wrap the argument."""
        for helper_name, expected_type in (("eq", Equal), ("ne", NotEqual)):
            built = getattr(literal_int, helper_name)("test")
            assert isinstance(built, expected_type)
            assert built.left == literal_int
            assert built.right.value.as_py() == "test"

    # ------------------------------------------------------------------
    # Custom ExpressionVisitor implementations
    # ------------------------------------------------------------------

    def test_custom_visitor(self, field_ref, literal_int):
        """A user-defined visitor can fold an expression tree into a count."""

        class NodeCounter(ExpressionVisitor[None, int]):
            """Visitor that returns how many nodes an expression contains."""

            def visit_reference(self, expr, context=None):
                return 1

            def visit_literal(self, expr, context=None):
                return 1

            def visit_binary_expression(self, expr, left, right, context=None):
                # Both operand counts plus the binary node itself.
                return left + right + 1

            def visit_unary_expression(self, expr, operand, context=None):
                # Operand count plus the unary node itself.
                return operand + 1

            def visit_in(self, expr, context=None):
                # Tested value + every candidate value + the In node.
                return len(expr.values) + 2

            def visit_between(self, expr, context=None):
                # value + lower + upper
                return 3

            def visit_like(self, expr, context=None):
                # value + pattern
                return 2

        counter = NodeCounter()

        # Leaves count as one node each; a comparison adds one for itself.
        assert counter.visit(field_ref) == 1
        assert counter.visit(literal_int) == 1
        assert counter.visit(Equal(field_ref, literal_int)) == 3

        # Two comparisons joined by AND: (1+1+1) + (1+1+1) + 1.
        combined = field_ref.eq(42).and_(field_ref.gt(10))
        assert counter.visit(combined) == 7
@@ -0,0 +1,129 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from deltacat.storage.model.manifest import Manifest, ManifestEntry
6
+
7
+
8
@pytest.fixture
def manifest_a():
    """Return JSON text describing a complete two-entry manifest.

    The document contains two entries (both ``"mandatory": true``, each
    with its own ``meta`` object), a manifest-level ``meta`` object, an
    ``id``, and an ``author`` object with ``name`` and ``version``.

    Returns:
        str: the raw, unparsed JSON text.
    """
    return """
    {
        "entries":[
            {
                "uri":"s3://test_bucket/file1.tsv.gz",
                "mandatory":true,
                "meta":{
                    "record_count":0,
                    "content_length":123,
                    "source_content_length":0,
                    "content_type":"application/x-amzn-unescaped-tsv",
                    "content_encoding":"gzip"
                }
            },
            {
                "uri":"s3://test_bucket/file2.tsv.gz",
                "mandatory":true,
                "meta":{
                    "record_count":0,
                    "content_length":456,
                    "source_content_length":0,
                    "content_type":"application/x-amzn-unescaped-tsv",
                    "content_encoding":"gzip"
                }
            }
        ],
        "meta":{
            "record_count":0,
            "content_length":579,
            "source_content_length":0,
            "content_type":"application/x-amzn-unescaped-tsv",
            "content_encoding":"gzip"
        },
        "id":"052f62c0-5082-4935-9937-18a705156123",
        "author":{
            "name":"Dave",
            "version":"1.0"
        }
    }
    """
50
+
51
+
52
@pytest.fixture
def manifest_no_author():
    """Return JSON text describing a two-entry manifest with no author.

    The document contains two entries (both ``"mandatory": true``, each
    with its own ``meta`` object), a manifest-level ``meta`` object and
    an ``id``. There is no ``author`` key.

    Returns:
        str: the raw, unparsed JSON text.
    """
    return """
    {
        "entries":[
            {
                "uri":"s3://test_bucket/file1.tsv.gz",
                "mandatory":true,
                "meta":{
                    "record_count":0,
                    "content_length":123,
                    "source_content_length":0,
                    "content_type":"application/x-amzn-unescaped-tsv",
                    "content_encoding":"gzip"
                }
            },
            {
                "uri":"s3://test_bucket/file2.tsv.gz",
                "mandatory":true,
                "meta":{
                    "record_count":0,
                    "content_length":456,
                    "source_content_length":0,
                    "content_type":"application/x-amzn-unescaped-tsv",
                    "content_encoding":"gzip"
                }
            }
        ],
        "meta":{
            "record_count":0,
            "content_length":579,
            "source_content_length":0,
            "content_type":"application/x-amzn-unescaped-tsv",
            "content_encoding":"gzip"
        },
        "id":"052f62c0-5082-4935-9937-18a705156123"
    }
    """
90
+
91
+
92
@pytest.fixture
def manifest_entry_no_meta():
    """Return JSON text for a single manifest entry that has no metadata.

    The object carries only ``uri`` and ``mandatory``; there is no
    ``meta`` key.

    Returns:
        str: the raw, unparsed JSON text.
    """
    # The decorator is written without call parentheses so that it matches
    # the other fixtures in this module.
    return """
    {
        "uri":"s3://test_bucket/file1.tsv.gz",
        "mandatory":true
    }
    """
100
+
101
+
102
def test_manifest_from_json(manifest_a):
    """Parse the full manifest fixture and spot-check the parsed fields.

    Verifies the entry count, the first entry's URI and record count,
    the manifest-level content length, and the author name.
    """
    parsed = Manifest.from_json(manifest_a)
    entries = parsed.entries

    assert entries is not None
    assert len(entries) == 2

    first_entry = entries[0]
    assert first_entry.uri == "s3://test_bucket/file1.tsv.gz"
    assert first_entry.meta.record_count == 0

    assert parsed.meta.content_length == 579
    assert parsed.author.name == "Dave"
111
+
112
+
113
def test_manifest_from_json_no_author(manifest_no_author):
    """Parse the author-less manifest fixture and check ``author`` is None.

    Also verifies that the entries are still parsed: two entries, the
    first with the expected URI and a non-None ``meta``.
    """
    parsed = Manifest.from_json(manifest_no_author)
    entries = parsed.entries

    assert entries is not None
    assert len(entries) == 2

    first_entry = entries[0]
    assert first_entry.uri == "s3://test_bucket/file1.tsv.gz"
    assert first_entry.meta is not None

    assert parsed.author is None
121
+
122
+
123
def test_manifest_entry_from_dict_no_meta(manifest_entry_no_meta):
    """Build a ``ManifestEntry`` from a dict that lacks a ``meta`` key.

    The fixture text is decoded with ``json.loads`` and handed to
    ``ManifestEntry.from_dict``; the resulting entry must exist, report
    ``meta`` as None, and keep the ``uri`` and ``mandatory`` values.
    """
    raw_entry = json.loads(manifest_entry_no_meta)
    parsed_entry = ManifestEntry.from_dict(raw_entry)

    assert parsed_entry is not None
    assert parsed_entry.meta is None
    assert parsed_entry.uri == "s3://test_bucket/file1.tsv.gz"
    assert parsed_entry.mandatory is True