deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +5 -9
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -7
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
  40. deltacat/compute/compactor_v2/steps/merge.py +17 -126
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  45. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  46. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  47. deltacat/compute/converter/constants.py +4 -0
  48. deltacat/compute/converter/converter_session.py +143 -0
  49. deltacat/compute/converter/model/convert_input.py +69 -0
  50. deltacat/compute/converter/model/convert_input_files.py +61 -0
  51. deltacat/compute/converter/model/converter_session_params.py +99 -0
  52. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  53. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  54. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  55. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  56. deltacat/compute/converter/steps/__init__.py +0 -0
  57. deltacat/compute/converter/steps/convert.py +211 -0
  58. deltacat/compute/converter/steps/dedupe.py +60 -0
  59. deltacat/compute/converter/utils/__init__.py +0 -0
  60. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  61. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  62. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  63. deltacat/compute/converter/utils/io.py +43 -0
  64. deltacat/compute/converter/utils/s3u.py +133 -0
  65. deltacat/compute/resource_estimation/delta.py +1 -19
  66. deltacat/constants.py +47 -1
  67. deltacat/env.py +51 -0
  68. deltacat/examples/__init__.py +0 -0
  69. deltacat/examples/basic_logging.py +101 -0
  70. deltacat/examples/common/__init__.py +0 -0
  71. deltacat/examples/common/fixtures.py +15 -0
  72. deltacat/examples/hello_world.py +27 -0
  73. deltacat/examples/iceberg/__init__.py +0 -0
  74. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  75. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  76. deltacat/exceptions.py +51 -9
  77. deltacat/logs.py +4 -1
  78. deltacat/storage/__init__.py +118 -28
  79. deltacat/storage/iceberg/__init__.py +0 -0
  80. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  81. deltacat/storage/iceberg/impl.py +737 -0
  82. deltacat/storage/iceberg/model.py +709 -0
  83. deltacat/storage/interface.py +217 -134
  84. deltacat/storage/main/__init__.py +0 -0
  85. deltacat/storage/main/impl.py +2077 -0
  86. deltacat/storage/model/delta.py +118 -71
  87. deltacat/storage/model/interop.py +24 -0
  88. deltacat/storage/model/list_result.py +8 -0
  89. deltacat/storage/model/locator.py +93 -3
  90. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  91. deltacat/storage/model/metafile.py +1316 -0
  92. deltacat/storage/model/namespace.py +34 -18
  93. deltacat/storage/model/partition.py +362 -37
  94. deltacat/storage/model/scan/__init__.py +0 -0
  95. deltacat/storage/model/scan/push_down.py +19 -0
  96. deltacat/storage/model/scan/scan_plan.py +10 -0
  97. deltacat/storage/model/scan/scan_task.py +34 -0
  98. deltacat/storage/model/schema.py +892 -0
  99. deltacat/storage/model/shard.py +47 -0
  100. deltacat/storage/model/sort_key.py +170 -13
  101. deltacat/storage/model/stream.py +208 -80
  102. deltacat/storage/model/table.py +123 -29
  103. deltacat/storage/model/table_version.py +322 -46
  104. deltacat/storage/model/transaction.py +757 -0
  105. deltacat/storage/model/transform.py +198 -61
  106. deltacat/storage/model/types.py +111 -13
  107. deltacat/storage/rivulet/__init__.py +11 -0
  108. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  109. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  110. deltacat/storage/rivulet/dataset.py +744 -0
  111. deltacat/storage/rivulet/dataset_executor.py +87 -0
  112. deltacat/storage/rivulet/feather/__init__.py +5 -0
  113. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  114. deltacat/storage/rivulet/feather/serializer.py +35 -0
  115. deltacat/storage/rivulet/fs/__init__.py +0 -0
  116. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  117. deltacat/storage/rivulet/fs/file_store.py +130 -0
  118. deltacat/storage/rivulet/fs/input_file.py +76 -0
  119. deltacat/storage/rivulet/fs/output_file.py +86 -0
  120. deltacat/storage/rivulet/logical_plan.py +105 -0
  121. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  122. deltacat/storage/rivulet/metastore/delta.py +190 -0
  123. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  124. deltacat/storage/rivulet/metastore/sst.py +82 -0
  125. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  126. deltacat/storage/rivulet/mvp/Table.py +101 -0
  127. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  129. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  130. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  131. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  132. deltacat/storage/rivulet/reader/__init__.py +0 -0
  133. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  134. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  135. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  136. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  137. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  138. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  139. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  140. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  141. deltacat/storage/rivulet/schema/__init__.py +0 -0
  142. deltacat/storage/rivulet/schema/datatype.py +128 -0
  143. deltacat/storage/rivulet/schema/schema.py +251 -0
  144. deltacat/storage/rivulet/serializer.py +40 -0
  145. deltacat/storage/rivulet/serializer_factory.py +42 -0
  146. deltacat/storage/rivulet/writer/__init__.py +0 -0
  147. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  148. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  149. deltacat/tests/_io/__init__.py +1 -0
  150. deltacat/tests/catalog/test_catalogs.py +324 -0
  151. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  152. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  153. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  154. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  155. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  156. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  157. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  158. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  159. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  160. deltacat/tests/compute/conftest.py +75 -0
  161. deltacat/tests/compute/converter/__init__.py +0 -0
  162. deltacat/tests/compute/converter/conftest.py +80 -0
  163. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  164. deltacat/tests/compute/converter/utils.py +123 -0
  165. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  166. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  167. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  168. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  169. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  170. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  171. deltacat/tests/compute/test_util_common.py +19 -12
  172. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  173. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  174. deltacat/tests/storage/__init__.py +0 -0
  175. deltacat/tests/storage/conftest.py +25 -0
  176. deltacat/tests/storage/main/__init__.py +0 -0
  177. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  178. deltacat/tests/storage/model/__init__.py +0 -0
  179. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  180. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  181. deltacat/tests/storage/model/test_schema.py +308 -0
  182. deltacat/tests/storage/model/test_shard.py +22 -0
  183. deltacat/tests/storage/model/test_table_version.py +110 -0
  184. deltacat/tests/storage/model/test_transaction.py +308 -0
  185. deltacat/tests/storage/rivulet/__init__.py +0 -0
  186. deltacat/tests/storage/rivulet/conftest.py +149 -0
  187. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  188. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  189. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  190. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  191. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  192. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  193. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  194. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  195. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  196. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  197. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  198. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  199. deltacat/tests/test_deltacat_api.py +39 -0
  200. deltacat/tests/test_utils/filesystem.py +14 -0
  201. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  202. deltacat/tests/test_utils/pyarrow.py +8 -15
  203. deltacat/tests/test_utils/storage.py +266 -3
  204. deltacat/tests/utils/test_daft.py +3 -3
  205. deltacat/tests/utils/test_pyarrow.py +0 -432
  206. deltacat/types/partial_download.py +1 -1
  207. deltacat/types/tables.py +1 -1
  208. deltacat/utils/export.py +59 -0
  209. deltacat/utils/filesystem.py +320 -0
  210. deltacat/utils/metafile_locator.py +73 -0
  211. deltacat/utils/pyarrow.py +36 -183
  212. deltacat-2.0.dist-info/METADATA +65 -0
  213. deltacat-2.0.dist-info/RECORD +347 -0
  214. deltacat/aws/redshift/__init__.py +0 -19
  215. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  216. deltacat/io/dataset.py +0 -73
  217. deltacat/io/read_api.py +0 -143
  218. deltacat/storage/model/delete_parameters.py +0 -40
  219. deltacat/storage/model/partition_spec.py +0 -71
  220. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  221. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  222. deltacat-1.1.36.dist-info/METADATA +0 -64
  223. deltacat-1.1.36.dist-info/RECORD +0 -219
  224. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  225. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  226. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  227. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  228. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  229. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  230. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  231. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  233. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  234. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
  235. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
  236. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -1,253 +0,0 @@
1
- import ray
2
- from typing import Dict, Any
3
- from deltacat.types.media import ContentType
4
- import pyarrow as pa
5
-
6
- import pytest
7
- import deltacat.tests.local_deltacat_storage as ds
8
- import os
9
- from deltacat.tests.test_utils.pyarrow import (
10
- stage_partition_from_file_paths,
11
- commit_delta_to_staged_partition,
12
- )
13
- from deltacat.utils.pyarrow import (
14
- ReadKwargsProviderPyArrowCsvPureUtf8,
15
- ReadKwargsProviderPyArrowSchemaOverride,
16
- )
17
-
18
- DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
19
- "db_file_path",
20
- "deltacat/tests/local_deltacat_storage/db_test.sqlite",
21
- )
22
-
23
-
24
- class TestContentTypeParams:
25
- TEST_NAMESPACE = "test_content_type_params"
26
- TEST_ENTRY_INDEX = 0
27
- DEDUPE_BASE_COMPACTED_TABLE_STRING_PK = "deltacat/tests/compute/compactor_v2/steps/data/dedupe_base_compacted_table_string_pk.csv"
28
- DEDUPE_NO_DUPLICATION_STRING_PK = "deltacat/tests/compute/compactor_v2/steps/data/dedupe_table_no_duplication_string_pk.csv"
29
-
30
- @pytest.fixture(scope="module", autouse=True)
31
- def setup_ray_cluster(self):
32
- ray.init(local_mode=True, ignore_reinit_error=True)
33
- yield
34
- ray.shutdown()
35
-
36
- @pytest.fixture(scope="function")
37
- def local_deltacat_storage_kwargs(self, request: pytest.FixtureRequest):
38
- # see deltacat/tests/local_deltacat_storage/README.md for documentation
39
- kwargs_for_local_deltacat_storage: Dict[str, Any] = {
40
- DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
41
- }
42
- yield kwargs_for_local_deltacat_storage
43
- if os.path.exists(DATABASE_FILE_PATH_VALUE):
44
- os.remove(DATABASE_FILE_PATH_VALUE)
45
-
46
- def test__download_parquet_metadata_for_manifest_entry_sanity(
47
- self, local_deltacat_storage_kwargs
48
- ):
49
- from deltacat.compute.compactor_v2.utils.content_type_params import (
50
- _download_parquet_metadata_for_manifest_entry,
51
- )
52
- from deltacat.types.partial_download import PartialParquetParameters
53
-
54
- partition = stage_partition_from_file_paths(
55
- self.TEST_NAMESPACE,
56
- [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
57
- **local_deltacat_storage_kwargs,
58
- )
59
- test_delta = commit_delta_to_staged_partition(
60
- partition,
61
- [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
62
- **local_deltacat_storage_kwargs,
63
- )
64
- test_entry_index = 0
65
- obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
66
- test_delta, test_entry_index, ds, local_deltacat_storage_kwargs
67
- )
68
- parquet_metadata = ray.get(obj_ref)
69
- partial_parquet_params = parquet_metadata["partial_parquet_params"]
70
-
71
- # validate
72
- assert isinstance(parquet_metadata, dict)
73
- assert "entry_index" in parquet_metadata
74
- assert "partial_parquet_params" in parquet_metadata
75
- assert parquet_metadata["entry_index"] == test_entry_index
76
- assert isinstance(partial_parquet_params, PartialParquetParameters)
77
-
78
- assert partial_parquet_params.row_groups_to_download == [0]
79
- assert partial_parquet_params.num_row_groups == 1
80
- assert partial_parquet_params.num_rows == 8
81
- assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
82
- assert partial_parquet_params.in_memory_size_bytes > 0
83
-
84
- pq_metadata = partial_parquet_params.pq_metadata
85
- assert pq_metadata.num_columns == 2
86
- assert pq_metadata.num_rows == 8
87
- assert pq_metadata.num_row_groups == 1
88
- assert pq_metadata.format_version == "2.6"
89
-
90
- assert (
91
- test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
92
- == ContentType.PARQUET.value
93
- )
94
-
95
- @pytest.mark.parametrize(
96
- "read_kwargs_provider,expected_values",
97
- [
98
- (
99
- ReadKwargsProviderPyArrowCsvPureUtf8(),
100
- {
101
- "num_rows": 6,
102
- "num_columns": 2,
103
- "num_row_groups": 1,
104
- "format_version": "2.6",
105
- "column_types": [pa.string(), pa.string()],
106
- },
107
- ),
108
- (
109
- ReadKwargsProviderPyArrowSchemaOverride(
110
- schema=pa.schema(
111
- [
112
- ("id", pa.string()),
113
- ("value", pa.int64()),
114
- ]
115
- )
116
- ),
117
- {
118
- "num_rows": 6,
119
- "num_columns": 2,
120
- "num_row_groups": 1,
121
- "format_version": "2.6",
122
- "column_types": [pa.string(), pa.int64()],
123
- },
124
- ),
125
- (
126
- ReadKwargsProviderPyArrowSchemaOverride(
127
- schema=None,
128
- pq_coerce_int96_timestamp_unit="ms",
129
- parquet_reader_type="daft",
130
- ),
131
- {
132
- "num_rows": 6,
133
- "num_columns": 2,
134
- "num_row_groups": 1,
135
- "format_version": "2.6",
136
- "column_types": None, # Will use default type inference
137
- },
138
- ),
139
- ],
140
- )
141
- def test__download_parquet_metadata_for_manifest_entry_with_read_kwargs_provider(
142
- self, read_kwargs_provider, expected_values, local_deltacat_storage_kwargs
143
- ):
144
- from deltacat.compute.compactor_v2.utils.content_type_params import (
145
- _download_parquet_metadata_for_manifest_entry,
146
- )
147
-
148
- partition = stage_partition_from_file_paths(
149
- self.TEST_NAMESPACE,
150
- [self.DEDUPE_NO_DUPLICATION_STRING_PK],
151
- **local_deltacat_storage_kwargs,
152
- )
153
- test_delta = commit_delta_to_staged_partition(
154
- partition,
155
- [self.DEDUPE_NO_DUPLICATION_STRING_PK],
156
- **local_deltacat_storage_kwargs,
157
- )
158
- test_entry_index = 0
159
- read_kwargs_provider = ReadKwargsProviderPyArrowCsvPureUtf8
160
- obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
161
- test_delta,
162
- test_entry_index,
163
- ds,
164
- local_deltacat_storage_kwargs,
165
- read_kwargs_provider,
166
- )
167
- parquet_metadata = ray.get(obj_ref)
168
- partial_parquet_params = parquet_metadata["partial_parquet_params"]
169
-
170
- # validate
171
- assert isinstance(parquet_metadata, dict)
172
- assert "entry_index" in parquet_metadata
173
- assert "partial_parquet_params" in parquet_metadata
174
- assert parquet_metadata["entry_index"] == self.TEST_ENTRY_INDEX
175
-
176
- assert partial_parquet_params.row_groups_to_download == [0]
177
- assert (
178
- partial_parquet_params.num_row_groups == expected_values["num_row_groups"]
179
- )
180
- assert partial_parquet_params.num_rows == expected_values["num_rows"]
181
- assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
182
- assert partial_parquet_params.in_memory_size_bytes > 0
183
-
184
- pq_metadata = partial_parquet_params.pq_metadata
185
- assert pq_metadata.num_columns == expected_values["num_columns"]
186
- assert pq_metadata.num_rows == expected_values["num_rows"]
187
- assert pq_metadata.num_row_groups == expected_values["num_row_groups"]
188
- assert pq_metadata.format_version == expected_values["format_version"]
189
-
190
- assert (
191
- test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
192
- == ContentType.PARQUET.value
193
- )
194
-
195
- def test_download_parquet_metadata_for_manifest_entry_file_reader_kwargs_present_top_level_and_deltacat_storage_kwarg(
196
- self, local_deltacat_storage_kwargs, caplog
197
- ):
198
- from deltacat.compute.compactor_v2.utils.content_type_params import (
199
- _download_parquet_metadata_for_manifest_entry,
200
- )
201
- from deltacat.types.partial_download import PartialParquetParameters
202
-
203
- test_file_reader_kwargs_provider = ReadKwargsProviderPyArrowCsvPureUtf8()
204
-
205
- local_deltacat_storage_kwargs[
206
- "file_reader_kwargs_provider"
207
- ] = ReadKwargsProviderPyArrowCsvPureUtf8()
208
-
209
- partition = stage_partition_from_file_paths(
210
- self.TEST_NAMESPACE,
211
- [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
212
- **local_deltacat_storage_kwargs,
213
- )
214
- test_delta = commit_delta_to_staged_partition(
215
- partition,
216
- [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
217
- **local_deltacat_storage_kwargs,
218
- )
219
-
220
- test_entry_index = 0
221
- obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
222
- test_delta,
223
- test_entry_index,
224
- ds,
225
- local_deltacat_storage_kwargs,
226
- test_file_reader_kwargs_provider,
227
- )
228
- parquet_metadata = ray.get(obj_ref)
229
- partial_parquet_params = parquet_metadata["partial_parquet_params"]
230
-
231
- # validate
232
- assert isinstance(parquet_metadata, dict)
233
- assert "entry_index" in parquet_metadata
234
- assert "partial_parquet_params" in parquet_metadata
235
- assert parquet_metadata["entry_index"] == test_entry_index
236
- assert isinstance(partial_parquet_params, PartialParquetParameters)
237
-
238
- assert partial_parquet_params.row_groups_to_download == [0]
239
- assert partial_parquet_params.num_row_groups == 1
240
- assert partial_parquet_params.num_rows == 8
241
- assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
242
- assert partial_parquet_params.in_memory_size_bytes > 0
243
-
244
- pq_metadata = partial_parquet_params.pq_metadata
245
- assert pq_metadata.num_columns == 2
246
- assert pq_metadata.num_rows == 8
247
- assert pq_metadata.num_row_groups == 1
248
- assert pq_metadata.format_version == "2.6"
249
-
250
- assert (
251
- test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
252
- == ContentType.PARQUET.value
253
- )
@@ -1,45 +0,0 @@
1
- import pyarrow as pa
2
- from deltacat.compute.compactor_v2.utils.primary_key_index import (
3
- group_by_pk_hash_bucket,
4
- )
5
-
6
-
7
- class TestGroupByPkHashBucket:
8
- def test_sanity(self):
9
- record = pa.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
10
- pk = pa.array(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"])
11
- record_batch = pa.RecordBatch.from_arrays([record, pk], names=["record", "pk"])
12
- table = pa.Table.from_batches([record_batch])
13
- grouped_array = group_by_pk_hash_bucket(table, 3, ["pk"])
14
-
15
- assert len(grouped_array) == 3
16
- total_records = 0
17
- for arr in grouped_array:
18
- if arr is not None:
19
- total_records += len(arr[1])
20
-
21
- assert total_records == len(table)
22
-
23
- def test_when_record_batches_exceed_int_max_size(self):
24
- record = pa.array(["12bytestring" * 90_000_000])
25
- record_batch = pa.RecordBatch.from_arrays([record], names=["pk"])
26
- table = pa.Table.from_batches([record_batch, record_batch])
27
-
28
- grouped_array = group_by_pk_hash_bucket(table, 3, ["pk"])
29
-
30
- assert len(grouped_array) == 3
31
- # two record batches are preserved as combining them
32
- # would exceed 2GB.
33
- assert len(grouped_array[2].to_batches()) == 2
34
-
35
- def test_when_record_batches_less_than_int_max_size(self):
36
- record = pa.array(["12bytestring" * 90_000])
37
- record_batch = pa.RecordBatch.from_arrays([record], names=["pk"])
38
- table = pa.Table.from_batches([record_batch, record_batch])
39
-
40
- grouped_array = group_by_pk_hash_bucket(table, 3, ["pk"])
41
-
42
- assert len(grouped_array) == 3
43
- # Combined the arrays into one record batch as the size
44
- # would not exceed 2GB.
45
- assert len(grouped_array[1].to_batches()) == 1
@@ -1,64 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: deltacat
3
- Version: 1.1.36
4
- Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
- Home-page: https://github.com/ray-project/deltacat
6
- Author: Ray Team
7
- License: UNKNOWN
8
- Platform: UNKNOWN
9
- Classifier: Development Status :: 4 - Beta
10
- Classifier: Intended Audience :: Developers
11
- Classifier: Programming Language :: Python :: 3 :: Only
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Operating System :: OS Independent
15
- Requires-Python: >=3.9
16
- Description-Content-Type: text/markdown
17
- License-File: LICENSE
18
- Requires-Dist: aws-embedded-metrics==3.2.0
19
- Requires-Dist: boto3~=1.34
20
- Requires-Dist: numpy==1.21.5
21
- Requires-Dist: pandas==1.3.5
22
- Requires-Dist: pyarrow==12.0.1
23
- Requires-Dist: pydantic==1.10.4
24
- Requires-Dist: ray>=2.20.0
25
- Requires-Dist: s3fs==2024.5.0
26
- Requires-Dist: tenacity==8.1.0
27
- Requires-Dist: typing-extensions==4.4.0
28
- Requires-Dist: pymemcache==4.0.0
29
- Requires-Dist: redis==4.6.0
30
- Requires-Dist: getdaft==0.3.6
31
- Requires-Dist: schedule==1.2.0
32
-
33
- # DeltaCAT
34
-
35
- DeltaCAT is a Pythonic Data Catalog powered by Ray.
36
-
37
- Its data storage model allows you to define and manage fast, scalable,
38
- ACID-compliant data catalogs through git-like stage/commit APIs, and has been
39
- used to successfully host exabyte-scale enterprise data lakes.
40
-
41
- DeltaCAT uses the Ray distributed compute framework together with Apache Arrow
42
- for common table management tasks, including petabyte-scale
43
- change-data-capture, data consistency checks, and table repair.
44
-
45
- ## Getting Started
46
-
47
- ### Install
48
-
49
- ```
50
- pip install deltacat
51
- ```
52
-
53
- ### Running Tests
54
-
55
- ```
56
- pip3 install virtualenv
57
- virtualenv test_env
58
- source test_env/bin/activate
59
- pip3 install -r requirements.txt
60
-
61
- pytest
62
- ```
63
-
64
-
@@ -1,219 +0,0 @@
1
- deltacat/__init__.py,sha256=9vJMHGceWew6atD_3VqKurlBJ3crD5mwAQIgSB1yjNY,1778
2
- deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
3
- deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
4
- deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
5
- deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- deltacat/aws/clients.py,sha256=4eQvpkV1PzFfxog7EriuglOGGwNFHR5hbGYpjsNNPxk,6949
7
- deltacat/aws/constants.py,sha256=hcYAUot4ahq9GXCMClQiuYCtiDs5XaOebdUoKg4V84k,1222
8
- deltacat/aws/s3u.py,sha256=GRmYwE9If-JQAazowUo5BCCu2yRa5EeOwwLfOPIGeCc,28584
9
- deltacat/aws/redshift/__init__.py,sha256=7SvjG-dqox8zZUhFicTsUvpG5vXYDl_QQ3ohlHOgTKc,342
10
- deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- deltacat/aws/redshift/model/manifest.py,sha256=-ap44dxaG2bVNkVMzpJe-oIFHx0iBWCnA_sO-riQp0Y,13605
12
- deltacat/benchmarking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- deltacat/benchmarking/benchmark_parquet_reads.py,sha256=2BctkvXAYcAxokLwMSTu4TQ6-HGqzkgYcVEAzPN2QQo,1709
14
- deltacat/benchmarking/conftest.py,sha256=6M9NJ71vnOpeMxG-Ly9UWRsgZmky5-1GTuoRD-OElng,1604
15
- deltacat/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- deltacat/catalog/delegate.py,sha256=lVPPG4bBxoNt0IFufKIXc7H2Nhvb6QNox-WfXxaNxuc,8745
17
- deltacat/catalog/interface.py,sha256=On5hsbznnIKsvCcm5C1N3OS3ar3v6q7pAUkLWMb6PtY,6556
18
- deltacat/catalog/default_catalog_impl/__init__.py,sha256=ted1_sA2Y3ljJjb9hRuWUCiUlCen-HAW8TxoV4g4IW0,12677
19
- deltacat/catalog/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- deltacat/catalog/model/catalog.py,sha256=-Ho7a3rV1hiOS9cSRCAor9AtXV9nJn9t_MDVql9pIxo,2212
21
- deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJnhjTZ6KjybYlhE,727
22
- deltacat/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- deltacat/compute/compactor/__init__.py,sha256=ivpOPve1yKi3Vz3tVgp-eeFMNEeUSf-dlRJNSCM85sE,1022
24
- deltacat/compute/compactor/compaction_session.py,sha256=YthBYNpj6qvr6SqfVfXTy5ylKFOo8zUKI3bn4tHt0e8,27766
25
- deltacat/compute/compactor/repartition_session.py,sha256=AAPwNZtPpC_Mtoja855_alBdXDA6efp7zcvkE-MANaQ,7254
26
- deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- deltacat/compute/compactor/model/compact_partition_params.py,sha256=jjvpUiHfGAw-Dy7s4wyTINtruf8Nk4EPMma7Y4KMF2U,19067
28
- deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=Jjt4YOEO8lc-kiV4fB7rOD_Xd17_BS6pRDzqbtZp0GI,31350
29
- deltacat/compute/compactor/model/compactor_version.py,sha256=RwRvManiCxZmzjAWzm1OPDxjB1BEHu1d0fBJyGhXKxA,87
30
- deltacat/compute/compactor/model/dedupe_result.py,sha256=1OCV944qJdLQ_-8scisVKl45ej1eRv9OV539QYZtQ-U,292
31
- deltacat/compute/compactor/model/delta_annotated.py,sha256=bCE9H5mrBoHfd1lbL6tYWC4_dbAgucAlFLjOtyPLW14,12515
32
- deltacat/compute/compactor/model/delta_file_envelope.py,sha256=6P-3qM4HE1dIGqbKmiyk8cyJAJD1WbwnN22_ppQocHc,3676
33
- deltacat/compute/compactor/model/delta_file_locator.py,sha256=AmhPGPDsmahVhp91rohJMx4ByumcIY5feqRLZTrNu4s,1905
34
- deltacat/compute/compactor/model/hash_bucket_result.py,sha256=71qGmaT1Mks-r3-aatjNbn2x3yWIgT8RmV0bRWe6pdA,275
35
- deltacat/compute/compactor/model/materialize_result.py,sha256=w7FYtVg2j30c6GJ1fKS4lcOTAjlEovuGYT7wVyKkXro,2542
36
- deltacat/compute/compactor/model/primary_key_index.py,sha256=9EYoxauzXeEY_cYAVSCqDMXps8wEAPSXWk-6_LLNwBU,10449
37
- deltacat/compute/compactor/model/pyarrow_write_result.py,sha256=WYIa0DRcyaemR6yUS8_8RLQ2voTmCVNFUL99qxPmt70,1324
38
- deltacat/compute/compactor/model/repartition_result.py,sha256=HZy7Ls6toI4rXgVW2yIKMIkVS8o9kxvlIJPvo5_pCxA,140
39
- deltacat/compute/compactor/model/round_completion_info.py,sha256=SGR6WeGBGwRNDz6KsSqenlGvo0GYtwbPMnvpm5un0wM,5115
40
- deltacat/compute/compactor/model/table_object_store.py,sha256=7BsBsuV8TBudGzy_NfQc7QyzLIP0EXf04DvqaOJNeJE,1487
41
- deltacat/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- deltacat/compute/compactor/steps/dedupe.py,sha256=iAPRIeMdGxNxaCy2QC_XzRWiNDVkKbkplJY0DVoWwsE,10190
43
- deltacat/compute/compactor/steps/hash_bucket.py,sha256=r4DOf1M8brsloiVyOKyplSvFG72Ao86N7YaH2l4mwEk,10646
44
- deltacat/compute/compactor/steps/materialize.py,sha256=j2r01KL5GGhGss9FSN9vpYmgsCQdm2uUpKMDVPtk6_k,14246
45
- deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J6t3b72re6lllpzJD9U,10960
46
- deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
- deltacat/compute/compactor/utils/io.py,sha256=S-JZdjETP_tHblK4j860jLHyX9S6A87BPz3Rl0jGbRM,17303
48
- deltacat/compute/compactor/utils/primary_key_index.py,sha256=ay2-7t4mP9I_l5gKkrv5h5_r8Icts8mBcbH7OJBknrY,2435
49
- deltacat/compute/compactor/utils/round_completion_file.py,sha256=fFevhUuveCvrU3g_JhX_vPCuEv9Oku0ihbi-n9E6H74,3381
50
- deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
51
- deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
52
- deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
- deltacat/compute/compactor_v2/compaction_session.py,sha256=RbO_du0qX7nlyXO-ZSksX8RqWuRwfdvWddpTJjLDVNk,8185
54
- deltacat/compute/compactor_v2/constants.py,sha256=F5Phrh-2JgnWvtjHXacxOG5Z2ivKcHnboerI12rc1zk,3632
55
- deltacat/compute/compactor_v2/deletes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- deltacat/compute/compactor_v2/deletes/delete_file_envelope.py,sha256=AeuH9JRMwp6mvQf6P2cqL92hUEtResQq6qUTS0kIKac,3111
57
- deltacat/compute/compactor_v2/deletes/delete_strategy.py,sha256=SMEJOxR-5r92kvKNqtu2w6HmwtmhljcZX1wcNEuS-4w,2833
58
- deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py,sha256=U4zxVECXSPs1Nj3iPf_tiRRCs12CF8CHmRt4s_GDzq8,6503
59
- deltacat/compute/compactor_v2/deletes/model.py,sha256=kW7kfRe4jVNMnsWQrl0nyKdDpvB9mbJND-MVzAajbAI,558
60
- deltacat/compute/compactor_v2/deletes/utils.py,sha256=9CchSw1_caWGWtRHa4ycy58t5T422EN6UB9XYa1zpbk,6640
61
- deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py,sha256=XAaEEAdaJy6-G3mVGxQCVNdecSyBue11OgEwy8s1WGs,529
63
- deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=iJy8kLi1dIpFIyfoAjkaAtZvg8Np1z7BsUNGAcWfFm4,3042
64
- deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
65
- deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViVO1SVljCj6f0B3MfB3hqtGm2S0s,7410
66
- deltacat/compute/compactor_v2/model/merge_input.py,sha256=D-6WuHK4X7m9-P6Hskz6RRemeWrNf6IPdhc14O3KDAg,5860
67
- deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
68
- deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=nz2N3YZVE9bNwOqRXoQYkArJhyUJRis2s9BweZ3tad8,30989
70
- deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
- deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
72
- deltacat/compute/compactor_v2/steps/merge.py,sha256=4rKQ__SeWO_QLZl2btcFrYHCMOn-8R3kja74UrWOMgg,26225
73
- deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
- deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=t2j9H9IdFRH9EfpL-9g5XvZs9WK9HybqBGA7fDi82EM,8310
75
- deltacat/compute/compactor_v2/utils/dedupe.py,sha256=Jz1QbBOdZJwT8K1vD9q01eOn7hdLNZ_AF7bJ0wficr0,1949
76
- deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2OpHvUMqUL2ja3aw,3626
77
- deltacat/compute/compactor_v2/utils/io.py,sha256=Xjs7_D-0xKSetvllIe4o96aM1elfdjt1Ii7YfsHPvZs,6108
78
- deltacat/compute/compactor_v2/utils/merge.py,sha256=fAzEYwQYH2ia8MLdEFdZFivWHpi6qZu8AyyEK0H0vwE,5363
79
- deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=SbQ97M1Cxld-zZik2QMSzlj20g6JlENaQx_0PhlCIP8,12034
80
- deltacat/compute/compactor_v2/utils/task_options.py,sha256=0GoB_DLkCN1q8CVKTlWlDYt55qnpTDIa9fPyXJwB-cU,13801
81
- deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
82
- deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
83
- deltacat/compute/merge_on_read/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
- deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtLnsY987Ulx9n20oAydfPq3Zd3Y9ocbTI,2035
85
- deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
- deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
87
- deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
88
- deltacat/compute/resource_estimation/delta.py,sha256=dN64jbUQ8OI1BTz4fYGbulJLWjKjdT-XvwDJNLM__Oo,10583
89
- deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
90
- deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXCacr0Swe5f0M7DdO4,5465
91
- deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
92
- deltacat/compute/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
- deltacat/compute/stats/types.py,sha256=cp0lT8nITTKbnkc03OysRjXfcfXzQml9a4wqCnR6kqs,215
94
- deltacat/compute/stats/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
- deltacat/compute/stats/models/delta_column_stats.py,sha256=-wXjB2c0BC1RDheumjL_j5-DfRNql4WsK9GpMFQI1cg,3300
96
- deltacat/compute/stats/models/delta_stats.py,sha256=hBith8_hbF9TVr6HocLAt6RJ_kZZKO4zrGP8VOP05vA,8556
97
- deltacat/compute/stats/models/delta_stats_cache_result.py,sha256=mbJYxpZd5jaER_BWrCD2hROFy3p1nNdBrj66nUpc6io,1624
98
- deltacat/compute/stats/models/manifest_entry_stats.py,sha256=NCDAe2nPDEI4kOkuwNkRFgGPS-rqQaQqLuaLoKk20KQ,2419
99
- deltacat/compute/stats/models/stats_result.py,sha256=XQAlmzhUqRmg4jzEMUAOqcYn1HUOBTMryBH1CCVlet8,3820
100
- deltacat/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
101
- deltacat/io/dataset.py,sha256=pFU5UfK-fD9C4fIeffJtrA6yVQSgAx2UPbxzQ4GMFL8,3203
102
- deltacat/io/file_object_store.py,sha256=YoNL3Qla8uLOHaWnyBmIgotjSGAy3Td3Tumah0kk73Y,1868
103
- deltacat/io/memcached_object_store.py,sha256=C96t77-4BQe0XZ4vC76Ygi2o1POUoMN4t4BiyPmulz0,10997
104
- deltacat/io/object_store.py,sha256=z3Crt8TLyLyoRunOuXAri373TQZKFoz66QHpxGOV82U,1910
105
- deltacat/io/ray_plasma_object_store.py,sha256=TyoUPWybE_cSISZ2SQa3YfD93QWMp0r82-6WnoVSmzk,905
106
- deltacat/io/read_api.py,sha256=BhkjL3xjY-fsa62AA9Yv20_88uTskn4_Bv2W6VmMXVA,7023
107
- deltacat/io/redis_object_store.py,sha256=OkbQNq1DUVYA7eupmZTF-9OvXUDTOl6WtEifonA5teg,4862
108
- deltacat/io/s3_object_store.py,sha256=IxvLUvyQZ1w1oYwN9RvRgmKR0Dw56-GggYJw1UCyhBg,1911
109
- deltacat/io/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
- deltacat/io/aws/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
111
- deltacat/storage/__init__.py,sha256=4sWa3oq89IC3YPclsnVc6ZhnlFM2MuSqshT2uW5cSEY,2158
112
- deltacat/storage/interface.py,sha256=xUZz92W8fSYAuoSs-ZSfcNUeuaAWE6qb0fh6Cghv8mI,23840
113
- deltacat/storage/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
- deltacat/storage/model/delete_parameters.py,sha256=yF3_gdDiDkMAuLJPlBr4Es25SYijzEKGxArwgXCrNwI,1563
115
- deltacat/storage/model/delta.py,sha256=w6Oya8UDJTxHhWsRarQxC9UHitNeRnJYpnArpR5HIVw,15810
116
- deltacat/storage/model/list_result.py,sha256=FgD6oYeKo0EPe8z7jC8T4pAFjBOuBwd4axxGrnYyBG4,2466
117
- deltacat/storage/model/locator.py,sha256=1S7szmDSx-R4Z3arFNILOvS4t7dF7_rJNV9fHyRc3G4,1296
118
- deltacat/storage/model/namespace.py,sha256=KI2umYWShXFTx1ykLwsQjuce078WYo_Hmavn3DDeBzE,2086
119
- deltacat/storage/model/partition.py,sha256=hF2vsY5Gd74BcF0iZzCqOPu-N40Qe0HPusvwmRVCdH8,11162
120
- deltacat/storage/model/partition_spec.py,sha256=uIDr6kXAbmfESZP_4t9EqErjWUK01tIN6XWyvOtGaNA,1994
121
- deltacat/storage/model/sort_key.py,sha256=SPIxJfI_o7fbp1s3ZKMyX9x7_jK8UZapaVnKSAg5L1g,1156
122
- deltacat/storage/model/stream.py,sha256=4J7N3j031ckxxi8imyaW-ANk4ygBIE5GGiM8NuFgH8M,9207
123
- deltacat/storage/model/table.py,sha256=IOu1ZOrdRkVDB-FOxYMRvnNf5TukIDfbdHWTqHYN_OY,4225
124
- deltacat/storage/model/table_version.py,sha256=cOM9dN-YB_Hhi4h1CzFbldC5qRkm4C1rQ3rpKIZzCNs,7413
125
- deltacat/storage/model/transform.py,sha256=t4hg1dKua8VPeMFgyllkWdzq-L5M_DRG0HD9sPt6OXQ,3517
126
- deltacat/storage/model/types.py,sha256=hj7MmjjVmKT-R9sMUulOWG-FByGZKKaYXNnOWW32mP0,1608
127
- deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
128
- deltacat/tests/test_exceptions.py,sha256=V3jUQClHLD24tS18tnGvNIt0psn2WFT3Nf_CIvSqL08,3140
129
- deltacat/tests/test_logs.py,sha256=ULmb3OJ8GGEpq_LFgcil-CPjZQpO9341Ws12svoct0s,6909
130
- deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
131
- deltacat/tests/aws/test_clients.py,sha256=23GMWfz27WWBDXSqphG9mfputsyS7j3I5P_HRk4YoKE,3790
132
- deltacat/tests/aws/test_s3u.py,sha256=FsYCH8K8DsDRPOtTp-w1Nu3ATqt4p1mqDo6aVJV-SbU,7918
133
- deltacat/tests/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- deltacat/tests/catalog/test_default_catalog_impl.py,sha256=2l5uwmtLlUJ9yH1LDggtj81fa-pHqbE0-VBt6G4Hyc0,3180
135
- deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
136
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py,sha256=kWyIJQMyF9oBemvgOp3ngGhMpH9zjkznV-67ewELgHQ,37719
137
- deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=8HVr3EIFYFqNaJoqeCuj9xIBjM4Ch2bx-mJcO4BRrLo,16839
138
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=l_6-pAKOsRY3NbtfHsYmEaJEkq6IJueYuLsjyJxNgz4,81564
139
- deltacat/tests/compute/compact_partition_test_cases.py,sha256=HJ15Xyawv8ImFju8wDwt22fH5okoPhyS-QAygkXDb7Q,27422
140
- deltacat/tests/compute/test_compact_partition_incremental.py,sha256=8hUqnzeGIhAENcBxLL0R_yfjAaNTmRds6OWxQOmVqD8,15416
141
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=6d3F9E_4eO2Okh97v8NWFbEptPkzKoO0Qq8O6yAXrIs,13377
142
- deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
143
- deltacat/tests/compute/test_compact_partition_rebase.py,sha256=vOF8wgTpdaWJKo47mK9aii3NKtwVwWgujoQyS8C3YyA,13535
144
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=BimvU-iHiF78WlE4xbfk4dzHm0a-frwpE7H7Kh4XkbE,15500
145
- deltacat/tests/compute/test_util_common.py,sha256=0mEHo38bgH64y0XZ_zgUL_aZgQMgJOSTlOYvIJxG_MM,11825
146
- deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
147
- deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=Q3HJj1fjoe2JwRUOW8KEjbTqPIIoP2o_T3ZGH6SJnCM,13244
148
- deltacat/tests/compute/compactor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
- deltacat/tests/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
150
- deltacat/tests/compute/compactor/steps/test_repartition.py,sha256=0uRguPEKeLSYs746Jv8io-HZMWdyXNcOMBu8GO2mA0M,9305
151
- deltacat/tests/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
152
- deltacat/tests/compute/compactor/utils/test_io.py,sha256=st5mlU4cVU-eQl7B4mvPgNA3izuNwbVawYOp-NcoyrI,4326
153
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py,sha256=LAQ4usiRF4oTx4cA85L0eOcBa_Z-febc-CuzUijSGrI,7439
154
- deltacat/tests/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
- deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=F1DFaranHekHB7HSNH-0_hV5ovdR5HfF9JqTVDw6Vh8,42575
156
- deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6iphCsVXxRp0zP1NTnKhfdmkg,328
157
- deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py,sha256=eoiDuBUhgCmc3DYKCXL1g4QWtmROhZ0RJCQgePMY9as,9959
159
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py,sha256=aFb9rzT_EK9k8qAMHPtpqd5btyEmll1So1loDmZkotQ,1769
160
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=YDQKUKv3Vv8S1fe0YQmjHTrwnWSliqKHIWGu0fEdKnI,11478
161
- deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
- deltacat/tests/compute/resource_estimation/test_delta.py,sha256=HCL2oUnCqm0E26T3HLJjMhoAsHTJIWPYGwIKRgM_H7E,25712
163
- deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
164
- deltacat/tests/compute/resource_estimation/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
- deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
- deltacat/tests/io/test_cloudpickle_bug_fix.py,sha256=qnYJg_S-nsLai77a4_I3Qs2Jtr_KWQJOxyl96f9PgHA,1376
167
- deltacat/tests/io/test_file_object_store.py,sha256=bjORXnHe7Ea733XUUO0S2Su_oqSwGuO84TlIfoNO6qA,3587
168
- deltacat/tests/io/test_memcached_object_store.py,sha256=0EIaU5MHiEmIEkA4x5qUXFY9TE6TJ7V2RGH827cu3AU,9512
169
- deltacat/tests/io/test_ray_plasma_object_store.py,sha256=-wJZP6lRtEOogR25wjEiIBGz_lpvWVihwlZ5GqandZU,1911
170
- deltacat/tests/io/test_redis_object_store.py,sha256=4fCxb7PAqYixPbQZEPDwsDU3BEKfOkYxkhAI7V5Zdfc,4988
171
- deltacat/tests/io/test_s3_object_store.py,sha256=I8AbyrPfS32CAYvRHtn_OanL-XPpAnJeuCuhD-u9irQ,2270
172
- deltacat/tests/local_deltacat_storage/__init__.py,sha256=5T9ubNIS42-BotEH0yrUiWEU92feW7lkoSA1-wMeAnQ,40104
173
- deltacat/tests/local_deltacat_storage/exceptions.py,sha256=oxZ0psmrEO0M6P2r8gHQ2E8E-Y8UBfUCBUIwfuHcx38,251
174
- deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
175
- deltacat/tests/test_utils/constants.py,sha256=UYe--9T_clYjiOpv0M7TtAMGdpje_SMZ-w8n0IeCAjc,214
176
- deltacat/tests/test_utils/pyarrow.py,sha256=QDdGilzsJ2xUESiGotdNVZde9yD7ja9MvNhhssnox-E,3083
177
- deltacat/tests/test_utils/storage.py,sha256=93GEn4A5WbMHWk0Ec4Bd7RxeHoSEnBfSarfWhKOSNtM,972
178
- deltacat/tests/test_utils/utils.py,sha256=a32qEwcSSd1lvRi0aJJ4ZLnc1ZyXmoQF_K95zaQRk2M,455
179
- deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
180
- deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iyzjiJ7-Rcl0o,1506
181
- deltacat/tests/utils/test_daft.py,sha256=kY8lkXoQvyWunok8UvOsh1An297rb3jcnstTuIAyAlc,8232
182
- deltacat/tests/utils/test_metrics.py,sha256=Ym9nOz1EtB180pLmvugihj1sDTNDMb5opIjjr5Nmcls,16339
183
- deltacat/tests/utils/test_placement.py,sha256=g61wVOMkHe4YJeR9Oxg_BOVQ6bhHHbC3IBYv8YhUu94,597
184
- deltacat/tests/utils/test_pyarrow.py,sha256=tuh6HzQOuAHPFxK5Mhgjjdm76Z9Z72H3MZPcJ4RnZn8,37372
185
- deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
186
- deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
187
- deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
- deltacat/tests/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
- deltacat/tests/utils/ray_utils/test_concurrency.py,sha256=TjZpX0cjMDEIS79p_--j_BfT0zXKNkTLY1ZzNokBTs0,1211
190
- deltacat/tests/utils/ray_utils/test_dataset.py,sha256=1hoOR_AIO4iJQ_lCjNsJfq7S-2ZyOKyMkKc4Tjt6cwg,2092
191
- deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
192
- deltacat/types/media.py,sha256=7_QRU6NbjmJk0GLAn_Km6ja8RE5G3V8jvLfUXqnjnqU,2320
193
- deltacat/types/partial_download.py,sha256=9BJ5b0DHyWWeV7wMZjOfYoeH_iil_bjZ9b_WMpUzvHs,2516
194
- deltacat/types/tables.py,sha256=HLm-xI5xCuKTeGbaErspni3DeSVC_aRe2TYFLd6b_mA,4409
195
- deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
196
- deltacat/utils/arguments.py,sha256=5y1Xz4HSAD8M8Jt83i6gOEKoYjy_fMQe1V43IhIE4hY,1191
197
- deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
198
- deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
199
- deltacat/utils/daft.py,sha256=nd4XBKcZTFYxf_VH9jm-wqqbrIujKAeisCt2vVbW2BA,5807
200
- deltacat/utils/metrics.py,sha256=HYKyZSrtVLu8gXezg_TMNUKJp4h1WWI0VEzn0Xlzf-I,10778
201
- deltacat/utils/numpy.py,sha256=SpHKKvC-K8NINTWGVfTZ5-gBFTGYqaXjjgKFhsdUjwg,2049
202
- deltacat/utils/pandas.py,sha256=q99mlRB7tymICMcNbfGLfLqFu_C-feyPZKZm2CWJJVc,9574
203
- deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
204
- deltacat/utils/placement.py,sha256=Lj20fb-eq8rgMdm_M2MBMfDLwhDM1sS1nJj2DvIK56s,12060
205
- deltacat/utils/pyarrow.py,sha256=MFCsHJKapqrhaaBeVAvwR2F1MglsNNhVZeCbk7YIdyI,35266
206
- deltacat/utils/resources.py,sha256=Ax1OgLLbZI4oYpp4Ki27OLaST-7I-AJgZwU87FVfY8g,8253
207
- deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
208
- deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
209
- deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
210
- deltacat/utils/ray_utils/collections.py,sha256=hj20s4D2RF2jZETU_44r6mFbsczA0JI_I_4kWKTmqes,1951
211
- deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDDwEgFOIkEIo,5457
212
- deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
213
- deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
214
- deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
215
- deltacat-1.1.36.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
216
- deltacat-1.1.36.dist-info/METADATA,sha256=wIZbEGHnJWq_TBKi0u463p4-PgG9R_0MApw7IIwmnRc,1733
217
- deltacat-1.1.36.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
218
- deltacat-1.1.36.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
219
- deltacat-1.1.36.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes
File without changes