deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +5 -9
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -7
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
  40. deltacat/compute/compactor_v2/steps/merge.py +17 -126
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  45. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  46. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  47. deltacat/compute/converter/constants.py +4 -0
  48. deltacat/compute/converter/converter_session.py +143 -0
  49. deltacat/compute/converter/model/convert_input.py +69 -0
  50. deltacat/compute/converter/model/convert_input_files.py +61 -0
  51. deltacat/compute/converter/model/converter_session_params.py +99 -0
  52. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  53. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  54. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  55. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  56. deltacat/compute/converter/steps/__init__.py +0 -0
  57. deltacat/compute/converter/steps/convert.py +211 -0
  58. deltacat/compute/converter/steps/dedupe.py +60 -0
  59. deltacat/compute/converter/utils/__init__.py +0 -0
  60. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  61. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  62. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  63. deltacat/compute/converter/utils/io.py +43 -0
  64. deltacat/compute/converter/utils/s3u.py +133 -0
  65. deltacat/compute/resource_estimation/delta.py +1 -19
  66. deltacat/constants.py +47 -1
  67. deltacat/env.py +51 -0
  68. deltacat/examples/__init__.py +0 -0
  69. deltacat/examples/basic_logging.py +101 -0
  70. deltacat/examples/common/__init__.py +0 -0
  71. deltacat/examples/common/fixtures.py +15 -0
  72. deltacat/examples/hello_world.py +27 -0
  73. deltacat/examples/iceberg/__init__.py +0 -0
  74. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  75. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  76. deltacat/exceptions.py +51 -9
  77. deltacat/logs.py +4 -1
  78. deltacat/storage/__init__.py +118 -28
  79. deltacat/storage/iceberg/__init__.py +0 -0
  80. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  81. deltacat/storage/iceberg/impl.py +737 -0
  82. deltacat/storage/iceberg/model.py +709 -0
  83. deltacat/storage/interface.py +217 -134
  84. deltacat/storage/main/__init__.py +0 -0
  85. deltacat/storage/main/impl.py +2077 -0
  86. deltacat/storage/model/delta.py +118 -71
  87. deltacat/storage/model/interop.py +24 -0
  88. deltacat/storage/model/list_result.py +8 -0
  89. deltacat/storage/model/locator.py +93 -3
  90. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  91. deltacat/storage/model/metafile.py +1316 -0
  92. deltacat/storage/model/namespace.py +34 -18
  93. deltacat/storage/model/partition.py +362 -37
  94. deltacat/storage/model/scan/__init__.py +0 -0
  95. deltacat/storage/model/scan/push_down.py +19 -0
  96. deltacat/storage/model/scan/scan_plan.py +10 -0
  97. deltacat/storage/model/scan/scan_task.py +34 -0
  98. deltacat/storage/model/schema.py +892 -0
  99. deltacat/storage/model/shard.py +47 -0
  100. deltacat/storage/model/sort_key.py +170 -13
  101. deltacat/storage/model/stream.py +208 -80
  102. deltacat/storage/model/table.py +123 -29
  103. deltacat/storage/model/table_version.py +322 -46
  104. deltacat/storage/model/transaction.py +757 -0
  105. deltacat/storage/model/transform.py +198 -61
  106. deltacat/storage/model/types.py +111 -13
  107. deltacat/storage/rivulet/__init__.py +11 -0
  108. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  109. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  110. deltacat/storage/rivulet/dataset.py +744 -0
  111. deltacat/storage/rivulet/dataset_executor.py +87 -0
  112. deltacat/storage/rivulet/feather/__init__.py +5 -0
  113. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  114. deltacat/storage/rivulet/feather/serializer.py +35 -0
  115. deltacat/storage/rivulet/fs/__init__.py +0 -0
  116. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  117. deltacat/storage/rivulet/fs/file_store.py +130 -0
  118. deltacat/storage/rivulet/fs/input_file.py +76 -0
  119. deltacat/storage/rivulet/fs/output_file.py +86 -0
  120. deltacat/storage/rivulet/logical_plan.py +105 -0
  121. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  122. deltacat/storage/rivulet/metastore/delta.py +190 -0
  123. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  124. deltacat/storage/rivulet/metastore/sst.py +82 -0
  125. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  126. deltacat/storage/rivulet/mvp/Table.py +101 -0
  127. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  129. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  130. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  131. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  132. deltacat/storage/rivulet/reader/__init__.py +0 -0
  133. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  134. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  135. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  136. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  137. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  138. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  139. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  140. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  141. deltacat/storage/rivulet/schema/__init__.py +0 -0
  142. deltacat/storage/rivulet/schema/datatype.py +128 -0
  143. deltacat/storage/rivulet/schema/schema.py +251 -0
  144. deltacat/storage/rivulet/serializer.py +40 -0
  145. deltacat/storage/rivulet/serializer_factory.py +42 -0
  146. deltacat/storage/rivulet/writer/__init__.py +0 -0
  147. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  148. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  149. deltacat/tests/_io/__init__.py +1 -0
  150. deltacat/tests/catalog/test_catalogs.py +324 -0
  151. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  152. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  153. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  154. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  155. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  156. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  157. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  158. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  159. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  160. deltacat/tests/compute/conftest.py +75 -0
  161. deltacat/tests/compute/converter/__init__.py +0 -0
  162. deltacat/tests/compute/converter/conftest.py +80 -0
  163. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  164. deltacat/tests/compute/converter/utils.py +123 -0
  165. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  166. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  167. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  168. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  169. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  170. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  171. deltacat/tests/compute/test_util_common.py +19 -12
  172. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  173. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  174. deltacat/tests/storage/__init__.py +0 -0
  175. deltacat/tests/storage/conftest.py +25 -0
  176. deltacat/tests/storage/main/__init__.py +0 -0
  177. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  178. deltacat/tests/storage/model/__init__.py +0 -0
  179. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  180. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  181. deltacat/tests/storage/model/test_schema.py +308 -0
  182. deltacat/tests/storage/model/test_shard.py +22 -0
  183. deltacat/tests/storage/model/test_table_version.py +110 -0
  184. deltacat/tests/storage/model/test_transaction.py +308 -0
  185. deltacat/tests/storage/rivulet/__init__.py +0 -0
  186. deltacat/tests/storage/rivulet/conftest.py +149 -0
  187. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  188. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  189. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  190. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  191. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  192. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  193. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  194. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  195. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  196. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  197. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  198. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  199. deltacat/tests/test_deltacat_api.py +39 -0
  200. deltacat/tests/test_utils/filesystem.py +14 -0
  201. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  202. deltacat/tests/test_utils/pyarrow.py +8 -15
  203. deltacat/tests/test_utils/storage.py +266 -3
  204. deltacat/tests/utils/test_daft.py +3 -3
  205. deltacat/tests/utils/test_pyarrow.py +0 -432
  206. deltacat/types/partial_download.py +1 -1
  207. deltacat/types/tables.py +1 -1
  208. deltacat/utils/export.py +59 -0
  209. deltacat/utils/filesystem.py +320 -0
  210. deltacat/utils/metafile_locator.py +73 -0
  211. deltacat/utils/pyarrow.py +36 -183
  212. deltacat-2.0.dist-info/METADATA +65 -0
  213. deltacat-2.0.dist-info/RECORD +347 -0
  214. deltacat/aws/redshift/__init__.py +0 -19
  215. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  216. deltacat/io/dataset.py +0 -73
  217. deltacat/io/read_api.py +0 -143
  218. deltacat/storage/model/delete_parameters.py +0 -40
  219. deltacat/storage/model/partition_spec.py +0 -71
  220. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  221. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  222. deltacat-1.1.36.dist-info/METADATA +0 -64
  223. deltacat-1.1.36.dist-info/RECORD +0 -219
  224. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  225. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  226. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  227. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  228. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  229. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  230. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  231. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  233. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  234. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
  235. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
  236. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,42 @@
1
- # Allow classes to use self-referencing Type hints in Python 3.7.
2
1
  from __future__ import annotations
3
2
 
4
- import itertools
5
3
  import logging
6
- from typing import Any, Dict, List, Optional
7
- from uuid import uuid4
4
+ import itertools
5
+
8
6
  from enum import Enum
7
+ from typing import Optional, List, Dict, Any
8
+ from uuid import uuid4
9
9
 
10
10
  from deltacat import logs
11
11
 
12
+ from deltacat.storage.model.schema import FieldLocator
13
+
12
14
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
13
15
 
14
16
 
15
17
  class EntryType(str, Enum):
16
18
  """
17
- Enum representing all possible content categories of an manifest entry file
19
+ Enum representing all possible content categories of a manifest entry file.
20
+
21
+ DATA: The entry contains fully qualified records compliant with the parent
22
+ table's schema to insert and/or update. Data files for upsert Deltas use
23
+ this entry's parameters to find matching fields to update. If no entry
24
+ parameters are specified, then the parent table's primary keys are used.
25
+ Only records from entries in Deltas with lower stream positions than this
26
+ entry will be targeted for update.
27
+
28
+ POSITIONAL_DELETE: The entry contains pointers to records in other entries
29
+ to delete. Deleted records will be filtered from query results at runtime.
30
+
31
+ EQUALITY_DELETE: The entry contains a subset of field values from the
32
+ table records to find and delete. The full record of any matching data
33
+ entries in Deltas with a lower stream position than this entry's Delta
34
+ will be deleted. The fields used for record discovery are controlled by
35
+ this entry's parameters. If no entry parameters are specified, then the
36
+ fields used for record discovery are linked to the parent table's merge
37
+ keys. The entry may contain additional fields not used for delete record
38
+ discovery which will be ignored. Deleted records will be filtered from
39
+ query results at runtime.
18
40
  """
19
41
 
20
42
  DATA = "data"
@@ -30,46 +52,40 @@ class EntryType(str, Enum):
30
52
  return [c.value for c in EntryType]
31
53
 
32
54
 
33
- class EntryFileParams(dict):
55
+ class EntryParams(dict):
34
56
  """
35
- Represents parameters relevant to the underlying contents of manifest entry. Contains all parameters required to support DELETEs
36
- equality_column_names: List of column names that would be used to determine row equality for equality deletes. Relevant only to equality deletes
37
- position: Ordinal position of a deleted row in the target data file identified by uri, starting at 0. Relevant only to positional deletes
57
+ Parameters that control manifest entry interpretation.
58
+
59
+ For EQUALITY_DELETE manifest entry types, parameters include equality
60
+ field identifiers.
38
61
  """
39
62
 
40
63
  @staticmethod
41
64
  def of(
42
- equality_column_names: Optional[List[str]] = None,
43
- position: Optional[int] = None,
44
- ) -> EntryFileParams:
45
- entry_file_params = EntryFileParams()
46
- if equality_column_names is not None:
47
- entry_file_params["equality_column_names"] = equality_column_names
48
- if position is not None:
49
- entry_file_params["position"] = position
50
- return entry_file_params
65
+ equality_field_locators: Optional[List[FieldLocator]] = None,
66
+ ) -> EntryParams:
67
+ params = EntryParams()
68
+ if equality_field_locators is not None:
69
+ params["equality_field_locators"] = equality_field_locators
70
+ return params
51
71
 
52
72
  @property
53
- def equality_column_names(self) -> Optional[List[str]]:
54
- return self.get("equality_column_names")
55
-
56
- @property
57
- def url(self) -> Optional[str]:
58
- return self.get("url")
59
-
60
- @property
61
- def position(self) -> Optional[int]:
62
- return self.get("position")
73
+ def equality_field_locators(self) -> Optional[List[FieldLocator]]:
74
+ return self.get("equality_field_locators")
63
75
 
64
76
 
65
77
  class Manifest(dict):
78
+ """
79
+ A DeltaCAT manifest contains metadata common to multiple manifest formats
80
+ like Amazon Redshift and Apache Iceberg to simplify dataset import/export.
81
+ """
82
+
66
83
  @staticmethod
67
84
  def _build_manifest(
68
85
  meta: Optional[ManifestMeta],
69
86
  entries: Optional[ManifestEntryList],
70
87
  author: Optional[ManifestAuthor] = None,
71
88
  uuid: str = None,
72
- entry_type: Optional[EntryType] = None,
73
89
  ) -> Manifest:
74
90
  if not uuid:
75
91
  uuid = str(uuid4())
@@ -81,8 +97,6 @@ class Manifest(dict):
81
97
  manifest["entries"] = entries
82
98
  if author is not None:
83
99
  manifest["author"] = author
84
- if entry_type is not None:
85
- manifest["entry_type"] = entry_type.value
86
100
  return manifest
87
101
 
88
102
  @staticmethod
@@ -91,6 +105,7 @@ class Manifest(dict):
91
105
  author: Optional[ManifestAuthor] = None,
92
106
  uuid: str = None,
93
107
  entry_type: Optional[EntryType] = None,
108
+ entry_params: Optional[EntryParams] = None,
94
109
  ) -> Manifest:
95
110
  if not uuid:
96
111
  uuid = str(uuid4())
@@ -99,11 +114,13 @@ class Manifest(dict):
99
114
  total_source_content_length = 0
100
115
  content_type = None
101
116
  content_encoding = None
102
- partition_values_set = set()
103
- partition_values = None
117
+ credentials = None
118
+ content_type_params = None
104
119
  if entries:
105
120
  content_type = entries[0].meta.content_type
106
121
  content_encoding = entries[0].meta.content_encoding
122
+ credentials = entries[0].meta.credentials
123
+ content_type_params = entries[0].meta.content_type_parameters
107
124
  for entry in entries:
108
125
  meta = entry.meta
109
126
  if meta.content_type != content_type:
@@ -118,7 +135,7 @@ class Manifest(dict):
118
135
  f"'{entry_content_type}'"
119
136
  )
120
137
  raise ValueError(msg)
121
- entry_content_encoding = meta["content_encoding"]
138
+ entry_content_encoding = meta.get("content_encoding", None)
122
139
  if entry_content_encoding != content_encoding:
123
140
  msg = (
124
141
  f"Expected all manifest entries to have content "
@@ -126,25 +143,53 @@ class Manifest(dict):
126
143
  f"'{entry_content_encoding}'"
127
144
  )
128
145
  raise ValueError(msg)
146
+ actual_entry_type = meta.entry_type
147
+ if entry_type and (actual_entry_type != entry_type):
148
+ msg = (
149
+ f"Expected all manifest entries to have type "
150
+ f"'{entry_type}' but found '{actual_entry_type}'"
151
+ )
152
+ raise ValueError(msg)
153
+ actual_entry_params = meta.entry_params
154
+ if entry_params and (actual_entry_params != entry_params):
155
+ msg = (
156
+ f"Expected all manifest entries to have params "
157
+ f"'{entry_params}' but found '{actual_entry_params}'"
158
+ )
159
+ raise ValueError(msg)
160
+ actual_credentials = meta.credentials
161
+ if credentials and (actual_credentials != credentials):
162
+ msg = (
163
+ f"Expected all manifest entries to have credentials "
164
+ f"'{credentials}' but found '{actual_credentials}'"
165
+ )
166
+ raise ValueError(msg)
167
+ actual_content_type_params = meta.content_type_parameters
168
+ if content_type_params and (
169
+ actual_content_type_params != content_type_params
170
+ ):
171
+ msg = (
172
+ f"Expected all manifest entries to have content type params "
173
+ f"'{content_type_params}' but found '{actual_content_type_params}'"
174
+ )
175
+ raise ValueError(msg)
176
+
129
177
  total_record_count += meta.record_count or 0
130
178
  total_content_length += meta.content_length or 0
131
179
  total_source_content_length += meta.source_content_length or 0
132
- if len(partition_values_set) <= 1:
133
- partition_values_set.add(entry.meta.partition_values)
134
-
135
- if len(partition_values_set) == 1:
136
- partition_values = partition_values_set.pop()
137
180
 
138
181
  meta = ManifestMeta.of(
139
- total_record_count,
140
- total_content_length,
141
- content_type,
142
- content_encoding,
143
- total_source_content_length,
182
+ record_count=total_record_count,
183
+ content_length=total_content_length,
184
+ content_type=content_type,
185
+ content_encoding=content_encoding,
186
+ source_content_length=total_source_content_length,
187
+ credentials=credentials,
188
+ content_type_parameters=content_type_params,
144
189
  entry_type=entry_type,
145
- partition_values=partition_values,
190
+ entry_params=entry_params,
146
191
  )
147
- manifest = Manifest._build_manifest(meta, entries, author, uuid, entry_type)
192
+ manifest = Manifest._build_manifest(meta, entries, author, uuid)
148
193
  return manifest
149
194
 
150
195
  @staticmethod
@@ -194,7 +239,7 @@ class ManifestMeta(dict):
194
239
  credentials: Optional[Dict[str, str]] = None,
195
240
  content_type_parameters: Optional[List[Dict[str, str]]] = None,
196
241
  entry_type: Optional[EntryType] = None,
197
- partition_values: Optional[List[str]] = None,
242
+ entry_params: Optional[EntryParams] = None,
198
243
  ) -> ManifestMeta:
199
244
  manifest_meta = ManifestMeta()
200
245
  if record_count is not None:
@@ -212,9 +257,11 @@ class ManifestMeta(dict):
212
257
  if credentials is not None:
213
258
  manifest_meta["credentials"] = credentials
214
259
  if entry_type is not None:
215
- manifest_meta["entry_type"] = entry_type.value
216
- if partition_values is not None:
217
- manifest_meta["partition_values"] = partition_values
260
+ manifest_meta["entry_type"] = (
261
+ entry_type.value if isinstance(entry_type, EntryType) else entry_type
262
+ )
263
+ if entry_params is not None:
264
+ manifest_meta["entry_params"] = entry_params
218
265
  return manifest_meta
219
266
 
220
267
  @property
@@ -257,27 +304,11 @@ class ManifestMeta(dict):
257
304
  return val
258
305
 
259
306
  @property
260
- def partition_values(self) -> Optional[List[str]]:
261
- return self.get("partition_values")
262
-
263
-
264
- class ManifestAuthor(dict):
265
- @staticmethod
266
- def of(name: Optional[str], version: Optional[str]) -> ManifestAuthor:
267
- manifest_author = ManifestAuthor()
268
- if name is not None:
269
- manifest_author["name"] = name
270
- if version is not None:
271
- manifest_author["version"] = version
272
- return manifest_author
273
-
274
- @property
275
- def name(self) -> Optional[str]:
276
- return self.get("name")
277
-
278
- @property
279
- def version(self) -> Optional[str]:
280
- return self.get("version")
307
+ def entry_params(self) -> Optional[EntryParams]:
308
+ val: Dict[str, Any] = self.get("entry_params")
309
+ if val is not None and not isinstance(val, EntryParams):
310
+ self["entry_params"] = val = EntryParams(val)
311
+ return val
281
312
 
282
313
 
283
314
  class ManifestEntry(dict):
@@ -288,8 +319,6 @@ class ManifestEntry(dict):
288
319
  mandatory: bool = True,
289
320
  uri: Optional[str] = None,
290
321
  uuid: Optional[str] = None,
291
- entry_type: Optional[EntryType] = None,
292
- entry_file_params: Optional[EntryFileParams] = None,
293
322
  ) -> ManifestEntry:
294
323
  manifest_entry = ManifestEntry()
295
324
  if not (uri or url):
@@ -306,16 +335,6 @@ class ManifestEntry(dict):
306
335
  manifest_entry["mandatory"] = mandatory
307
336
  if uuid is not None:
308
337
  manifest_entry["id"] = uuid
309
- if entry_type is not None:
310
- manifest_entry["entry_type"] = entry_type.value
311
- if entry_file_params is not None:
312
- if entry_file_params.get("url") != manifest_entry.get("url"):
313
- msg = (
314
- f"Expected manifest entry url: {manifest_entry.url}"
315
- f" and entry_file_params: '{entry_file_params.url}' to match"
316
- )
317
- raise ValueError(msg)
318
- manifest_entry["entry_file_params"] = entry_file_params
319
338
  return manifest_entry
320
339
 
321
340
  @staticmethod
@@ -330,11 +349,11 @@ class ManifestEntry(dict):
330
349
  s3_obj = s3_utils.get_object_at_url(url, **s3_client_kwargs)
331
350
  logger.debug(f"Building manifest entry from {url}: {s3_obj}")
332
351
  manifest_entry_meta = ManifestMeta.of(
333
- record_count,
334
- s3_obj["ContentLength"],
335
- s3_obj["ContentType"],
336
- s3_obj["ContentEncoding"],
337
- source_content_length,
352
+ record_count=record_count,
353
+ content_length=s3_obj["ContentLength"],
354
+ content_type=s3_obj["ContentType"],
355
+ content_encoding=s3_obj["ContentEncoding"],
356
+ source_content_length=source_content_length,
338
357
  )
339
358
  manifest_entry = ManifestEntry.of(url, manifest_entry_meta)
340
359
  return manifest_entry
@@ -362,19 +381,24 @@ class ManifestEntry(dict):
362
381
  def id(self) -> Optional[str]:
363
382
  return self.get("id")
364
383
 
384
+
385
+ class ManifestAuthor(dict):
386
+ @staticmethod
387
+ def of(name: Optional[str], version: Optional[str]) -> ManifestAuthor:
388
+ manifest_author = ManifestAuthor()
389
+ if name is not None:
390
+ manifest_author["name"] = name
391
+ if version is not None:
392
+ manifest_author["version"] = version
393
+ return manifest_author
394
+
365
395
  @property
366
- def entry_type(self) -> Optional[EntryType]:
367
- val = self.get("entry_type")
368
- if val is not None:
369
- return EntryType(self["entry_type"])
370
- return val
396
+ def name(self) -> Optional[str]:
397
+ return self.get("name")
371
398
 
372
399
  @property
373
- def entry_file_params(self) -> Optional[EntryFileParams]:
374
- val: Dict[str, Any] = self.get("entry_file_params")
375
- if val is not None and not isinstance(val, EntryFileParams):
376
- self["entry_file_params"] = val = EntryFileParams(val)
377
- return val
400
+ def version(self) -> Optional[str]:
401
+ return self.get("version")
378
402
 
379
403
 
380
404
  class ManifestEntryList(List[ManifestEntry]):