deltacat 1.1.36__py3-none-any.whl → 2.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.0b2.dist-info/METADATA +65 -0
- deltacat-2.0.0b2.dist-info/RECORD +349 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,42 @@
|
|
1
|
-
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
1
|
from __future__ import annotations
|
3
2
|
|
4
|
-
import itertools
|
5
3
|
import logging
|
6
|
-
|
7
|
-
|
4
|
+
import itertools
|
5
|
+
|
8
6
|
from enum import Enum
|
7
|
+
from typing import Optional, List, Dict, Any
|
8
|
+
from uuid import uuid4
|
9
9
|
|
10
10
|
from deltacat import logs
|
11
11
|
|
12
|
+
from deltacat.storage.model.schema import FieldLocator
|
13
|
+
|
12
14
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
13
15
|
|
14
16
|
|
15
17
|
class EntryType(str, Enum):
|
16
18
|
"""
|
17
|
-
Enum representing all possible content categories of
|
19
|
+
Enum representing all possible content categories of a manifest entry file.
|
20
|
+
|
21
|
+
DATA: The entry contains fully qualified records compliant with the parent
|
22
|
+
table's schema to insert and/or update. Data files for upsert Deltas use
|
23
|
+
this entry's parameters to find matching fields to update. If no entry
|
24
|
+
parameters are specified, then the parent table's primary keys are used.
|
25
|
+
Only records from entries in Deltas with lower stream positions than this
|
26
|
+
entry will be targeted for update.
|
27
|
+
|
28
|
+
POSITIONAL_DELETE: The entry contains pointers to records in other entries
|
29
|
+
to delete. Deleted records will be filtered from query results at runtime.
|
30
|
+
|
31
|
+
EQUALITY_DELETE: The entry contains a subset of field values from the
|
32
|
+
table records to find and delete. The full record of any matching data
|
33
|
+
entries in Deltas with a lower stream position than this entry's Delta
|
34
|
+
will be deleted. The fields used for record discovery are controlled by
|
35
|
+
this entry's parameters. If no entry parameters are specified, then the
|
36
|
+
fields used for record discovery are linked to the parent table's merge
|
37
|
+
keys. The entry may contain additional fields not used for delete record
|
38
|
+
discovery which will be ignored. Deleted records will be filtered from
|
39
|
+
query results at runtime.
|
18
40
|
"""
|
19
41
|
|
20
42
|
DATA = "data"
|
@@ -30,46 +52,40 @@ class EntryType(str, Enum):
|
|
30
52
|
return [c.value for c in EntryType]
|
31
53
|
|
32
54
|
|
33
|
-
class
|
55
|
+
class EntryParams(dict):
|
34
56
|
"""
|
35
|
-
|
36
|
-
|
37
|
-
|
57
|
+
Parameters that control manifest entry interpretation.
|
58
|
+
|
59
|
+
For EQUALITY_DELETE manifest entry types, parameters include equality
|
60
|
+
field identifiers.
|
38
61
|
"""
|
39
62
|
|
40
63
|
@staticmethod
|
41
64
|
def of(
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if position is not None:
|
49
|
-
entry_file_params["position"] = position
|
50
|
-
return entry_file_params
|
65
|
+
equality_field_locators: Optional[List[FieldLocator]] = None,
|
66
|
+
) -> EntryParams:
|
67
|
+
params = EntryParams()
|
68
|
+
if equality_field_locators is not None:
|
69
|
+
params["equality_field_locators"] = equality_field_locators
|
70
|
+
return params
|
51
71
|
|
52
72
|
@property
|
53
|
-
def
|
54
|
-
return self.get("
|
55
|
-
|
56
|
-
@property
|
57
|
-
def url(self) -> Optional[str]:
|
58
|
-
return self.get("url")
|
59
|
-
|
60
|
-
@property
|
61
|
-
def position(self) -> Optional[int]:
|
62
|
-
return self.get("position")
|
73
|
+
def equality_field_locators(self) -> Optional[List[FieldLocator]]:
|
74
|
+
return self.get("equality_field_locators")
|
63
75
|
|
64
76
|
|
65
77
|
class Manifest(dict):
|
78
|
+
"""
|
79
|
+
A DeltaCAT manifest contains metadata common to multiple manifest formats
|
80
|
+
like Amazon Redshift and Apache Iceberg to simplify dataset import/export.
|
81
|
+
"""
|
82
|
+
|
66
83
|
@staticmethod
|
67
84
|
def _build_manifest(
|
68
85
|
meta: Optional[ManifestMeta],
|
69
86
|
entries: Optional[ManifestEntryList],
|
70
87
|
author: Optional[ManifestAuthor] = None,
|
71
88
|
uuid: str = None,
|
72
|
-
entry_type: Optional[EntryType] = None,
|
73
89
|
) -> Manifest:
|
74
90
|
if not uuid:
|
75
91
|
uuid = str(uuid4())
|
@@ -81,8 +97,6 @@ class Manifest(dict):
|
|
81
97
|
manifest["entries"] = entries
|
82
98
|
if author is not None:
|
83
99
|
manifest["author"] = author
|
84
|
-
if entry_type is not None:
|
85
|
-
manifest["entry_type"] = entry_type.value
|
86
100
|
return manifest
|
87
101
|
|
88
102
|
@staticmethod
|
@@ -91,6 +105,7 @@ class Manifest(dict):
|
|
91
105
|
author: Optional[ManifestAuthor] = None,
|
92
106
|
uuid: str = None,
|
93
107
|
entry_type: Optional[EntryType] = None,
|
108
|
+
entry_params: Optional[EntryParams] = None,
|
94
109
|
) -> Manifest:
|
95
110
|
if not uuid:
|
96
111
|
uuid = str(uuid4())
|
@@ -99,11 +114,13 @@ class Manifest(dict):
|
|
99
114
|
total_source_content_length = 0
|
100
115
|
content_type = None
|
101
116
|
content_encoding = None
|
102
|
-
|
103
|
-
|
117
|
+
credentials = None
|
118
|
+
content_type_params = None
|
104
119
|
if entries:
|
105
120
|
content_type = entries[0].meta.content_type
|
106
121
|
content_encoding = entries[0].meta.content_encoding
|
122
|
+
credentials = entries[0].meta.credentials
|
123
|
+
content_type_params = entries[0].meta.content_type_parameters
|
107
124
|
for entry in entries:
|
108
125
|
meta = entry.meta
|
109
126
|
if meta.content_type != content_type:
|
@@ -118,7 +135,7 @@ class Manifest(dict):
|
|
118
135
|
f"'{entry_content_type}'"
|
119
136
|
)
|
120
137
|
raise ValueError(msg)
|
121
|
-
entry_content_encoding = meta
|
138
|
+
entry_content_encoding = meta.get("content_encoding", None)
|
122
139
|
if entry_content_encoding != content_encoding:
|
123
140
|
msg = (
|
124
141
|
f"Expected all manifest entries to have content "
|
@@ -126,25 +143,53 @@ class Manifest(dict):
|
|
126
143
|
f"'{entry_content_encoding}'"
|
127
144
|
)
|
128
145
|
raise ValueError(msg)
|
146
|
+
actual_entry_type = meta.entry_type
|
147
|
+
if entry_type and (actual_entry_type != entry_type):
|
148
|
+
msg = (
|
149
|
+
f"Expected all manifest entries to have type "
|
150
|
+
f"'{entry_type}' but found '{actual_entry_type}'"
|
151
|
+
)
|
152
|
+
raise ValueError(msg)
|
153
|
+
actual_entry_params = meta.entry_params
|
154
|
+
if entry_params and (actual_entry_params != entry_params):
|
155
|
+
msg = (
|
156
|
+
f"Expected all manifest entries to have params "
|
157
|
+
f"'{entry_params}' but found '{actual_entry_params}'"
|
158
|
+
)
|
159
|
+
raise ValueError(msg)
|
160
|
+
actual_credentials = meta.credentials
|
161
|
+
if credentials and (actual_credentials != credentials):
|
162
|
+
msg = (
|
163
|
+
f"Expected all manifest entries to have credentials "
|
164
|
+
f"'{credentials}' but found '{actual_credentials}'"
|
165
|
+
)
|
166
|
+
raise ValueError(msg)
|
167
|
+
actual_content_type_params = meta.content_type_parameters
|
168
|
+
if content_type_params and (
|
169
|
+
actual_content_type_params != content_type_params
|
170
|
+
):
|
171
|
+
msg = (
|
172
|
+
f"Expected all manifest entries to have content type params "
|
173
|
+
f"'{content_type_params}' but found '{actual_content_type_params}'"
|
174
|
+
)
|
175
|
+
raise ValueError(msg)
|
176
|
+
|
129
177
|
total_record_count += meta.record_count or 0
|
130
178
|
total_content_length += meta.content_length or 0
|
131
179
|
total_source_content_length += meta.source_content_length or 0
|
132
|
-
if len(partition_values_set) <= 1:
|
133
|
-
partition_values_set.add(entry.meta.partition_values)
|
134
|
-
|
135
|
-
if len(partition_values_set) == 1:
|
136
|
-
partition_values = partition_values_set.pop()
|
137
180
|
|
138
181
|
meta = ManifestMeta.of(
|
139
|
-
total_record_count,
|
140
|
-
total_content_length,
|
141
|
-
content_type,
|
142
|
-
content_encoding,
|
143
|
-
total_source_content_length,
|
182
|
+
record_count=total_record_count,
|
183
|
+
content_length=total_content_length,
|
184
|
+
content_type=content_type,
|
185
|
+
content_encoding=content_encoding,
|
186
|
+
source_content_length=total_source_content_length,
|
187
|
+
credentials=credentials,
|
188
|
+
content_type_parameters=content_type_params,
|
144
189
|
entry_type=entry_type,
|
145
|
-
|
190
|
+
entry_params=entry_params,
|
146
191
|
)
|
147
|
-
manifest = Manifest._build_manifest(meta, entries, author, uuid
|
192
|
+
manifest = Manifest._build_manifest(meta, entries, author, uuid)
|
148
193
|
return manifest
|
149
194
|
|
150
195
|
@staticmethod
|
@@ -194,7 +239,7 @@ class ManifestMeta(dict):
|
|
194
239
|
credentials: Optional[Dict[str, str]] = None,
|
195
240
|
content_type_parameters: Optional[List[Dict[str, str]]] = None,
|
196
241
|
entry_type: Optional[EntryType] = None,
|
197
|
-
|
242
|
+
entry_params: Optional[EntryParams] = None,
|
198
243
|
) -> ManifestMeta:
|
199
244
|
manifest_meta = ManifestMeta()
|
200
245
|
if record_count is not None:
|
@@ -212,9 +257,11 @@ class ManifestMeta(dict):
|
|
212
257
|
if credentials is not None:
|
213
258
|
manifest_meta["credentials"] = credentials
|
214
259
|
if entry_type is not None:
|
215
|
-
manifest_meta["entry_type"] =
|
216
|
-
|
217
|
-
|
260
|
+
manifest_meta["entry_type"] = (
|
261
|
+
entry_type.value if isinstance(entry_type, EntryType) else entry_type
|
262
|
+
)
|
263
|
+
if entry_params is not None:
|
264
|
+
manifest_meta["entry_params"] = entry_params
|
218
265
|
return manifest_meta
|
219
266
|
|
220
267
|
@property
|
@@ -257,27 +304,11 @@ class ManifestMeta(dict):
|
|
257
304
|
return val
|
258
305
|
|
259
306
|
@property
|
260
|
-
def
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
@staticmethod
|
266
|
-
def of(name: Optional[str], version: Optional[str]) -> ManifestAuthor:
|
267
|
-
manifest_author = ManifestAuthor()
|
268
|
-
if name is not None:
|
269
|
-
manifest_author["name"] = name
|
270
|
-
if version is not None:
|
271
|
-
manifest_author["version"] = version
|
272
|
-
return manifest_author
|
273
|
-
|
274
|
-
@property
|
275
|
-
def name(self) -> Optional[str]:
|
276
|
-
return self.get("name")
|
277
|
-
|
278
|
-
@property
|
279
|
-
def version(self) -> Optional[str]:
|
280
|
-
return self.get("version")
|
307
|
+
def entry_params(self) -> Optional[EntryParams]:
|
308
|
+
val: Dict[str, Any] = self.get("entry_params")
|
309
|
+
if val is not None and not isinstance(val, EntryParams):
|
310
|
+
self["entry_params"] = val = EntryParams(val)
|
311
|
+
return val
|
281
312
|
|
282
313
|
|
283
314
|
class ManifestEntry(dict):
|
@@ -288,8 +319,6 @@ class ManifestEntry(dict):
|
|
288
319
|
mandatory: bool = True,
|
289
320
|
uri: Optional[str] = None,
|
290
321
|
uuid: Optional[str] = None,
|
291
|
-
entry_type: Optional[EntryType] = None,
|
292
|
-
entry_file_params: Optional[EntryFileParams] = None,
|
293
322
|
) -> ManifestEntry:
|
294
323
|
manifest_entry = ManifestEntry()
|
295
324
|
if not (uri or url):
|
@@ -306,16 +335,6 @@ class ManifestEntry(dict):
|
|
306
335
|
manifest_entry["mandatory"] = mandatory
|
307
336
|
if uuid is not None:
|
308
337
|
manifest_entry["id"] = uuid
|
309
|
-
if entry_type is not None:
|
310
|
-
manifest_entry["entry_type"] = entry_type.value
|
311
|
-
if entry_file_params is not None:
|
312
|
-
if entry_file_params.get("url") != manifest_entry.get("url"):
|
313
|
-
msg = (
|
314
|
-
f"Expected manifest entry url: {manifest_entry.url}"
|
315
|
-
f" and entry_file_params: '{entry_file_params.url}' to match"
|
316
|
-
)
|
317
|
-
raise ValueError(msg)
|
318
|
-
manifest_entry["entry_file_params"] = entry_file_params
|
319
338
|
return manifest_entry
|
320
339
|
|
321
340
|
@staticmethod
|
@@ -330,11 +349,11 @@ class ManifestEntry(dict):
|
|
330
349
|
s3_obj = s3_utils.get_object_at_url(url, **s3_client_kwargs)
|
331
350
|
logger.debug(f"Building manifest entry from {url}: {s3_obj}")
|
332
351
|
manifest_entry_meta = ManifestMeta.of(
|
333
|
-
record_count,
|
334
|
-
s3_obj["ContentLength"],
|
335
|
-
s3_obj["ContentType"],
|
336
|
-
s3_obj["ContentEncoding"],
|
337
|
-
source_content_length,
|
352
|
+
record_count=record_count,
|
353
|
+
content_length=s3_obj["ContentLength"],
|
354
|
+
content_type=s3_obj["ContentType"],
|
355
|
+
content_encoding=s3_obj["ContentEncoding"],
|
356
|
+
source_content_length=source_content_length,
|
338
357
|
)
|
339
358
|
manifest_entry = ManifestEntry.of(url, manifest_entry_meta)
|
340
359
|
return manifest_entry
|
@@ -362,19 +381,24 @@ class ManifestEntry(dict):
|
|
362
381
|
def id(self) -> Optional[str]:
|
363
382
|
return self.get("id")
|
364
383
|
|
384
|
+
|
385
|
+
class ManifestAuthor(dict):
|
386
|
+
@staticmethod
|
387
|
+
def of(name: Optional[str], version: Optional[str]) -> ManifestAuthor:
|
388
|
+
manifest_author = ManifestAuthor()
|
389
|
+
if name is not None:
|
390
|
+
manifest_author["name"] = name
|
391
|
+
if version is not None:
|
392
|
+
manifest_author["version"] = version
|
393
|
+
return manifest_author
|
394
|
+
|
365
395
|
@property
|
366
|
-
def
|
367
|
-
|
368
|
-
if val is not None:
|
369
|
-
return EntryType(self["entry_type"])
|
370
|
-
return val
|
396
|
+
def name(self) -> Optional[str]:
|
397
|
+
return self.get("name")
|
371
398
|
|
372
399
|
@property
|
373
|
-
def
|
374
|
-
|
375
|
-
if val is not None and not isinstance(val, EntryFileParams):
|
376
|
-
self["entry_file_params"] = val = EntryFileParams(val)
|
377
|
-
return val
|
400
|
+
def version(self) -> Optional[str]:
|
401
|
+
return self.get("version")
|
378
402
|
|
379
403
|
|
380
404
|
class ManifestEntryList(List[ManifestEntry]):
|