cloe-nessy 0.3.16.3b0__py3-none-any.whl → 0.3.16.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/integration/reader/catalog_reader.py +0 -36
- cloe_nessy/integration/writer/catalog_writer.py +1 -63
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py +1 -5
- cloe_nessy/models/column.py +2 -3
- cloe_nessy/models/schema.py +0 -1
- cloe_nessy/models/templates/create_table.sql.j2 +0 -22
- cloe_nessy/object_manager/table_manager.py +7 -28
- cloe_nessy/pipeline/actions/read_catalog_table.py +10 -32
- cloe_nessy/pipeline/actions/read_metadata_yaml.py +33 -61
- cloe_nessy/pipeline/actions/transform_join.py +24 -98
- cloe_nessy/pipeline/actions/transform_union.py +2 -2
- cloe_nessy/pipeline/actions/write_catalog_table.py +19 -64
- cloe_nessy/pipeline/actions/write_delta_merge.py +0 -1
- cloe_nessy/pipeline/pipeline.py +8 -25
- cloe_nessy/pipeline/pipeline_plotting_service.py +340 -0
- cloe_nessy/session/session_manager.py +10 -10
- {cloe_nessy-0.3.16.3b0.dist-info → cloe_nessy-0.3.16.6.dist-info}/METADATA +17 -18
- {cloe_nessy-0.3.16.3b0.dist-info → cloe_nessy-0.3.16.6.dist-info}/RECORD +20 -19
- {cloe_nessy-0.3.16.3b0.dist-info → cloe_nessy-0.3.16.6.dist-info}/WHEEL +2 -1
- cloe_nessy-0.3.16.6.dist-info/top_level.txt +1 -0
- cloe_nessy/pipeline/actions/transform_convert_timestamp.py +0 -87
|
@@ -19,18 +19,18 @@ cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=urayKfOUpSaXKgTs1K
|
|
|
19
19
|
cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
cloe_nessy/integration/reader/__init__.py,sha256=J5vlORqHLBpHEvzIwfIjzN5xEdOat-8jlmdLcGj8nsA,239
|
|
21
21
|
cloe_nessy/integration/reader/api_reader.py,sha256=3Mf-txOTJ1dXCzdNtRTLC8UKftKms4NxOoLVgzcc2eo,5691
|
|
22
|
-
cloe_nessy/integration/reader/catalog_reader.py,sha256=
|
|
22
|
+
cloe_nessy/integration/reader/catalog_reader.py,sha256=lwDeWBVXfFh75XknPawetL9ZBtqS-Oss5rNzbrEeIQg,2070
|
|
23
23
|
cloe_nessy/integration/reader/excel_reader.py,sha256=8KCqKBYFE6RGCiahJimQOAtbYZzaUzlnoslW9yca5P8,8035
|
|
24
24
|
cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
|
|
25
25
|
cloe_nessy/integration/reader/file_reader.py,sha256=3DcZhyyL-Cf_R7Px1UDHJwpO8Un31dWey2Q-f4DtWfY,6879
|
|
26
26
|
cloe_nessy/integration/reader/reader.py,sha256=e2KVPePQme8SBQJEbL-3zpGasOgTiEvKFTslow2wGPw,1034
|
|
27
27
|
cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
|
|
28
|
-
cloe_nessy/integration/writer/catalog_writer.py,sha256=
|
|
28
|
+
cloe_nessy/integration/writer/catalog_writer.py,sha256=Gb-hMdADgO_uUJ7mZPHBYyNme2qXsdFFnzwo7GcShHM,2192
|
|
29
29
|
cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70To4L6Q182pXx2HRM,5454
|
|
30
30
|
cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
|
|
31
31
|
cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
|
|
32
32
|
cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
|
|
33
|
-
cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=
|
|
33
|
+
cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
|
|
34
34
|
cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=kiacqQ2FYQSzakJqZ9-ZHH3os4X7--QuER_2xx9y21k,971
|
|
35
35
|
cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
|
|
36
36
|
cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
|
|
@@ -38,10 +38,10 @@ cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZum
|
|
|
38
38
|
cloe_nessy/logging/logger_mixin.py,sha256=9iy7BF6drYme-f7Rrt_imbVBRgVqQ89xjcP1X5aMtfY,7467
|
|
39
39
|
cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
|
|
40
40
|
cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
|
|
41
|
-
cloe_nessy/models/column.py,sha256=
|
|
41
|
+
cloe_nessy/models/column.py,sha256=53fBwRnino72XKACsHZpN9QfCBqqSXyKLHZlM0huumg,1988
|
|
42
42
|
cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
|
|
43
43
|
cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
|
|
44
|
-
cloe_nessy/models/schema.py,sha256=
|
|
44
|
+
cloe_nessy/models/schema.py,sha256=yUrjjEhAH5zbCymE67Az_jPnVB8hGO-_UNfqzeZCD_Y,3376
|
|
45
45
|
cloe_nessy/models/table.py,sha256=O9vcJ1XBIb6kA-NAI3SNpB5b7MGDo3p4wMJdonPaBfA,12076
|
|
46
46
|
cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
|
|
47
47
|
cloe_nessy/models/volume.py,sha256=51BE06FrL1Wv6zblFwJ_HTiR6WQqH7pSmrdH90rqwLg,2444
|
|
@@ -50,28 +50,28 @@ cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1sy
|
|
|
50
50
|
cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
51
|
cloe_nessy/models/mixins/read_instance_mixin.py,sha256=j5Y4aNWOh1jlskEaxNooZFJgPyxRmik00gAVLJnAaRs,4507
|
|
52
52
|
cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
|
|
53
|
-
cloe_nessy/models/templates/create_table.sql.j2,sha256=
|
|
53
|
+
cloe_nessy/models/templates/create_table.sql.j2,sha256=QWbiTXwmGaIlZUAIGL4pAlHkDbP9mq1vGAkdKCPOqm4,1669
|
|
54
54
|
cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
|
|
55
55
|
cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
|
|
56
|
-
cloe_nessy/object_manager/table_manager.py,sha256=
|
|
56
|
+
cloe_nessy/object_manager/table_manager.py,sha256=m6u_KFYCPoqq1hagwt3s7eQopjV2oOJNlmXDVAfku-k,12703
|
|
57
57
|
cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
|
|
58
58
|
cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
|
|
59
|
-
cloe_nessy/pipeline/pipeline.py,sha256
|
|
59
|
+
cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
|
|
60
60
|
cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6IloVd2Fj31Sg,1944
|
|
61
61
|
cloe_nessy/pipeline/pipeline_config.py,sha256=BN3ZSbr6bC-X9edoh-n5vRfPHFMbgtAU7mQ3dBrcWO8,3131
|
|
62
62
|
cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0YEg0zlQTz58,1874
|
|
63
63
|
cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
|
|
64
|
+
cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
|
|
64
65
|
cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
|
|
65
66
|
cloe_nessy/pipeline/actions/__init__.py,sha256=9gjSQKLGrPcaYaJrTYZde8d4yNrN1SoXN_DDHq5KrvY,2600
|
|
66
67
|
cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
|
|
67
|
-
cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=
|
|
68
|
+
cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=oXbqbc6BfR82dSIGclwzWiTN8EVmpFjNIYLKm4qOU50,2754
|
|
68
69
|
cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
|
|
69
70
|
cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
|
|
70
|
-
cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=
|
|
71
|
+
cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
|
|
71
72
|
cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
|
|
72
73
|
cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
|
|
73
74
|
cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
|
|
74
|
-
cloe_nessy/pipeline/actions/transform_convert_timestamp.py,sha256=je6H-mtNeokU9W_-RCWaRCFvMhk4oQL9s60FVBrl8Po,3090
|
|
75
75
|
cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
|
|
76
76
|
cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=E0ypz9qkHMSatNfnHekP-E6svQVL149M4PV02M03drg,5099
|
|
77
77
|
cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD-bP0Se9vxlBF0K4AgQWs,1976
|
|
@@ -79,22 +79,23 @@ cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1Qeat
|
|
|
79
79
|
cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
|
|
80
80
|
cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpbsPEJkzea5zFJA6MuyjNpOsFud9o,4045
|
|
81
81
|
cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=heRjBA-Gfu-nmNHOjTYlipEpKY8oNPAHAY40vjJk3aI,8383
|
|
82
|
-
cloe_nessy/pipeline/actions/transform_join.py,sha256=
|
|
82
|
+
cloe_nessy/pipeline/actions/transform_join.py,sha256=e_tvMk8YJTAWcUK_EmOgNt0s31ICZoMX_MKOTWx4lBY,3645
|
|
83
83
|
cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
|
|
84
84
|
cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
|
|
85
85
|
cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
|
|
86
86
|
cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
|
|
87
|
-
cloe_nessy/pipeline/actions/transform_union.py,sha256=
|
|
88
|
-
cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=
|
|
87
|
+
cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
|
|
88
|
+
cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
|
|
89
89
|
cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
|
|
90
|
-
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=
|
|
90
|
+
cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
|
|
91
91
|
cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
|
|
92
92
|
cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
|
|
93
|
-
cloe_nessy/session/session_manager.py,sha256=
|
|
93
|
+
cloe_nessy/session/session_manager.py,sha256=VCUPhACeN5armd4D0TqDeH4Ih9nu6XvXSREFqHUwt4s,9710
|
|
94
94
|
cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
|
|
95
95
|
cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
|
|
96
96
|
cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
97
|
cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
|
|
98
|
-
cloe_nessy-0.3.16.
|
|
99
|
-
cloe_nessy-0.3.16.
|
|
100
|
-
cloe_nessy-0.3.16.
|
|
98
|
+
cloe_nessy-0.3.16.6.dist-info/METADATA,sha256=YfBuBVqeRWjBTWlj4SQKyUVrc-PX78fK_MnHhO2MQv4,3292
|
|
99
|
+
cloe_nessy-0.3.16.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
100
|
+
cloe_nessy-0.3.16.6.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
|
|
101
|
+
cloe_nessy-0.3.16.6.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cloe_nessy
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
from typing import Any
|
|
2
|
-
|
|
3
|
-
from pyspark.errors.exceptions.base import IllegalArgumentException
|
|
4
|
-
from pyspark.sql import functions as F
|
|
5
|
-
|
|
6
|
-
from ...pipeline import PipelineAction, PipelineContext
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TransformConvertTimestampAction(PipelineAction):
|
|
10
|
-
"""This class implements a Transform action for an ETL pipeline.
|
|
11
|
-
|
|
12
|
-
This action performs timestamp based conversions.
|
|
13
|
-
|
|
14
|
-
Example:
|
|
15
|
-
```yaml
|
|
16
|
-
Convert Timestamp:
|
|
17
|
-
action: TRANSFORM_CONVERT_TIMESTAMP
|
|
18
|
-
options:
|
|
19
|
-
column: my_timestamp_column
|
|
20
|
-
source_format: unixtime
|
|
21
|
-
target_format: yyyy-MM-dd HH:mm:ss
|
|
22
|
-
```
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
name: str = "TRANSFORM_CONVERT_TIMESTAMP"
|
|
26
|
-
|
|
27
|
-
def run(
|
|
28
|
-
self,
|
|
29
|
-
context: PipelineContext,
|
|
30
|
-
*,
|
|
31
|
-
column: str = "",
|
|
32
|
-
source_format: str = "",
|
|
33
|
-
target_format: str = "",
|
|
34
|
-
**_: Any,
|
|
35
|
-
) -> PipelineContext:
|
|
36
|
-
"""Converts a column from a given source format to a new format.
|
|
37
|
-
|
|
38
|
-
Args:
|
|
39
|
-
context: Context in which this Action is executed.
|
|
40
|
-
column: The column that should be converted.
|
|
41
|
-
source_format: Initial format type of the column.
|
|
42
|
-
target_format: Desired format type of the column. This also supports
|
|
43
|
-
passing a format string like 'yyyy-MM-dd HH:mm:ss'.
|
|
44
|
-
|
|
45
|
-
Raises:
|
|
46
|
-
ValueError: If no column, source_format and target_format are provided.
|
|
47
|
-
ValueError: If source_format or target_format are not supported.
|
|
48
|
-
|
|
49
|
-
Returns:
|
|
50
|
-
PipelineContext: Context after the execution of this Action.
|
|
51
|
-
"""
|
|
52
|
-
if not column:
|
|
53
|
-
raise ValueError("No column provided.")
|
|
54
|
-
if not source_format:
|
|
55
|
-
raise ValueError("No source_format provided.")
|
|
56
|
-
if not target_format:
|
|
57
|
-
raise ValueError("No target_format provided.")
|
|
58
|
-
if context.data is None:
|
|
59
|
-
raise ValueError("Context DataFrame is required.")
|
|
60
|
-
df = context.data
|
|
61
|
-
|
|
62
|
-
match source_format:
|
|
63
|
-
# convert always to timestamp first
|
|
64
|
-
case "unixtime":
|
|
65
|
-
df = df.withColumn(column, F.from_unixtime(F.col(column)))
|
|
66
|
-
case "unixtime_ms":
|
|
67
|
-
df = df.withColumn(column, F.to_timestamp(F.col(column) / 1000))
|
|
68
|
-
case "string":
|
|
69
|
-
df = df.withColumn(column, F.to_timestamp(F.col(column)))
|
|
70
|
-
case "timestamp":
|
|
71
|
-
pass
|
|
72
|
-
case _:
|
|
73
|
-
raise ValueError(f"Unknown source_format {source_format}")
|
|
74
|
-
|
|
75
|
-
match target_format:
|
|
76
|
-
# convert from timestamp to desired output format
|
|
77
|
-
case "timestamp":
|
|
78
|
-
pass
|
|
79
|
-
case "unixtime":
|
|
80
|
-
df = df.withColumn(column, F.to_unix_timestamp(F.col(column)))
|
|
81
|
-
case _:
|
|
82
|
-
try:
|
|
83
|
-
df = df.withColumn(column, F.date_format(F.col(column), target_format))
|
|
84
|
-
except IllegalArgumentException as e:
|
|
85
|
-
raise ValueError(f"Invalid target_format {target_format}") from e
|
|
86
|
-
|
|
87
|
-
return context.from_existing(data=df)
|