cloe-nessy 0.3.13.4b0__py3-none-any.whl → 0.3.13.6b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ from pydantic import BaseModel, Field, field_validator, model_validator
5
5
 
6
6
  COLUMN_DATA_TYPE_LIST = {
7
7
  "string",
8
+ "decimal",
8
9
  "integer",
9
10
  "int",
10
11
  "smallint",
@@ -43,7 +44,7 @@ class Column(BaseModel):
43
44
  """
44
45
  val = raw.lower()
45
46
  base_data_types = re.findall(r"\b[a-z]+\b", val)
46
- forbidden_characters = re.findall(r"[^a-z\<\>)]+", val)
47
+ forbidden_characters = re.findall(r"[^a-z0-9\(\)\<\>,\s]+", val)
47
48
 
48
49
  if forbidden_characters:
49
50
  raise ValueError(f"Forbidden characters in data type definition [ '{val}' ]: [' {forbidden_characters} ']")
@@ -13,6 +13,14 @@ USING delta
13
13
  {% if table.storage_path %}
14
14
  LOCATION '{{ table.storage_path }}'
15
15
  {% endif %}
16
+ {% if table.properties %}
17
+ TBLPROPERTIES (
18
+ {%- for key, value in table.properties.items() %}
19
+ {%- if not loop.first %}, {% endif -%}
20
+ '{{key}}' = '{{value}}'
21
+ {%- endfor -%}
22
+ )
23
+ {% endif %}
16
24
  {% if table.partition_by -%}
17
25
  {%- if table.liquid_clustering -%} CLUSTER {%- else -%} PARTITIONED {%- endif %} BY (
18
26
  {%- for column in table.partition_by -%}
@@ -0,0 +1,87 @@
1
+ from typing import Any
2
+
3
+ from pyspark.errors.exceptions.base import IllegalArgumentException
4
+ from pyspark.sql import functions as F
5
+
6
+ from ...pipeline import PipelineAction, PipelineContext
7
+
8
+
9
+ class TransformConvertTimestampAction(PipelineAction):
10
+ """This class implements a Transform action for an ETL pipeline.
11
+
12
+ This action performs timestamp-based conversions.
13
+
14
+ Example:
15
+ ```yaml
16
+ Convert Timestamp:
17
+ action: TRANSFORM_CONVERT_TIMESTAMP
18
+ options:
19
+ column: my_timestamp_column
20
+ source_format: unixtime
21
+ target_format: yyyy-MM-dd HH:mm:ss
22
+ ```
23
+ """
24
+
25
+ name: str = "TRANSFORM_CONVERT_TIMESTAMP"
26
+
27
+ def run(
28
+ self,
29
+ context: PipelineContext,
30
+ *,
31
+ column: str = "",
32
+ source_format: str = "",
33
+ target_format: str = "",
34
+ **_: Any,
35
+ ) -> PipelineContext:
36
+ """Converts a column from a given source format to a new format.
37
+
38
+ Args:
39
+ context: Context in which this Action is executed.
40
+ column: The column that should be converted.
41
+ source_format: Initial format type of the column.
42
+ target_format: Desired format type of the column. This also supports
43
+ passing a format string like 'yyyy-MM-dd HH:mm:ss'.
44
+
45
+ Raises:
46
+ ValueError: If no column, source_format or target_format is provided.
47
+ ValueError: If source_format or target_format are not supported.
48
+
49
+ Returns:
50
+ PipelineContext: Context after the execution of this Action.
51
+ """
52
+ if not column:
53
+ raise ValueError("No column provided.")
54
+ if not source_format:
55
+ raise ValueError("No source_format provided.")
56
+ if not target_format:
57
+ raise ValueError("No target_format provided.")
58
+ if context.data is None:
59
+ raise ValueError("Context DataFrame is required.")
60
+ df = context.data
61
+
62
+ match source_format:
63
+ # convert always to timestamp first
64
+ case "unixtime":
65
+ df = df.withColumn(column, F.from_unixtime(F.col(column)))
66
+ case "unixtime_ms":
67
+ df = df.withColumn(column, F.to_timestamp(F.col(column) / 1000))
68
+ case "string":
69
+ df = df.withColumn(column, F.to_timestamp(F.col(column)))
70
+ case "timestamp":
71
+ pass
72
+ case _:
73
+ raise ValueError(f"Unknown source_format {source_format}")
74
+
75
+ match target_format:
76
+ # convert from timestamp to desired output format
77
+ case "timestamp":
78
+ pass
79
+ case "unixtime":
80
+ df = df.withColumn(column, F.to_unix_timestamp(F.col(column)))
81
+ case _:
82
+ try:
83
+ df = df.withColumn(column, F.date_format(F.col(column), target_format))
84
+ except IllegalArgumentException as e:
85
+ raise ValueError(f"Invalid target_format {target_format}") from e
86
+
87
+ return context.from_existing(data=df)
@@ -13,20 +13,49 @@ class TransformJoinAction(PipelineAction):
13
13
  from [PySpark
14
14
  documentation](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)
15
15
 
16
- Example:
17
- ```yaml
18
- Join Tables:
19
- action: TRANSFORM_JOIN
20
- options:
21
- joined_data: ((step:Transform First Table))
22
- join_on: id
23
- how: anti
24
- ```
16
+ Examples:
17
+ === "Simple Column Join"
18
+ ```yaml
19
+ Join Tables:
20
+ action: TRANSFORM_JOIN
21
+ options:
22
+ joined_data: ((step:Transform First Table))
23
+ join_on: id
24
+ how: inner
25
+ ```
26
+
27
+ === "Multiple Columns Join"
28
+ ```yaml
29
+ Join Tables:
30
+ action: TRANSFORM_JOIN
31
+ options:
32
+ joined_data: ((step:Transform First Table))
33
+ join_on: [customer_id, order_date]
34
+ how: left
35
+ ```
36
+
37
+ === "Dictionary Join (Different Column Names)"
38
+ ```yaml
39
+ Join Tables:
40
+ action: TRANSFORM_JOIN
41
+ options:
42
+ joined_data: ((step:Transform First Table))
43
+ join_on:
44
+ customer_id: cust_id
45
+ order_date: date
46
+ how: inner
47
+ ```
25
48
 
26
49
  !!! note "Referencing a DataFrame from another step"
27
50
  The `joined_data` parameter is a reference to the DataFrame from another step.
28
51
  The DataFrame is accessed using the `result` attribute of the PipelineStep. The syntax
29
52
  for referencing the DataFrame is `((step:Step Name))`, mind the double parentheses.
53
+
54
+ !!! tip "Dictionary Join Syntax"
55
+ When using a dictionary for `join_on`, the keys represent columns
56
+ from the DataFrame in context and the values represent columns from
57
+ the DataFrame in `joined_data`. This is useful when joining tables
58
+ with different column names for the same logical entity.
30
59
  """
31
60
 
32
61
  name: str = "TRANSFORM_JOIN"
@@ -22,8 +22,8 @@ class TransformUnionAction(PipelineAction):
22
22
  action: TRANSFORM_UNION
23
23
  options:
24
24
  union_data:
25
- - ((step: Filter First Table))
26
- - ((step: SQL Transform Second Table))
25
+ - ((step:Filter First Table))
26
+ - ((step:SQL Transform Second Table))
27
27
  ```
28
28
  !!! note "Referencing a DataFrame from another step"
29
29
  The `union_data` parameter is a reference to the DataFrame from another step.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloe-nessy
3
- Version: 0.3.13.4b0
3
+ Version: 0.3.13.6b0
4
4
  Summary: Your friendly datalake monster.
5
5
  Project-URL: homepage, https://initions.com/
6
6
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -16,7 +16,8 @@ Requires-Python: <3.13,>=3.11
16
16
  Requires-Dist: azure-identity<2.0.0,>=1.19.0
17
17
  Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
18
18
  Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
19
- Requires-Dist: fsspec<2025.6.0,>=2025.5.1
19
+ Requires-Dist: delta-spark>=3.3.2
20
+ Requires-Dist: fsspec<2025.7.1,>=2025.7.0
20
21
  Requires-Dist: httpx<1.0.0,>=0.27.2
21
22
  Requires-Dist: jinja2<4.0.0,>=3.1.4
22
23
  Requires-Dist: matplotlib<4.0.0,>=3.9.2
@@ -38,7 +38,7 @@ cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZum
38
38
  cloe_nessy/logging/logger_mixin.py,sha256=9iy7BF6drYme-f7Rrt_imbVBRgVqQ89xjcP1X5aMtfY,7467
39
39
  cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
40
40
  cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
41
- cloe_nessy/models/column.py,sha256=53fBwRnino72XKACsHZpN9QfCBqqSXyKLHZlM0huumg,1988
41
+ cloe_nessy/models/column.py,sha256=8wR7E8PRhUc0dwM83IIlpz7kBncZim7J5FvQzd8R_Us,2012
42
42
  cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
43
43
  cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
44
44
  cloe_nessy/models/schema.py,sha256=cNSrH7K4hLRrkg1E6fW6DUIBMZdR2A5B21POj5iQ4GA,3429
@@ -50,7 +50,7 @@ cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1sy
50
50
  cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  cloe_nessy/models/mixins/read_instance_mixin.py,sha256=j5Y4aNWOh1jlskEaxNooZFJgPyxRmik00gAVLJnAaRs,4507
52
52
  cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
53
- cloe_nessy/models/templates/create_table.sql.j2,sha256=QWbiTXwmGaIlZUAIGL4pAlHkDbP9mq1vGAkdKCPOqm4,1669
53
+ cloe_nessy/models/templates/create_table.sql.j2,sha256=z-NNUJ61wqMZyNaKGBsrj6gqogo6CtEaS1rWoa8hUbw,1877
54
54
  cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
55
55
  cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
56
56
  cloe_nessy/object_manager/table_manager.py,sha256=suHx56TYXagaJ2dVkvTP7vwSI4xgTqXNkHYBbYh2pd4,13913
@@ -71,6 +71,7 @@ cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkX
71
71
  cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
72
72
  cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=-CEdcXb7Fz5DQNitGlJ8EVBE_LzxfsInyCIO-D7b4iY,3042
73
73
  cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
74
+ cloe_nessy/pipeline/actions/transform_convert_timestamp.py,sha256=je6H-mtNeokU9W_-RCWaRCFvMhk4oQL9s60FVBrl8Po,3090
74
75
  cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
75
76
  cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=E0ypz9qkHMSatNfnHekP-E6svQVL149M4PV02M03drg,5099
76
77
  cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD-bP0Se9vxlBF0K4AgQWs,1976
@@ -78,12 +79,12 @@ cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1Qeat
78
79
  cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
79
80
  cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpbsPEJkzea5zFJA6MuyjNpOsFud9o,4045
80
81
  cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=heRjBA-Gfu-nmNHOjTYlipEpKY8oNPAHAY40vjJk3aI,8383
81
- cloe_nessy/pipeline/actions/transform_join.py,sha256=e_tvMk8YJTAWcUK_EmOgNt0s31ICZoMX_MKOTWx4lBY,3645
82
+ cloe_nessy/pipeline/actions/transform_join.py,sha256=BjMn2h_Trq8l1n9R4QB55v1pAm0a9ft1vMLDBnHKG6g,4790
82
83
  cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
83
84
  cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
84
85
  cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
85
86
  cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
86
- cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
87
+ cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h4dXKT7Wr2TDj4zB4k,2718
87
88
  cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
88
89
  cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
89
90
  cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=gh3oD0ZGjDq0hw56NiRimK4HHCruDofqqdzFFgYLve8,5085
@@ -94,6 +95,6 @@ cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEv
94
95
  cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
95
96
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
97
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
97
- cloe_nessy-0.3.13.4b0.dist-info/METADATA,sha256=Z9Bby2T96l0nm453ZwbiAq3YDP0uau5GfPORfD9KE7E,3294
98
- cloe_nessy-0.3.13.4b0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
99
- cloe_nessy-0.3.13.4b0.dist-info/RECORD,,
98
+ cloe_nessy-0.3.13.6b0.dist-info/METADATA,sha256=Dn1ZY7mxS1qLW4vJAcOF-aYA9crg-4W7iMPTitd2ogs,3328
99
+ cloe_nessy-0.3.13.6b0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
100
+ cloe_nessy-0.3.13.6b0.dist-info/RECORD,,