cloe-nessy 0.3.5__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. cloe_nessy/__init__.py +0 -0
  2. cloe_nessy/clients/__init__.py +0 -0
  3. cloe_nessy/clients/api_client/__init__.py +0 -0
  4. cloe_nessy/clients/api_client/api_client.py +0 -0
  5. cloe_nessy/clients/api_client/api_response.py +0 -0
  6. cloe_nessy/clients/api_client/auth.py +0 -0
  7. cloe_nessy/clients/api_client/exceptions.py +0 -0
  8. cloe_nessy/file_utilities/__init__.py +0 -0
  9. cloe_nessy/file_utilities/exceptions.py +0 -0
  10. cloe_nessy/file_utilities/factory.py +0 -0
  11. cloe_nessy/file_utilities/get_file_paths.py +0 -0
  12. cloe_nessy/file_utilities/location_types.py +0 -0
  13. cloe_nessy/file_utilities/strategies/__init__.py +0 -0
  14. cloe_nessy/file_utilities/strategies/base_strategy.py +0 -0
  15. cloe_nessy/file_utilities/strategies/local_strategy.py +0 -0
  16. cloe_nessy/file_utilities/strategies/onelake_strategy.py +0 -0
  17. cloe_nessy/file_utilities/strategies/utils_strategy.py +0 -0
  18. cloe_nessy/integration/__init__.py +0 -0
  19. cloe_nessy/integration/reader/__init__.py +0 -0
  20. cloe_nessy/integration/reader/api_reader.py +4 -2
  21. cloe_nessy/integration/reader/catalog_reader.py +6 -3
  22. cloe_nessy/integration/reader/excel_reader.py +1 -1
  23. cloe_nessy/integration/reader/exceptions.py +0 -0
  24. cloe_nessy/integration/reader/file_reader.py +78 -5
  25. cloe_nessy/integration/reader/reader.py +0 -0
  26. cloe_nessy/integration/writer/__init__.py +8 -1
  27. cloe_nessy/integration/writer/catalog_writer.py +0 -0
  28. cloe_nessy/integration/writer/delta_writer/__init__.py +7 -0
  29. cloe_nessy/integration/writer/delta_writer/delta_append_writer.py +108 -0
  30. cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py +215 -0
  31. cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py +21 -0
  32. cloe_nessy/integration/writer/delta_writer/delta_writer_base.py +210 -0
  33. cloe_nessy/integration/writer/delta_writer/exceptions.py +4 -0
  34. cloe_nessy/integration/writer/file_writer.py +132 -0
  35. cloe_nessy/integration/writer/writer.py +54 -0
  36. cloe_nessy/logging/__init__.py +0 -0
  37. cloe_nessy/logging/logger_mixin.py +0 -0
  38. cloe_nessy/models/__init__.py +4 -0
  39. cloe_nessy/models/adapter/__init__.py +3 -0
  40. cloe_nessy/models/adapter/unity_catalog_adapter.py +296 -0
  41. cloe_nessy/models/catalog.py +10 -0
  42. cloe_nessy/models/column.py +0 -0
  43. cloe_nessy/models/constraint.py +0 -0
  44. cloe_nessy/models/foreign_key.py +0 -0
  45. cloe_nessy/models/mixins/__init__.py +0 -0
  46. cloe_nessy/models/mixins/read_instance_mixin.py +0 -0
  47. cloe_nessy/models/mixins/template_loader_mixin.py +0 -0
  48. cloe_nessy/models/schema.py +20 -1
  49. cloe_nessy/models/table.py +67 -11
  50. cloe_nessy/models/types.py +0 -0
  51. cloe_nessy/models/volume.py +67 -0
  52. cloe_nessy/object_manager/__init__.py +7 -2
  53. cloe_nessy/object_manager/table_manager.py +251 -21
  54. cloe_nessy/object_manager/volume_manager.py +70 -0
  55. cloe_nessy/pipeline/__init__.py +0 -0
  56. cloe_nessy/pipeline/actions/__init__.py +9 -1
  57. cloe_nessy/pipeline/actions/read_api.py +0 -0
  58. cloe_nessy/pipeline/actions/read_catalog_table.py +1 -4
  59. cloe_nessy/pipeline/actions/read_excel.py +0 -0
  60. cloe_nessy/pipeline/actions/read_files.py +0 -0
  61. cloe_nessy/pipeline/actions/read_metadata_yaml.py +0 -0
  62. cloe_nessy/pipeline/actions/transform_change_datatype.py +0 -0
  63. cloe_nessy/pipeline/actions/transform_clean_column_names.py +0 -0
  64. cloe_nessy/pipeline/actions/transform_concat_columns.py +0 -0
  65. cloe_nessy/pipeline/actions/transform_decode.py +0 -0
  66. cloe_nessy/pipeline/actions/transform_deduplication.py +0 -0
  67. cloe_nessy/pipeline/actions/transform_distinct.py +0 -0
  68. cloe_nessy/pipeline/actions/transform_filter.py +0 -0
  69. cloe_nessy/pipeline/actions/transform_generic_sql.py +0 -0
  70. cloe_nessy/pipeline/actions/transform_group_aggregate.py +0 -0
  71. cloe_nessy/pipeline/actions/transform_hash_columns.py +209 -0
  72. cloe_nessy/pipeline/actions/transform_join.py +0 -0
  73. cloe_nessy/pipeline/actions/transform_json_normalize.py +0 -0
  74. cloe_nessy/pipeline/actions/transform_rename_columns.py +0 -0
  75. cloe_nessy/pipeline/actions/transform_replace_values.py +0 -0
  76. cloe_nessy/pipeline/actions/transform_select_columns.py +0 -0
  77. cloe_nessy/pipeline/actions/transform_union.py +0 -0
  78. cloe_nessy/pipeline/actions/write_catalog_table.py +0 -0
  79. cloe_nessy/pipeline/actions/write_delta_append.py +69 -0
  80. cloe_nessy/pipeline/actions/write_delta_merge.py +118 -0
  81. cloe_nessy/pipeline/actions/write_file.py +94 -0
  82. cloe_nessy/pipeline/pipeline.py +44 -2
  83. cloe_nessy/pipeline/pipeline_action.py +0 -0
  84. cloe_nessy/pipeline/pipeline_config.py +0 -0
  85. cloe_nessy/pipeline/pipeline_context.py +0 -0
  86. cloe_nessy/pipeline/pipeline_parsing_service.py +0 -0
  87. cloe_nessy/pipeline/pipeline_step.py +0 -0
  88. cloe_nessy/py.typed +0 -0
  89. cloe_nessy/session/__init__.py +0 -0
  90. cloe_nessy/session/session_manager.py +27 -0
  91. cloe_nessy/settings/__init__.py +0 -0
  92. cloe_nessy/settings/settings.py +0 -0
  93. cloe_nessy/utils/__init__.py +0 -0
  94. cloe_nessy/utils/file_and_directory_handler.py +0 -0
  95. cloe_nessy-0.3.9.dist-info/METADATA +70 -0
  96. {cloe_nessy-0.3.5.dist-info → cloe_nessy-0.3.9.dist-info}/RECORD +35 -18
  97. {cloe_nessy-0.3.5.dist-info → cloe_nessy-0.3.9.dist-info}/WHEEL +1 -1
  98. {cloe_nessy-0.3.5.dist-info → cloe_nessy-0.3.9.dist-info}/top_level.txt +0 -0
  99. cloe_nessy-0.3.5.dist-info/METADATA +0 -26
cloe_nessy/pipeline/actions/write_delta_append.py ADDED
@@ -0,0 +1,69 @@
+from typing import Any
+
+from ...integration.writer import DeltaAppendWriter
+from ...models.adapter import UnityCatalogAdapter
+from ...pipeline import PipelineAction, PipelineContext
+
+
+class WriteDeltaAppendAction(PipelineAction):
+    """This class implements an Append action for an ETL pipeline.
+
+    The WriteDeltaAppendAction appends a DataFrame to a Delta Table.
+
+    Returns:
+        None.
+    """
+
+    name: str = "WRITE_DELTA_APPEND"
+
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        table_identifier: str | None = None,
+        ignore_empty_df: bool = False,
+        options: dict[str, Any] | None = None,
+        **_: Any,
+    ) -> PipelineContext:
+        """Append the dataframe to the delta table.
+
+        Args:
+            context: Context in which this Action is executed.
+            table_identifier: The identifier of the table. If passed, the
+                UC Adapter will be used to create a table object. Otherwise the Table
+                object will be created from the table metadata in the context.
+            ignore_empty_df: A flag indicating whether to ignore an empty source dataframe.
+            options: Additional options for the append writer.
+
+        Raises:
+            ValueError: If the table does not exist.
+            ValueError: If the data is not set in the pipeline context.
+            ValueError: If the table metadata is empty.
+
+        Returns:
+            Pipeline Context
+        """
+        delta_append_writer = DeltaAppendWriter()
+
+        if context.data is None:
+            raise ValueError("Data is required for the append operation.")
+        if context.table_metadata is None and table_identifier is None:
+            raise ValueError("Table metadata or a table identifier are required for the append operation.")
+
+        if table_identifier is not None:
+            context.table_metadata = UnityCatalogAdapter().get_table_by_name(table_identifier)
+        else:
+            if context.table_metadata is None:
+                raise ValueError("Table metadata is required.")
+
+        if context.table_metadata is None:
+            raise ValueError("Table metadata is required.")
+
+        delta_append_writer.write(
+            table_identifier=context.table_metadata.identifier,
+            table_location=context.table_metadata.storage_path,
+            data_frame=context.data,
+            ignore_empty_df=ignore_empty_df,
+            options=options,
+        )
+        return context.from_existing()
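Note: the sketch below is not part of the diff. It shows how the new `WriteDeltaAppendAction` would plausibly be invoked, assuming an installed `cloe_nessy` wheel and a `PipelineContext` (`context`) whose `data` attribute already holds a DataFrame; the import path follows the RECORD entry for this file, and the table identifier is an example.

```python
# Hypothetical usage sketch -- `context` is assumed to be a PipelineContext
# built by earlier pipeline steps, with context.data set to a Spark DataFrame.
from cloe_nessy.pipeline.actions.write_delta_append import WriteDeltaAppendAction

action = WriteDeltaAppendAction()

# Resolve the target table via Unity Catalog by identifier; the action then
# appends context.data to that table and returns a fresh context.
context = action.run(
    context,
    table_identifier="my_catalog.my_schema.my_table",  # assumed example name
    ignore_empty_df=True,             # tolerate an empty source DataFrame
    options={"mergeSchema": "true"},  # passed through to the Delta writer
)
```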
cloe_nessy/pipeline/actions/write_delta_merge.py ADDED
@@ -0,0 +1,118 @@
+from typing import Any
+
+from ...integration.writer import DeltaMergeWriter
+from ...models.adapter import UnityCatalogAdapter
+from ...pipeline import PipelineAction, PipelineContext
+
+
+class WriteDeltaMergeAction(PipelineAction):
+    """This class implements a Merge action for an ETL pipeline.
+
+    The WriteDeltaMergeAction merges a DataFrame into a Delta Table.
+
+    Returns:
+        None.
+    """
+
+    name: str = "WRITE_DELTA_MERGE"
+
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        table_identifier: str | None = None,
+        key_columns: list[str] | None = None,
+        cols_to_update: list[str] | None = None,
+        cols_to_insert: list[str] | None = None,
+        cols_to_exclude: list[str] | None = None,
+        when_matched_update: bool = True,
+        when_matched_deleted: bool = False,
+        when_not_matched_insert: bool = True,
+        use_partition_pruning: bool = True,
+        ignore_empty_df: bool = False,
+        create_if_not_exists: bool = True,
+        refresh_table: bool = True,
+        **_: Any,
+    ) -> PipelineContext:
+        """Merge the dataframe into the delta table.
+
+        Args:
+            context: Context in which this Action is executed.
+            table_identifier: The identifier of the table. If passed, the
+                UC Adapter will be used to create a table object. Otherwise the Table
+                object will be created from the table metadata in the context.
+            key_columns: List of column names that form the
+                key for the merge operation.
+            when_matched_update: Flag to specify whether to
+                perform an update operation when matching records are found in
+                the target Delta table.
+            when_matched_deleted: Flag to specify whether to
+                perform a delete operation when matching records are found in
+                the target Delta table.
+            when_not_matched_insert: Flag to specify whether to perform an
+                insert operation when matching records are not found in the target
+                Delta table.
+            cols_to_update: List of column names to be
+                updated in the target Delta table.
+            cols_to_insert: List of column names to be
+                inserted into the target Delta table.
+            cols_to_exclude: List of column names to be
+                excluded from the merge operation.
+            use_partition_pruning: Flag to specify whether to use partition
+                pruning to optimize the performance of the merge operation.
+            ignore_empty_df: A flag indicating whether to ignore an empty source dataframe.
+            create_if_not_exists: Create the table if it does not exist.
+            refresh_table: Refresh the table after the transaction.
+
+        Raises:
+            ValueError: If the table does not exist.
+            ValueError: If the data is not set in the pipeline context.
+            ValueError: If the table metadata is empty.
+
+        Returns:
+            Pipeline Context
+        """
+        delta_merge_writer = DeltaMergeWriter()
+
+        if context.data is None:
+            raise ValueError("Data is required for the merge operation.")
+        if context.table_metadata is None and table_identifier is None:
+            raise ValueError("Table metadata or a table identifier are required for the merge operation.")
+
+        if table_identifier is not None:
+            context.table_metadata = UnityCatalogAdapter().get_table_by_name(table_identifier)
+        else:
+            if context.table_metadata is None:
+                raise ValueError("Table metadata is required.")
+
+        if context.table_metadata is None:
+            raise ValueError("Table metadata is required.")
+
+        if create_if_not_exists:
+            delta_merge_writer.table_manager.create_table(table=context.table_metadata, ignore_if_exists=True)
+
+        if not delta_merge_writer.table_manager.table_exists(context.table_metadata):
+            raise ValueError(f"Table {context.table_metadata.name} does not exist.")
+
+        assert key_columns is not None, "Key columns must be provided."
+
+        delta_merge_writer.write(
+            table_identifier=context.table_metadata.identifier,
+            storage_path=str(context.table_metadata.storage_path),
+            data_frame=context.data,
+            key_columns=key_columns,
+            cols_to_update=cols_to_update,
+            cols_to_insert=cols_to_insert,
+            cols_to_exclude=cols_to_exclude,
+            when_matched_update=when_matched_update,
+            when_matched_deleted=when_matched_deleted,
+            when_not_matched_insert=when_not_matched_insert,
+            use_partition_pruning=use_partition_pruning,
+            partition_by=context.table_metadata.partition_by,
+            ignore_empty_df=ignore_empty_df,
+        )
+
+        if refresh_table:
+            delta_merge_writer.table_manager.refresh_table(table_identifier=context.table_metadata.identifier)
+
+        return context.from_existing()
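Note: a hedged sketch (not from the diff) of the companion merge action, under the same assumptions as above. `key_columns` must be supplied since the action asserts it is not None, and the target table is created first while `create_if_not_exists` stays at its default; identifier and key column are example values.

```python
# Hypothetical usage sketch -- `context` is again an assumed PipelineContext
# with context.data set to the source DataFrame.
from cloe_nessy.pipeline.actions.write_delta_merge import WriteDeltaMergeAction

context = WriteDeltaMergeAction().run(
    context,
    table_identifier="my_catalog.my_schema.orders",
    key_columns=["order_id"],      # required: the action asserts it is not None
    when_matched_update=True,      # UPDATE rows whose keys match
    when_not_matched_insert=True,  # INSERT rows with no key match
    use_partition_pruning=True,    # constrain the merge to touched partitions
    refresh_table=True,            # refresh table metadata after the merge
)
```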
cloe_nessy/pipeline/actions/write_file.py ADDED
@@ -0,0 +1,94 @@
+from typing import Any
+
+from ...integration.writer import FileWriter
+from ...pipeline import PipelineAction, PipelineContext
+
+
+class WriteFileAction(PipelineAction):
+    """This class implements a Write action for an ETL pipeline.
+
+    The WriteFileAction writes a Dataframe to a storage location defined in the
+    options using the [`FileWriter`][cloe_nessy.integration.writer.FileWriter] class.
+
+    Example:
+        ```yaml
+        Write to File:
+            action: WRITE_FILE
+            options:
+                path: "path/to/location"
+                format: "parquet"
+                partition_cols: ["date"]
+                mode: "append"
+                is_stream: False
+                options:
+                    mergeSchema: "true"
+        ```
+    """
+
+    name: str = "WRITE_FILE"
+
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        path: str = "",
+        format: str = "delta",
+        partition_cols: list[str] | None = None,
+        mode: str = "append",
+        is_stream: bool = False,
+        options: dict[str, str] | None = None,
+        **_: Any,
+    ) -> PipelineContext:
+        """Writes a file to a location.
+
+        Args:
+            context: Context in which this Action is executed.
+            path: Location to write data to.
+            format: Format of files to write.
+            partition_cols: Columns to partition on. If None, the writer will try to get the partition
+                columns from the metadata. Default None.
+            mode: Specifies the behavior when data or table already exists.
+            is_stream: If True, use the `write_stream` method of the writer.
+            options: Additional options passed to the writer.
+
+        Raises:
+            ValueError: If no path is provided.
+            ValueError: If the table metadata is empty.
+
+        Returns:
+            Pipeline Context
+        """
+        if not path:
+            raise ValueError("No path provided. Please specify path to write data to.")
+        if not options:
+            options = {}
+
+        if context.data is None:
+            raise ValueError("Data context is required for the operation.")
+
+        if partition_cols is None:
+            if context.table_metadata is None:
+                partition_cols = []
+            else:
+                partition_cols = context.table_metadata.partition_by
+        writer = FileWriter()
+        if not is_stream:
+            writer.write(
+                data_frame=context.data,
+                location=path,
+                format=format,
+                partition_cols=partition_cols,
+                mode=mode,
+                options=options,
+            )
+        else:
+            writer.write_stream(
+                data_frame=context.data,
+                location=path,
+                format=format,
+                mode=mode,
+                partition_cols=partition_cols,
+                options=options,
+            )
+
+        return context.from_existing()
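Note: beyond the YAML form shown in the docstring above, the action can be driven directly; a sketch under the same assumptions (installed package, prepared `context`), with path and partition column as example values.

```python
# Hypothetical usage sketch -- batch write; is_stream=True would route to
# FileWriter.write_stream instead of FileWriter.write.
from cloe_nessy.pipeline.actions.write_file import WriteFileAction

context = WriteFileAction().run(
    context,
    path="/mnt/lake/bronze/events",  # assumed example location
    format="parquet",
    partition_cols=["date"],         # if None, falls back to table_metadata.partition_by
    mode="append",
    is_stream=False,
    options={"mergeSchema": "true"},
)
```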
cloe_nessy/pipeline/pipeline.py CHANGED
@@ -134,9 +134,51 @@ class Pipeline(LoggerMixin):
         self._console_logger.debug("No more steps to run")
         return False
 
-    def run(self) -> None:
-        """Runs the pipeline by executing each step in the correct order."""
+    def _trim_graph(self, graph, until):
+        """Trims the pipeline graph to only include steps up to and including the specified 'until' step.
+
+        This method first verifies that the given step exists in the graph. It then finds all ancestors
+        (i.e., steps that precede the 'until' step) and creates a subgraph consisting solely of those steps.
+
+        Args:
+            graph: The complete directed acyclic graph representing the pipeline.
+            until: The identifier of the step up to which the graph should be trimmed.
+
+        Returns:
+            A subgraph containing only the steps leading to (and including) the 'until' step.
+
+        Raises:
+            ValueError: If the specified 'until' step is not found in the graph.
+        """
+        if until not in graph.nodes:
+            raise ValueError(f"Step '{until}' not found in the pipeline.")
+
+        predecessors = set(nx.ancestors(graph, until))
+        predecessors.add(until)
+
+        trimmed_graph = graph.subgraph(predecessors).copy()
+        return trimmed_graph
+
+    def run(self, until: str | None = None) -> None:
+        """Executes the pipeline steps in the correct order based on dependencies.
+
+        This method creates a directed acyclic graph (DAG) of the pipeline steps and, if specified, trims
+        the graph to only include steps up to and including the given 'until' step. It then concurrently
+        executes steps with no pending dependencies using a ThreadPoolExecutor, ensuring that all steps run
+        in order. If a cyclic dependency is detected, or if any step fails during execution, an error is raised.
+
+        Args:
+            until: Optional; the identifier of the step up to which the pipeline should be executed.
+
+        Raises:
+            RuntimeError: If a cyclic dependency is detected.
+            Exception: Propagates any error raised during the execution of a step.
+        """
         g = self._create_graph()
+
+        if until is not None:
+            g = self._trim_graph(g, until)
+
         remaining_steps = list(g.nodes())
         self._console_logger.info(f"Pipeline [' {self.name} '] started with {len(remaining_steps)} steps.")
 
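Note: the trimming logic is plain networkx; the self-contained sketch below (example step names, not from the package) reproduces it to show that the subgraph keeps the `until` node itself, so `run(until=...)` executes that step too.

```python
# Self-contained demonstration of the _trim_graph idea.
import networkx as nx

g = nx.DiGraph([("extract", "transform"), ("transform", "load"), ("extract", "audit")])

until = "transform"
keep = set(nx.ancestors(g, until))  # steps that must run before 'until'
keep.add(until)                     # 'until' itself stays in the graph

trimmed = g.subgraph(keep).copy()
print(sorted(trimmed.nodes()))      # ['extract', 'transform'] -- 'load' and 'audit' are dropped
```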
cloe_nessy/session/session_manager.py CHANGED
@@ -1,3 +1,5 @@
+import json
+import os
 from enum import Enum
 from typing import Any
 
@@ -16,12 +18,14 @@ class SessionManager:
         - FABRIC_UI: Represents the Fabric user interface.
         - DATABRICKS_CONNECT: Represents the Databricks Connect environment.
         - OTHER_REMOTE_SPARK: Represents other remote Spark environments, such as used in tests.
+        - STANDALONE_SPARK: Represents a standalone Spark cluster environment.
         """
 
         DATABRICKS_UI = "databricks_ui"
         FABRIC_UI = "fabric_ui"
         DATABRICKS_CONNECT = "databricks_connect"
         OTHER_REMOTE_SPARK = "other_remote_spark"
+        STANDALONE_SPARK = "standalone_spark"
 
     _spark: SparkSession | None = None
     _utils = None
@@ -52,6 +56,15 @@ class SessionManager:
 
         builder = cls.get_spark_builder()
 
+        # Check if NESSY_SPARK_CONFIG environment variable is set and load it as config
+        nessy_spark_config = os.getenv("NESSY_SPARK_CONFIG")
+        if nessy_spark_config:
+            try:
+                # Parse the JSON configuration from the environment variable
+                config = json.loads(nessy_spark_config)
+            except json.JSONDecodeError as e:
+                raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
+
         if config:
             for key, value in config.items():
                 builder.config(key, value)  # type: ignore
@@ -84,6 +97,7 @@ class SessionManager:
             cls.Environment.DATABRICKS_UI: cls._get_dbutils,
             cls.Environment.DATABRICKS_CONNECT: cls._get_dbutils,
             cls.Environment.OTHER_REMOTE_SPARK: cls._get_dbutils,
+            cls.Environment.STANDALONE_SPARK: cls._get_localsparkutils,
             cls.Environment.FABRIC_UI: cls._get_mssparkutils,
         }
 
@@ -112,6 +126,10 @@ class SessionManager:
 
         cls._utils = mssparkutils
 
+    @classmethod
+    def _get_localsparkutils(cls):
+        return None
+
     @classmethod
     def _detect_env(cls) -> Environment | None:
         """Detects the current execution environment for Spark.
@@ -169,6 +187,14 @@ class SessionManager:
         except ImportError:
             pass
 
+        try:
+            from pyspark.sql import SparkSession  # noqa: F401
+
+            cls._env = cls.Environment.STANDALONE_SPARK
+            return cls._env
+        except ImportError:
+            pass
+
         raise RuntimeError("Cannot detect environment.")
 
     @classmethod
@@ -180,6 +206,7 @@ class SessionManager:
             cls.Environment.FABRIC_UI: SparkSession.builder,
             cls.Environment.DATABRICKS_CONNECT: cls._get_databricks_connect_builder,
             cls.Environment.OTHER_REMOTE_SPARK: cls._get_databricks_connect_builder,
+            cls.Environment.STANDALONE_SPARK: SparkSession.builder,
         }
         builder = builders.get(cls._env)
         if builder is None:
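Note: the new NESSY_SPARK_CONFIG hook expects a JSON object in the environment; a minimal self-contained sketch of the parse-and-apply behavior (the config key is an example, and the print stands in for the SparkSession builder):

```python
# Mirror of the diff's logic: parse NESSY_SPARK_CONFIG as JSON and apply each
# key/value pair to the Spark builder (printed here instead of applied).
import json
import os

os.environ["NESSY_SPARK_CONFIG"] = '{"spark.sql.shuffle.partitions": "8"}'

raw = os.getenv("NESSY_SPARK_CONFIG")
if raw:
    try:
        config = json.loads(raw)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in NESSY_SPARK_CONFIG: {e}") from e
    for key, value in config.items():
        print(f"builder.config({key!r}, {value!r})")  # stand-in for builder.config(key, value)
```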
cloe_nessy-0.3.9.dist-info/METADATA ADDED
@@ -0,0 +1,70 @@
+Metadata-Version: 2.4
+Name: cloe-nessy
+Version: 0.3.9
+Summary: Your friendly datalake monster.
+Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
+License: MIT
+Project-URL: homepage, https://initions.com/
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Console
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Database
+Requires-Python: <3.12,>=3.11
+Description-Content-Type: text/markdown
+Requires-Dist: pydantic<3.0.0,>=2.7.2
+Requires-Dist: pyyaml<7.0.0,>=6.0.1
+Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
+Requires-Dist: jinja2<4.0.0,>=3.1.4
+Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
+Requires-Dist: openpyxl<4.0.0,>=3.1.5
+Requires-Dist: requests<3.0.0,>=2.32.3
+Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
+Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
+Requires-Dist: azure-identity<2.0.0,>=1.19.0
+Requires-Dist: httpx<1.0.0,>=0.27.2
+Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
+Requires-Dist: networkx<4.0,>=3.3
+Requires-Dist: matplotlib<4.0.0,>=3.9.2
+Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
+Requires-Dist: fsspec<2025.6.0,>=2025.5.1
+Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.7
+
+# cloe-nessy
+
+[![Copier](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/copier-org/copier/master/img/badge/badge-grayscale-inverted-border-orange.json)](https://github.com/copier-org/copier)
+[![python](https://img.shields.io/badge/Python-3.11-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
+[![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v0.json)](https://github.com/charliermarsh/ruff)
+[![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
+
+Owner: initions
+
+Nessy is diving deep into Data, unleashing insights with ease.
+
+## Introduction
+
+"Nessy" is a comprehensive Python datalake framework that provides a seamless,
+efficient, and user-friendly platform for managing and analyzing datasets using
+pyspark. Its advanced features allow operations on each level of an
+Extract-Transform-Load (ETL) Workflow.
+
+## Contributing
+
+When you are contributing, please refer to our Contribution Guide in the *nessy*
+Docs
+[here](https://white-rock-0cabbc003.1.azurestaticapps.net/tool_docs/nessy/Developer-Guide/)!
+
+## Usage
+
+Please find the User Guide
+[here](https://white-rock-0cabbc003.1.azurestaticapps.net/tool_docs/nessy/User-Guide/)!
+
+## Contact
+
+Please reach out to the *nessy* Team for any questions around this package and
+repository.
{cloe_nessy-0.3.5.dist-info → cloe_nessy-0.3.9.dist-info}/RECORD CHANGED
@@ -18,38 +18,51 @@ cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=RnQjWtWIFzFj-zPq
 cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=urayKfOUpSaXKgTs1KVK0TS7FWVrJ3k4OLKh35sCxAU,3194
 cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/integration/reader/__init__.py,sha256=J5vlORqHLBpHEvzIwfIjzN5xEdOat-8jlmdLcGj8nsA,239
-cloe_nessy/integration/reader/api_reader.py,sha256=j3Z5O1oH-Zc43TyA_aYtnDNYC9xFMxMqDsRQWtEZGD8,5636
-cloe_nessy/integration/reader/catalog_reader.py,sha256=tGK-Y0jZQGOrF9eZUzSr7ils-L58uex6qH9PZ81ZLy8,1835
-cloe_nessy/integration/reader/excel_reader.py,sha256=4kifpIakHpGmap0-P0SUgjJoQdY-eeiZBIDrQp87wK8,8012
+cloe_nessy/integration/reader/api_reader.py,sha256=3Mf-txOTJ1dXCzdNtRTLC8UKftKms4NxOoLVgzcc2eo,5691
+cloe_nessy/integration/reader/catalog_reader.py,sha256=lwDeWBVXfFh75XknPawetL9ZBtqS-Oss5rNzbrEeIQg,2070
+cloe_nessy/integration/reader/excel_reader.py,sha256=8KCqKBYFE6RGCiahJimQOAtbYZzaUzlnoslW9yca5P8,8035
 cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
-cloe_nessy/integration/reader/file_reader.py,sha256=1os8pZIXAGTJBZjGREmHOTlZeabbikC7sDv5xn3bIjE,3950
+cloe_nessy/integration/reader/file_reader.py,sha256=3DcZhyyL-Cf_R7Px1UDHJwpO8Un31dWey2Q-f4DtWfY,6879
 cloe_nessy/integration/reader/reader.py,sha256=e2KVPePQme8SBQJEbL-3zpGasOgTiEvKFTslow2wGPw,1034
-cloe_nessy/integration/writer/__init__.py,sha256=NIh0t1RYlG3J1Y5_CvnR36N9tISmcElD5Tq06ksmqoA,71
+cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
 cloe_nessy/integration/writer/catalog_writer.py,sha256=Gb-hMdADgO_uUJ7mZPHBYyNme2qXsdFFnzwo7GcShHM,2192
+cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70To4L6Q182pXx2HRM,5454
+cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
+cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
+cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
+cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
+cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=kiacqQ2FYQSzakJqZ9-ZHH3os4X7--QuER_2xx9y21k,971
+cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
+cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
 cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
 cloe_nessy/logging/logger_mixin.py,sha256=9iy7BF6drYme-f7Rrt_imbVBRgVqQ89xjcP1X5aMtfY,7467
-cloe_nessy/models/__init__.py,sha256=_JPN_R5-QDfjYzvrvZDdeOezl0C-JTG-Rk4S1VE5vJM,242
+cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
+cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
 cloe_nessy/models/column.py,sha256=53fBwRnino72XKACsHZpN9QfCBqqSXyKLHZlM0huumg,1988
 cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
 cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
-cloe_nessy/models/schema.py,sha256=8bc1fakLUWZzkVZ_Zn5iWMUvfDNxnuoToNE4kmqtBJo,2764
-cloe_nessy/models/table.py,sha256=lshPBA3D6vA1samtC7WmlfZZWrMUrOLna89rs8lhGCI,10472
+cloe_nessy/models/schema.py,sha256=yUrjjEhAH5zbCymE67Az_jPnVB8hGO-_UNfqzeZCD_Y,3376
+cloe_nessy/models/table.py,sha256=V1gsne79I-DQ47jTHNhGjWsg4i8io-VC9rv0PjlXDNQ,11848
 cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
+cloe_nessy/models/volume.py,sha256=7_lH4X-Au8jYNRVFQ5F2x-fRy2J8Z5-cLTn3Z3mpLQs,2197
+cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
+cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1syLxjT5Wzo4uog1hFSEs76M,12651
 cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/models/mixins/read_instance_mixin.py,sha256=j5Y4aNWOh1jlskEaxNooZFJgPyxRmik00gAVLJnAaRs,4507
 cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
-cloe_nessy/object_manager/__init__.py,sha256=tt_sBt8eC-jCP8FShqRPKJvGNVIPeb-htA7NoUivTjY,68
-cloe_nessy/object_manager/table_manager.py,sha256=K6OGCNNDt1ceLA0MkwtyW6AR5tYIW3tfqF3ZcvHlcUw,2717
+cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
+cloe_nessy/object_manager/table_manager.py,sha256=oYcYiZR0-JyoadcCcDelxfFb-ATeKDIZerYaZc-moiI,12634
+cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
 cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
-cloe_nessy/pipeline/pipeline.py,sha256=oQ1PwYkOSGHOfgbmImy7IbB5Ma-NKHN_CMXq1FepTc4,9206
+cloe_nessy/pipeline/pipeline.py,sha256=-1tJVs9rZf8CcwieH4IP7mqJZ6mL7bQUZ56TNKt8eO8,11154
 cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6IloVd2Fj31Sg,1944
 cloe_nessy/pipeline/pipeline_config.py,sha256=BN3ZSbr6bC-X9edoh-n5vRfPHFMbgtAU7mQ3dBrcWO8,3131
 cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0YEg0zlQTz58,1874
 cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
 cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
-cloe_nessy/pipeline/actions/__init__.py,sha256=LwKctXy4Jun52BnCVGvWa8nnKVjTSov4GT58j6Zy8zg,2273
+cloe_nessy/pipeline/actions/__init__.py,sha256=9gjSQKLGrPcaYaJrTYZde8d4yNrN1SoXN_DDHq5KrvY,2600
 cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
-cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=-k2wezkv8bE_xwoW7WM1ORhrCXQagKTUuXkhI2ZEROs,2783
+cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=oXbqbc6BfR82dSIGclwzWiTN8EVmpFjNIYLKm4qOU50,2754
 cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
 cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
 cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
@@ -62,6 +75,7 @@ cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD
 cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1QeatjdEis0up4I7cOWBdyo,1446
 cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
 cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpbsPEJkzea5zFJA6MuyjNpOsFud9o,4045
+cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=heRjBA-Gfu-nmNHOjTYlipEpKY8oNPAHAY40vjJk3aI,8383
 cloe_nessy/pipeline/actions/transform_join.py,sha256=e_tvMk8YJTAWcUK_EmOgNt0s31ICZoMX_MKOTWx4lBY,3645
 cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
 cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
@@ -69,13 +83,16 @@ cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO
 cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
 cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
 cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
+cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
+cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
+cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
-cloe_nessy/session/session_manager.py,sha256=PK7awMc6fmot7f9FMmvIUbIzKFgjcy2o2bZS9kjVs10,6733
+cloe_nessy/session/session_manager.py,sha256=f4OeeyGD3becDQGkdDbck3jVH9ulOCBWjW6Jaj_MIrc,7765
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.5.dist-info/METADATA,sha256=UUx3aIUgvCLn7j3H4DbCL1k9-47HPKaANiMQsUj66wo,1837
-cloe_nessy-0.3.5.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-cloe_nessy-0.3.5.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
-cloe_nessy-0.3.5.dist-info/RECORD,,
+cloe_nessy-0.3.9.dist-info/METADATA,sha256=TUUpWkn2wjKwCwkLdCh0gkmXlebXo6XEEjsvzUs74n8,3161
+cloe_nessy-0.3.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cloe_nessy-0.3.9.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
+cloe_nessy-0.3.9.dist-info/RECORD,,
{cloe_nessy-0.3.5.dist-info → cloe_nessy-0.3.9.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (76.0.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any