cloe-nessy 0.3.16.6__py3-none-any.whl → 0.3.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. cloe_nessy/integration/delta_loader/__init__.py +14 -0
  2. cloe_nessy/integration/delta_loader/delta_load_options.py +37 -0
  3. cloe_nessy/integration/delta_loader/delta_loader.py +165 -0
  4. cloe_nessy/integration/delta_loader/delta_loader_factory.py +53 -0
  5. cloe_nessy/integration/delta_loader/delta_loader_metadata_table.py +68 -0
  6. cloe_nessy/integration/delta_loader/strategies/__init__.py +9 -0
  7. cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py +361 -0
  8. cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py +163 -0
  9. cloe_nessy/integration/reader/catalog_reader.py +33 -6
  10. cloe_nessy/integration/reader/file_reader.py +23 -0
  11. cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py +1 -1
  12. cloe_nessy/logging/logger_mixin.py +0 -1
  13. cloe_nessy/models/column.py +1 -1
  14. cloe_nessy/models/table.py +4 -3
  15. cloe_nessy/pipeline/actions/__init__.py +2 -0
  16. cloe_nessy/pipeline/actions/read_catalog_table.py +36 -3
  17. cloe_nessy/pipeline/actions/read_files.py +45 -3
  18. cloe_nessy/pipeline/actions/transform_deduplication.py +7 -12
  19. cloe_nessy/pipeline/actions/transform_hash_columns.py +7 -7
  20. cloe_nessy/pipeline/actions/write_catalog_table.py +5 -0
  21. cloe_nessy/pipeline/actions/write_delta_append.py +15 -0
  22. cloe_nessy/pipeline/actions/write_delta_merge.py +23 -0
  23. cloe_nessy/pipeline/actions/write_file.py +6 -1
  24. cloe_nessy/pipeline/utils/__init__.py +5 -0
  25. cloe_nessy/pipeline/utils/delta_load_utils.py +36 -0
  26. cloe_nessy/utils/column_names.py +9 -0
  27. {cloe_nessy-0.3.16.6.dist-info → cloe_nessy-0.3.16.7.dist-info}/METADATA +1 -1
  28. {cloe_nessy-0.3.16.6.dist-info → cloe_nessy-0.3.16.7.dist-info}/RECORD +30 -19
  29. {cloe_nessy-0.3.16.6.dist-info → cloe_nessy-0.3.16.7.dist-info}/WHEEL +0 -0
  30. {cloe_nessy-0.3.16.6.dist-info → cloe_nessy-0.3.16.7.dist-info}/top_level.txt +0 -0
cloe_nessy/models/table.py

@@ -2,7 +2,6 @@ from pathlib import Path
 from typing import Any, Self
 
 import yaml
-import yaml.scanner
 from jinja2 import TemplateNotFound
 from pydantic import (
     Field,
@@ -11,6 +10,8 @@ from pydantic import (
     field_validator,
     model_validator,
 )
+from yaml.parser import ParserError
+from yaml.scanner import ScannerError
 
 from ..logging import LoggerMixin
 from ..utils.file_and_directory_handler import process_path
@@ -225,8 +226,8 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
                 errors += sub_errors
             except (
                 ValidationError,
-                yaml.parser.ParserError,
-                yaml.scanner.ScannerError,
+                ParserError,
+                ScannerError,
             ) as e:
                 instance = None
                 errors.append(e)
cloe_nessy/pipeline/actions/__init__.py

@@ -10,6 +10,7 @@ from .transform_change_datatype import TransformChangeDatatypeAction
 from .transform_clean_column_names import TransformCleanColumnNamesAction
 from .transform_concat_columns import TransformConcatColumnsAction
 from .transform_decode import TransformDecodeAction
+from .transform_deduplication import TransformDeduplication
 from .transform_distinct import TransformDistinctAction
 from .transform_filter import TransformFilterAction
 from .transform_generic_sql import TransformSqlAction
@@ -46,6 +47,7 @@ __all__ = [
    "TransformCleanColumnNamesAction",
    "TransformConcatColumnsAction",
    "TransformDecodeAction",
+   "TransformDeduplication",
    "TransformDistinctAction",
    "TransformSqlAction",
    "TransformGroupAggregate",
cloe_nessy/pipeline/actions/read_catalog_table.py

@@ -1,8 +1,10 @@
 from typing import Any
 
+from ...integration.delta_loader import DeltaLoadOptions
 from ...integration.reader import CatalogReader
 from ..pipeline_action import PipelineAction
 from ..pipeline_context import PipelineContext
+from ..utils import set_delta_load_info
 
 
 class ReadCatalogTableAction(PipelineAction):
@@ -21,6 +23,12 @@ class ReadCatalogTableAction(PipelineAction):
         options:
             table_identifier: my_catalog.business_schema.sales_table
             options: <options for the CatalogReader read method>
+            delta_load_options:
+                strategy: CDF
+                delta_load_identifier: my_delta_load_id
+                strategy_options:
+                    deduplication_columns: ["id"]
+                    enable_full_load: true
     ```
     """
 
@@ -32,6 +40,7 @@ class ReadCatalogTableAction(PipelineAction):
         *,
         table_identifier: str | None = None,
         options: dict[str, str] | None = None,
+        delta_load_options: dict[Any, Any] | DeltaLoadOptions | None = None,
         **_: Any,  # define kwargs to match the base class signature
     ) -> PipelineContext:
         """Reads a table from Unity Catalog using a specified table identifier and optional reader configurations.
@@ -45,6 +54,8 @@ class ReadCatalogTableAction(PipelineAction):
             options: A dictionary of options for customizing
                 the [`CatalogReader`][cloe_nessy.integration.reader.catalog_reader]
                 behavior, such as filters or reading modes. Defaults to None.
+            delta_load_options: Options for delta loading, if applicable.
+                Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].
 
         Raises:
             ValueError: If neither `table_identifier` nor `table_metadata.identifier` in the `context` is provided.
@@ -53,13 +64,35 @@ class ReadCatalogTableAction(PipelineAction):
             An updated pipeline context containing the data read from the catalog table as a DataFrame.
         """
         if not options:
-            options = dict()
+            options = {}
+
+        if not delta_load_options:
+            delta_load_options = {}
 
         if (table_metadata := context.table_metadata) and table_identifier is None:
             table_identifier = table_metadata.identifier
         if table_identifier is None:
             raise ValueError("Table name must be specified or a valid Table object with identifier must be set.")
 
+        if isinstance(delta_load_options, dict):
+            delta_options_dict = delta_load_options
+            if delta_load_options:
+                delta_load_options = DeltaLoadOptions(**delta_load_options)
+            else:
+                delta_load_options = None
+        else:
+            delta_options_dict = delta_load_options.model_dump() if delta_load_options else {}
+
+        runtime_info = set_delta_load_info(
+            table_identifier=table_identifier,
+            delta_load_options=delta_options_dict,
+            runtime_info=context.runtime_info or {},
+        )
+
         table_reader = CatalogReader()
-        df = table_reader.read(table_identifier=table_identifier, options=options)
-        return context.from_existing(data=df)
+        df = table_reader.read(
+            table_identifier=table_identifier,
+            options=options,
+            delta_load_options=delta_load_options,
+        )
+        return context.from_existing(data=df, runtime_info=runtime_info)
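The branching above keeps two representations of the options in play: a plain dict (`delta_options_dict`) for bookkeeping in `runtime_info`, and a validated `DeltaLoadOptions` instance (or `None`) for the reader. A minimal sketch of that normalization, using a hypothetical stand-in pydantic model rather than the package's actual `DeltaLoadOptions` class:

```python
from typing import Any

from pydantic import BaseModel, Field


class DeltaLoadOptions(BaseModel):
    """Stand-in for cloe_nessy's DeltaLoadOptions; fields follow the docstring example."""

    strategy: str
    delta_load_identifier: str
    strategy_options: dict[str, Any] = Field(default_factory=dict)


def normalize(
    options: dict[str, Any] | DeltaLoadOptions | None,
) -> tuple[dict[str, Any], DeltaLoadOptions | None]:
    # Mirror the action's logic: dicts are validated into the model, empty or
    # None input disables delta loading, models are dumped back to dicts.
    if options is None or isinstance(options, dict):
        options_dict = options or {}
        model = DeltaLoadOptions(**options_dict) if options_dict else None
        return options_dict, model
    return options.model_dump(), options


opts_dict, opts_model = normalize(
    {"strategy": "CDF", "delta_load_identifier": "my_delta_load_id"}
)
assert opts_model is not None and opts_model.strategy == "CDF"
assert opts_dict["delta_load_identifier"] == "my_delta_load_id"
```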
cloe_nessy/pipeline/actions/read_files.py

@@ -1,8 +1,10 @@
 from typing import Any
 
+from ...integration.delta_loader import DeltaLoadOptions
 from ...integration.reader import FileReader
 from ..pipeline_action import PipelineAction
 from ..pipeline_context import PipelineContext
+from ..utils import set_delta_load_info
 
 
 class ReadFilesAction(PipelineAction):
@@ -55,6 +57,24 @@ class ReadFilesAction(PipelineAction):
         Use the `extension` option to specify the extension of the files
         to read. Additionally, use the `spark_format` option to specify
         the format with which to read the files.
+
+    === "Read Delta Lake table with delta loading"
+        ```yaml
+        Read Delta Files:
+            action: READ_FILES
+            options:
+                location: /path/to/delta/table
+                spark_format: delta
+                delta_load_options:
+                    strategy: CDF
+                    delta_load_identifier: my_delta_files_load
+                    strategy_options:
+                        deduplication_columns: ["id"]
+                        enable_full_load: false
+        ```
+    !!! note "Delta Loading for Files"
+        Use `delta_load_options` when reading Delta Lake tables to enable
+        incremental loading. This works with both CDF and timestamp strategies.
     """
 
     name: str = "READ_FILES"
@@ -70,6 +90,7 @@ class ReadFilesAction(PipelineAction):
         schema: str | None = None,
         add_metadata_column: bool = True,
         options: dict[str, str] | None = None,
+        delta_load_options: dict[Any, Any] | DeltaLoadOptions | None = None,
         **_: Any,
     ) -> PipelineContext:
         """Reads files from a specified location.
@@ -87,6 +108,8 @@ class ReadFilesAction(PipelineAction):
             add_metadata_column: Whether to include the `__metadata` column with
                 file metadata in the DataFrame.
             options: Additional options passed to the reader.
+            delta_load_options: Options for delta loading, if applicable. When provided
+                for Delta format files, enables incremental loading using delta loader strategies.
 
         Raises:
             ValueError: If neither `extension` nor `spark_format` are provided, or if
@@ -105,6 +128,25 @@ class ReadFilesAction(PipelineAction):
         if (metadata := context.table_metadata) and schema is None:
             schema = metadata.schema
 
+        # Convert dict to DeltaLoadOptions if needed
+        if isinstance(delta_load_options, dict):
+            delta_load_options = DeltaLoadOptions(**delta_load_options)
+
+        # Set up runtime info for delta loading
+        runtime_info = context.runtime_info or {}
+        if delta_load_options:
+            # Convert DeltaLoadOptions to dict for runtime info storage
+            delta_options_dict = (
+                delta_load_options.model_dump()
+                if isinstance(delta_load_options, DeltaLoadOptions)
+                else delta_load_options
+            )
+            runtime_info = set_delta_load_info(
+                table_identifier=location,  # Use location as identifier for file-based delta loading
+                delta_load_options=delta_options_dict,
+                runtime_info=runtime_info,
+            )
+
         file_reader = FileReader()
         df = file_reader.read(
             location=location,
@@ -114,11 +156,11 @@ class ReadFilesAction(PipelineAction):
             search_subdirs=search_subdirs,
             options=options,
             add_metadata_column=add_metadata_column,
+            delta_load_options=delta_load_options,
         )
 
-        runtime_info = context.runtime_info
-
-        if add_metadata_column:
+        # Only process metadata column if it exists and wasn't using delta loading
+        if add_metadata_column and "__metadata" in df.columns:
             read_files_list = [x.file_path for x in df.select("__metadata.file_path").drop_duplicates().collect()]
             if runtime_info is None:
                 runtime_info = {"read_files": read_files_list}
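The new `"__metadata" in df.columns` guard matters because a delta (CDF) read returns change rows rather than raw file contents, so the metadata struct may be absent. A small runnable illustration of the guarded bookkeeping, with a hand-built `__metadata` column standing in for what `FileReader` would attach:

```python
import pyspark.sql.functions as F
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Fabricate a frame carrying the __metadata struct a file read would add.
df = spark.createDataFrame([(1,), (2,)], ["id"]).withColumn(
    "__metadata", F.struct(F.lit("/data/part-0.json").alias("file_path"))
)

# Guarded collection of distinct source paths, as in the action above.
if "__metadata" in df.columns:
    read_files_list = [
        row.file_path
        for row in df.select("__metadata.file_path").drop_duplicates().collect()
    ]
    print(read_files_list)  # ['/data/part-0.json']
```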
cloe_nessy/pipeline/actions/transform_deduplication.py

@@ -1,11 +1,10 @@
-import random
-import string
 from typing import Any
 
 import pyspark.sql.functions as F
 import pyspark.sql.types as T
 from pyspark.sql import Window
 
+from ...utils.column_names import generate_unique_column_name
 from ..pipeline_action import PipelineAction
 from ..pipeline_context import PipelineContext
 
@@ -105,18 +104,14 @@ class TransformDeduplication(PipelineAction):
         else:
             order_by_list = [F.col(col_name).asc() for col_name in order_by_columns]
 
-        # create the window specification
-        window_specification = Window.partitionBy(key_columns).orderBy(order_by_list)
-
-        # generate a column name that is not in the input dataframe
-        def generate_random_string(length):
-            return "".join(random.choice(string.ascii_uppercase) for _ in range(length))
+        window_specification = (
+            Window.partitionBy(key_columns)
+            .orderBy(order_by_list)
+            .rowsBetween(Window.unboundedPreceding, Window.currentRow)
+        )
 
-        row_number_col_name = generate_random_string(20)
-        while row_number_col_name in context.data.columns:
-            row_number_col_name = generate_random_string(20)
+        row_number_col_name = generate_unique_column_name(existing_columns=set(context.data.columns), prefix="row_num")
 
-        # drop the duplicates
         df = (
            context.data.withColumn(row_number_col_name, F.row_number().over(window_specification))
            .filter(F.col(row_number_col_name) == 1)
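Two details of the rewrite are worth noting: the explicit `rowsBetween(Window.unboundedPreceding, Window.currentRow)` frame matches the frame `row_number()` requires, so it documents intent without changing results, and the UUID-based helper replaces the ad-hoc random-string loop. A self-contained sketch of the same keep-first-row pattern, with illustrative column names:

```python
import pyspark.sql.functions as F
from pyspark.sql import SparkSession, Window

spark = SparkSession.builder.master("local[1]").getOrCreate()

df = spark.createDataFrame(
    [(1, "2024-02-01"), (1, "2024-01-01"), (2, "2024-01-15")],
    ["id", "updated_at"],
)

# Rank rows per key, newest first, with row_number's required frame spelled out.
window_spec = (
    Window.partitionBy("id")
    .orderBy(F.col("updated_at").desc())
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)
)

deduped = (
    df.withColumn("row_num", F.row_number().over(window_spec))
    .filter(F.col("row_num") == 1)
    .drop("row_num")
)
deduped.show()  # one row per id: the latest by updated_at
```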
cloe_nessy/pipeline/actions/transform_hash_columns.py

@@ -132,13 +132,13 @@ class TransformHashColumnsAction(PipelineAction):
         action: TRANSFORM_HASH_COLUMNS
         options:
             hash_config:
-                - hashed_column1:
-                    columns: ["column1", "column2"]
-                    algorithm: "sha2"
-                    bits: 224
-                - hashed_column2:
-                    columns: ["column1"]
-                    algorithm: "crc32"
+                hashed_column1:
+                    columns: ["column1", "column2"]
+                    algorithm: "sha2"
+                    bits: 224
+                hashed_column2:
+                    columns: ["column1"]
+                    algorithm: "crc32"
     ```
 
     Given a DataFrame `df` with the following structure:
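The docstring fix above replaces a YAML block sequence (a list of one-key mappings) with a plain mapping, presumably the shape the action actually consumes. The difference is easy to see with `yaml.safe_load`:

```python
import yaml

# Old form: a list whose items are single-key dicts.
old_form = yaml.safe_load(
    """
- hashed_column1:
    columns: ["column1", "column2"]
    algorithm: "sha2"
"""
)

# New form: one mapping from output column name to hash settings.
new_form = yaml.safe_load(
    """
hashed_column1:
  columns: ["column1", "column2"]
  algorithm: "sha2"
"""
)

print(type(old_form), old_form)  # <class 'list'> [{'hashed_column1': {...}}]
print(type(new_form), new_form)  # <class 'dict'> {'hashed_column1': {...}}
```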
cloe_nessy/pipeline/actions/write_catalog_table.py

@@ -1,5 +1,6 @@
 from typing import Any
 
+from ...integration.delta_loader import consume_delta_load
 from ...integration.writer import CatalogWriter
 from ..pipeline_action import PipelineAction
 from ..pipeline_context import PipelineContext
@@ -63,6 +64,10 @@ class WriteCatalogTableAction(PipelineAction):
         if table_identifier is None:
             raise ValueError("Table name must be specified or a valid Table object with identifier must be set.")
 
+        runtime_info = getattr(context, "runtime_info", None)
+        if runtime_info and runtime_info.get("is_delta_load"):
+            consume_delta_load(runtime_info)
+
         writer = CatalogWriter()
         writer.write_table(
             df=context.data,  # type: ignore
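The same `is_delta_load` guard appears in each writer action in this release: whatever delta-load bookkeeping the read actions stored in `runtime_info` is handed to `consume_delta_load` when the pipeline writes. A stand-in sketch of the pattern; the real `consume_delta_load` lives in `cloe_nessy.integration.delta_loader`, and the body below is illustrative only:

```python
from typing import Any


def consume_delta_load(runtime_info: dict[str, Any]) -> None:
    # Stand-in only: the package's function commits per-table delta-load
    # state; here we just show which tables would be committed.
    for table in runtime_info.get("delta_load_options", {}):
        print(f"committing delta load state for {table}")


def on_write(runtime_info: dict[str, Any] | None) -> None:
    # The shared guard: consume only when a delta load was actually set up.
    if runtime_info and runtime_info.get("is_delta_load"):
        consume_delta_load(runtime_info)


on_write({"is_delta_load": True, "delta_load_options": {"cat.schema.sales": {"strategy": "CDF"}}})
```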
cloe_nessy/pipeline/actions/write_delta_append.py

@@ -1,5 +1,6 @@
 from typing import Any
 
+from ...integration.delta_loader import consume_delta_load
 from ...integration.writer import DeltaAppendWriter
 from ...models.adapter import UnityCatalogAdapter
 from ...pipeline import PipelineAction, PipelineContext
@@ -10,6 +11,15 @@ class WriteDeltaAppendAction(PipelineAction):
 
     The WriteDeltaAppendAction appends a Dataframe to Delta Table.
 
+    Example:
+        ```yaml
+        Write Delta Append:
+            action: WRITE_DELTA_APPEND
+            options:
+                table_identifier: my_catalog.my_schema.my_table
+                ignore_empty_df: false
+        ```
+
     Returns:
         None.
     """
@@ -66,4 +76,9 @@ class WriteDeltaAppendAction(PipelineAction):
             ignore_empty_df=ignore_empty_df,
             options=options,
         )
+
+        runtime_info = getattr(context, "runtime_info", None)
+        if runtime_info and runtime_info.get("is_delta_load"):
+            consume_delta_load(runtime_info)
+
         return context.from_existing()
cloe_nessy/pipeline/actions/write_delta_merge.py

@@ -1,5 +1,6 @@
 from typing import Any
 
+from ...integration.delta_loader import consume_delta_load
 from ...integration.writer import DeltaMergeWriter
 from ...models.adapter import UnityCatalogAdapter
 from ...pipeline import PipelineAction, PipelineContext
@@ -10,6 +11,24 @@ class WriteDeltaMergeAction(PipelineAction):
 
     The MergeIntoDeltaAction merges a Dataframe to Delta Table.
 
+    Example:
+        ```yaml
+        Write Delta Merge:
+            action: WRITE_DELTA_MERGE
+            options:
+                table_identifier: my_catalog.my_schema.my_table
+                key_columns:
+                    - id
+                    - customer_id
+                cols_to_update:
+                    - name
+                    - email
+                    - updated_at
+                when_matched_update: true
+                when_not_matched_insert: true
+                use_partition_pruning: true
+        ```
+
     Returns:
         None.
     """
@@ -112,6 +131,10 @@ class WriteDeltaMergeAction(PipelineAction):
             ignore_empty_df=ignore_empty_df,
         )
 
+        runtime_info = getattr(context, "runtime_info", None)
+        if runtime_info and runtime_info.get("is_delta_load"):
+            consume_delta_load(runtime_info)
+
         if refresh_table:
             delta_merge_writer.table_manager.refresh_table(table_identifier=context.table_metadata.identifier)
cloe_nessy/pipeline/actions/write_file.py

@@ -1,5 +1,6 @@
 from typing import Any
 
+from ...integration.delta_loader import consume_delta_load
 from ...integration.writer import FileWriter
 from ...pipeline import PipelineAction, PipelineContext
 
@@ -21,7 +22,7 @@ class WriteFileAction(PipelineAction):
             mode: "append"
             is_stream: False
             options:
-                mergeSchema: "true"
+                mergeSchema: true
     ```
     """
 
@@ -91,4 +92,8 @@ class WriteFileAction(PipelineAction):
             options=options,
         )
 
+        runtime_info = getattr(context, "runtime_info", None)
+        if runtime_info and runtime_info.get("is_delta_load"):
+            consume_delta_load(runtime_info)
+
         return context.from_existing()
cloe_nessy/pipeline/utils/__init__.py (new file)

@@ -0,0 +1,5 @@
+"""Pipeline utility modules."""
+
+from .delta_load_utils import set_delta_load_info
+
+__all__ = ["set_delta_load_info"]
cloe_nessy/pipeline/utils/delta_load_utils.py (new file)

@@ -0,0 +1,36 @@
+"""Utilities for managing delta load information in pipeline runtime context."""
+
+from typing import Any
+
+
+def set_delta_load_info(
+    table_identifier: str,
+    delta_load_options: dict[str, Any],
+    runtime_info: dict[str, Any],
+) -> dict[str, Any]:
+    """Update the runtime information dictionary with delta load options for a specific table.
+
+    If delta load options are provided, this function marks the runtime as a delta load and
+    stores the options under the given table identifier within the 'delta_load_options' key
+    of the runtime_info dictionary.
+
+    The method uses `setdefault("delta_load_options", {})` to ensure that the 'delta_load_options'
+    key exists in the runtime_info dictionary. If the key is not present, it initializes it with
+    an empty dictionary. This prevents overwriting existing delta load options and allows
+    multiple tables' options to be stored without losing previous entries.
+
+    Args:
+        table_identifier: The identifier for the table (can be table name or file path).
+        delta_load_options: Options specific to the delta load for the table.
+        runtime_info: The runtime information dictionary to update.
+
+    Returns:
+        The updated runtime information dictionary with delta load details.
+    """
+    if not delta_load_options:
+        return runtime_info
+
+    runtime_info["is_delta_load"] = True
+    runtime_info.setdefault("delta_load_options", {})[table_identifier] = delta_load_options
+
+    return runtime_info
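A short usage sketch of the helper above; note how repeated calls accumulate per-table entries under one `delta_load_options` key instead of overwriting each other:

```python
from cloe_nessy.pipeline.utils import set_delta_load_info

runtime_info: dict = {}

# One entry keyed by a catalog table identifier...
runtime_info = set_delta_load_info(
    table_identifier="my_catalog.business_schema.sales_table",
    delta_load_options={"strategy": "CDF", "delta_load_identifier": "sales"},
    runtime_info=runtime_info,
)
# ...and another keyed by a file location, as READ_FILES does.
runtime_info = set_delta_load_info(
    table_identifier="/path/to/delta/table",
    delta_load_options={"strategy": "CDF", "delta_load_identifier": "files"},
    runtime_info=runtime_info,
)

assert runtime_info["is_delta_load"] is True
assert len(runtime_info["delta_load_options"]) == 2
```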
cloe_nessy/utils/column_names.py (new file)

@@ -0,0 +1,9 @@
+import uuid
+
+
+def generate_unique_column_name(existing_columns: set[str], prefix: str = "temp_col") -> str:
+    """Generate a unique column name that doesn't conflict with existing columns."""
+    base_name = f"{prefix}_{uuid.uuid4().hex[:8]}"
+    while base_name in existing_columns:
+        base_name = f"{prefix}_{uuid.uuid4().hex[:8]}"
+    return base_name
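Usage sketch for the new helper: eight hex characters of UUID entropy make a collision with an existing column vanishingly unlikely, and the `while` loop covers the remaining case:

```python
from cloe_nessy.utils.column_names import generate_unique_column_name

existing = {"id", "name", "row_num"}
col = generate_unique_column_name(existing_columns=existing, prefix="row_num")

assert col not in existing
print(col)  # e.g. "row_num_3f9a1c2b"
```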
{cloe_nessy-0.3.16.6.dist-info → cloe_nessy-0.3.16.7.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 0.3.16.6
+Version: 0.3.16.7
 Summary: Your friendly datalake monster.
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
 License: MIT
{cloe_nessy-0.3.16.6.dist-info → cloe_nessy-0.3.16.7.dist-info}/RECORD

@@ -17,12 +17,20 @@ cloe_nessy/file_utilities/strategies/local_strategy.py,sha256=6OcEjzLvRTBT8FKXhk
 cloe_nessy/file_utilities/strategies/onelake_strategy.py,sha256=RnQjWtWIFzFj-zPqzyZaPYIjtjXkgP-K7-VA8GhkNmg,1980
 cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=urayKfOUpSaXKgTs1KVK0TS7FWVrJ3k4OLKh35sCxAU,3194
 cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
+cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=bbPGhC0n8L6CmcmV91Xqq6fWRimxlUHUkr22uVqG0g4,1363
+cloe_nessy/integration/delta_loader/delta_loader.py,sha256=D5oOvVLWRwl0z0iQScXVOapErAl6Z5Kt3qXedchgq0s,6878
+cloe_nessy/integration/delta_loader/delta_loader_factory.py,sha256=vB1cL6-Nc3SkLH1xtazMbMF1MnNYq8-g3GHZzRE3QmE,2251
+cloe_nessy/integration/delta_loader/delta_loader_metadata_table.py,sha256=G_EWUY76ZlbsPZB9LCGlOLVezk7DK6peYXEgt7-sTQE,1683
+cloe_nessy/integration/delta_loader/strategies/__init__.py,sha256=1o5fRWenL5KnUg1hf7kmTuTpG9pbMxchiQTub52Qvwo,255
+cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py,sha256=FOOZqtMwp8_LoyG2ab2N19a074CFa2ArCEvNkl7wRWM,16682
+cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=YYFH0DkdRPvITUc1JMgkmgIHjwDyZDCjqvEk2qhBMfE,6185
 cloe_nessy/integration/reader/__init__.py,sha256=J5vlORqHLBpHEvzIwfIjzN5xEdOat-8jlmdLcGj8nsA,239
 cloe_nessy/integration/reader/api_reader.py,sha256=3Mf-txOTJ1dXCzdNtRTLC8UKftKms4NxOoLVgzcc2eo,5691
-cloe_nessy/integration/reader/catalog_reader.py,sha256=lwDeWBVXfFh75XknPawetL9ZBtqS-Oss5rNzbrEeIQg,2070
+cloe_nessy/integration/reader/catalog_reader.py,sha256=7jFuqIPpuz03opULh2I0TCLPfW6AqkxjaW2kCc0oM1g,3292
 cloe_nessy/integration/reader/excel_reader.py,sha256=8KCqKBYFE6RGCiahJimQOAtbYZzaUzlnoslW9yca5P8,8035
 cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
-cloe_nessy/integration/reader/file_reader.py,sha256=3DcZhyyL-Cf_R7Px1UDHJwpO8Un31dWey2Q-f4DtWfY,6879
+cloe_nessy/integration/reader/file_reader.py,sha256=Za_DZKUq1vATp8kIS8uY9IDHiaReZO0k80rrPHAhi5A,8132
 cloe_nessy/integration/reader/reader.py,sha256=e2KVPePQme8SBQJEbL-3zpGasOgTiEvKFTslow2wGPw,1034
 cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
 cloe_nessy/integration/writer/catalog_writer.py,sha256=Gb-hMdADgO_uUJ7mZPHBYyNme2qXsdFFnzwo7GcShHM,2192
@@ -31,18 +39,18 @@ cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuO
 cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
 cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
 cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
-cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=kiacqQ2FYQSzakJqZ9-ZHH3os4X7--QuER_2xx9y21k,971
+cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=m4YFY9_WgaOcnpBviVt3Km-w3wf3NF25wPS-n0NBGcE,970
 cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
 cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
 cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZumY,65
-cloe_nessy/logging/logger_mixin.py,sha256=9iy7BF6drYme-f7Rrt_imbVBRgVqQ89xjcP1X5aMtfY,7467
+cloe_nessy/logging/logger_mixin.py,sha256=H8MyMEyb_kEDP0Ow5QStAFLuOkTIeUnneGaj916fKlU,7443
 cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
 cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
-cloe_nessy/models/column.py,sha256=53fBwRnino72XKACsHZpN9QfCBqqSXyKLHZlM0huumg,1988
+cloe_nessy/models/column.py,sha256=t-MX9GMs7l5W0APvsUxiE1TI9SWkKdFKblmz24s4IHY,1995
 cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
 cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
 cloe_nessy/models/schema.py,sha256=yUrjjEhAH5zbCymE67Az_jPnVB8hGO-_UNfqzeZCD_Y,3376
-cloe_nessy/models/table.py,sha256=O9vcJ1XBIb6kA-NAI3SNpB5b7MGDo3p4wMJdonPaBfA,12076
+cloe_nessy/models/table.py,sha256=3AUBUKLJv1x-xN9KYc5Ndjf-lAlT83rUYdhRKy8wFU4,12105
 cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
 cloe_nessy/models/volume.py,sha256=51BE06FrL1Wv6zblFwJ_HTiR6WQqH7pSmrdH90rqwLg,2444
 cloe_nessy/models/adapter/__init__.py,sha256=m36W_mqwB3dCYnCIt0fLOSHS4E1VU8FRGoaum4Gf95o,90
@@ -63,39 +71,42 @@ cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0Y
 cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
 cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
 cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
-cloe_nessy/pipeline/actions/__init__.py,sha256=9gjSQKLGrPcaYaJrTYZde8d4yNrN1SoXN_DDHq5KrvY,2600
+cloe_nessy/pipeline/actions/__init__.py,sha256=RAGwu3Xzt2JJc0AveZ_hVi3SxTIdehAG-JoQe9JPMNA,2690
 cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
-cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=oXbqbc6BfR82dSIGclwzWiTN8EVmpFjNIYLKm4qOU50,2754
+cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=TBlJaXJAQwLtwvh7dXsX9ebNN3rS6En6951MnT8xGG8,4101
 cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
-cloe_nessy/pipeline/actions/read_files.py,sha256=N9bFgtG1tovhp2JayxE5YiN9PiO2lgG2-6h_Y6tD2eU,5220
+cloe_nessy/pipeline/actions/read_files.py,sha256=hRcM7wG35vxxLVajW3SK5euHW02qxiXCYSkIl11xiQ0,7308
 cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
 cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
 cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
 cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
 cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
-cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=E0ypz9qkHMSatNfnHekP-E6svQVL149M4PV02M03drg,5099
+cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=SfTDrOL0TNSC4wITbozabC0jYvceTLnqU4urnEjYk9g,4910
 cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD-bP0Se9vxlBF0K4AgQWs,1976
 cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1QeatjdEis0up4I7cOWBdyo,1446
 cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
 cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpbsPEJkzea5zFJA6MuyjNpOsFud9o,4045
-cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=heRjBA-Gfu-nmNHOjTYlipEpKY8oNPAHAY40vjJk3aI,8383
+cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=H8j_Xadnm3npVNA_nu7Be7v0bJV20ELKMxSsVHHl6CY,8407
 cloe_nessy/pipeline/actions/transform_join.py,sha256=e_tvMk8YJTAWcUK_EmOgNt0s31ICZoMX_MKOTWx4lBY,3645
 cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
 cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
 cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
 cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
 cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
-cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=j7gRuG3Fedh8JgevIFBbHKock3laJVq4l6Mx3CGU5eo,2676
-cloe_nessy/pipeline/actions/write_delta_append.py,sha256=fuL29SK9G5K14ycckU3iPexeK0XNXUfQscCwhXHxbKA,2498
-cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=Hir7QZZZJ9hmQZXiJ9iz6u06OCmcHFpyKFVB_I1saSM,5043
-cloe_nessy/pipeline/actions/write_file.py,sha256=H8LRst045yij-8XJ5pRB9m5d1lZpZjFa0WSVdSFesPo,2984
+cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=vZ7bZcrZY47P_EVYNshMNZ34l7Orhs8Q9--5Ud5hhLI,2906
+cloe_nessy/pipeline/actions/write_delta_append.py,sha256=2F5qnKPsY_F-2672Ce4Gub7qdna157jEqHHc429fO2A,2962
+cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=kQE4xLbVEUnpYImZLnpZxp88Tuf6VNSeU1W-zI8Wuvw,5805
+cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
+cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
+cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
 cloe_nessy/session/session_manager.py,sha256=VCUPhACeN5armd4D0TqDeH4Ih9nu6XvXSREFqHUwt4s,9710
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.3.16.6.dist-info/METADATA,sha256=YfBuBVqeRWjBTWlj4SQKyUVrc-PX78fK_MnHhO2MQv4,3292
-cloe_nessy-0.3.16.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cloe_nessy-0.3.16.6.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
-cloe_nessy-0.3.16.6.dist-info/RECORD,,
+cloe_nessy-0.3.16.7.dist-info/METADATA,sha256=Rz35trsVqlMw7hkcDBWFr-f9EOtqII9EwZogBABy6gw,3292
+cloe_nessy-0.3.16.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cloe_nessy-0.3.16.7.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
+cloe_nessy-0.3.16.7.dist-info/RECORD,,