cloe-nessy 0.3.16.3b0__py3-none-any.whl → 0.3.16.5b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,17 +42,17 @@ ALTER TABLE {{ table.escaped_identifier }} ADD CONSTRAINT {{constraint.name}} CH
42
42
  {%- if table.comment %}
43
43
  COMMENT ON TABLE {{ table.escaped_identifier }} IS '{{ table.comment }}';
44
44
  {%- endif %}
45
-
45
+ {# Tags do not yet work in Databricks
46
46
  {%- if table.business_properties %}
47
47
  {%- for tag_key, tag_value in table.business_properties.items() %}
48
- SET TAG ON TABLE {{ table.escaped_identifier }} "{{ tag_key }}"{% if tag_value %}="{{ tag_value }}"{% endif %};
48
+ SET TAG ON TABLE {{ table.escaped_identifier }} `{{ tag_key }}`{% if tag_value %} = `{{ tag_value }}`{% endif %};
49
49
  {%- endfor %}
50
50
  {%- endif %}
51
51
 
52
52
  {%- for column in table.columns %}
53
53
  {%- if column.business_properties %}
54
54
  {%- for tag_key, tag_value in column.business_properties.items() %}
55
- SET TAG ON COLUMN {{ table.escaped_identifier }}.`{{ column.name }}` "{{ tag_key }}"{% if tag_value %}="{{ tag_value }}"{% endif %};
55
+ SET TAG ON COLUMN {{ table.escaped_identifier }}.`{{ column.name }}` `{{ tag_key }}`{% if tag_value %} = `{{ tag_value }}`{% endif %};
56
56
  {%- endfor %}
57
57
  {%- endif %}
58
- {%- endfor %}
58
+ {%- endfor %} #}
@@ -3,10 +3,10 @@ from collections import OrderedDict
3
3
  from concurrent.futures import ThreadPoolExecutor, as_completed
4
4
  from threading import Lock
5
5
 
6
- import matplotlib.pyplot as plt
7
6
  import networkx as nx
8
7
 
9
8
  from ..logging.logger_mixin import LoggerMixin
9
+ from .pipeline_plotting_service import PipelinePlottingService
10
10
  from .pipeline_step import PipelineStep
11
11
 
12
12
 
@@ -214,30 +214,13 @@ class Pipeline(LoggerMixin):
214
214
  self._console_logger.info(f"Pipeline [' {self.name} '] completed successfully.")
215
215
 
216
216
  def plot_graph(self, save_path: str | None = None) -> None:
217
- """Visualizes the graph of the pipeline using matplotlib.
217
+ """Generates a visual representation of the pipeline steps and their dependencies.
218
+
219
+ This method uses the PipelinePlottingService to create a plot of the pipeline graph. If a save path
220
+ is specified, the plot will be saved to that location; otherwise, it will be displayed interactively.
218
221
 
219
222
  Args:
220
- save_path: If provided, the graph will be saved to this path. Otherwise, it will be shown.
223
+ save_path: Optional; the file path where the plot should be saved. If None, the plot is displayed interactively.
221
224
  """
222
- pos = nx.spring_layout(self._graph) # Position steps (nodes) using the spring layout
223
- plt.figure(figsize=(12, 8))
224
- nx.draw(
225
- self._graph,
226
- pos,
227
- with_labels=True,
228
- node_color="lightblue",
229
- font_weight="bold",
230
- node_size=3000,
231
- font_size=10,
232
- edge_color="gray",
233
- )
234
-
235
- # Draw edge labels if needed
236
- edge_labels = nx.get_edge_attributes(self._graph, "label")
237
- nx.draw_networkx_edge_labels(self._graph, pos, edge_labels=edge_labels)
238
-
239
- if save_path:
240
- plt.savefig(save_path)
241
- self._console_logger.info(f"Graph visual saved to {save_path}")
242
- else:
243
- plt.show()
225
+ plotting_service = PipelinePlottingService()
226
+ plotting_service.plot_graph(self, save_path)
@@ -0,0 +1,340 @@
1
+ """Pipeline plotting service for visualizing pipeline graphs using matplotlib."""
2
+
3
+ import textwrap
4
+
5
+ import matplotlib.patches as patches
6
+ import matplotlib.pyplot as plt
7
+ import networkx as nx
8
+
9
+ from ..logging import LoggerMixin
10
+
11
+
12
class PipelinePlottingService(LoggerMixin):
    """Service for plotting pipeline graphs using matplotlib.

    The service turns a pipeline's steps into a directed graph, places the steps
    on a grid (one column per dependency level, with a row-based fallback),
    scales the grid to data coordinates and draws every step as a labelled
    rectangle connected by arrows. The fill colour of a rectangle reflects the
    status found on the step's context.
    """

    # Node box geometry in data units. These values are shared by the layout
    # scaling, the node drawing and the edge drawing so that arrows end exactly
    # on the rectangle borders.
    _MIN_NODE_WIDTH = 7.5
    _NODE_WIDTH_PER_CHAR = 0.2
    _NODE_HEIGHT = 2.0

    # Minimum distance between two neighbouring grid cells and the factor by
    # which a node's own size may push that distance up.
    _MIN_X_SPACING = 9.0
    _MIN_Y_SPACING = 4.5
    _SPACING_SCALE = 0.7

    # Offsets smaller than this are treated as "no offset" when routing arrows.
    _EPSILON = 0.001

    def __init__(self):
        """Initialize the plotting service."""
        # Maps the status of a step's context to the fill colour of its node.
        self.context_colors = {
            "initialized": "lightgrey",
            "successful": "lightgreen",
            "failed": "red",
        }
        self._console_logger = self.get_console_logger()

    def plot_graph(self, pipeline, save_path: str | None = None):
        """Plot the pipeline graph and save it as an image or show it.

        Args:
            pipeline: The Pipeline object to visualize. Its ``steps`` mapping and
                its ``name`` are read.
            save_path: Path where the graph image should be saved. The image is
                always written in PNG format. If None (or empty), the figure is
                shown with ``plt.show()`` instead.
        """
        graph = self._build_graph(pipeline)
        grid_pos = self._determine_number_of_rows(graph.nodes, 75, 5, pipeline)

        fig, ax = plt.subplots(figsize=(16, 12))
        try:
            adjusted_pos = self._scale_positions(grid_pos)
            self._set_axis_limits(ax, adjusted_pos)

            self._draw_edges(ax, graph, adjusted_pos)
            self._draw_nodes(ax, pipeline, adjusted_pos)
            self._add_legend(ax, adjusted_pos)

            ax.set_title(pipeline.name, fontsize=18, weight="bold", pad=20)
            ax.set_aspect("equal", adjustable="box")
            ax.axis("off")

            fig.tight_layout()
            if save_path:
                fig.savefig(
                    save_path,
                    dpi=300,
                    bbox_inches="tight",
                    facecolor="white",
                    edgecolor="none",
                    format="png",
                )
                self._console_logger.info(f"Graph saved to {save_path}")
            else:
                plt.show()
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)

    def _build_graph(self, pipeline):
        """Build a directed graph holding every step of the pipeline.

        Edges are added first so connected steps keep their edge-derived order;
        afterwards all step names are added as nodes so that steps without any
        predecessor or successor are part of the graph as well.
        """
        graph = nx.DiGraph()
        graph.add_edges_from(self._generate_edges(pipeline))
        graph.add_nodes_from(step.name for step in pipeline.steps.values())
        return graph

    def _node_width(self, node: str) -> float:
        """Return the width of a node's rectangle, which grows with its name length."""
        return max(self._MIN_NODE_WIDTH, len(node) * self._NODE_WIDTH_PER_CHAR)

    def _scale_positions(self, grid_pos):
        """Convert grid coordinates into data coordinates with room for the boxes."""
        y_spacing = max(self._MIN_Y_SPACING, self._NODE_HEIGHT * self._SPACING_SCALE)
        scaled = {}
        for node, (x, y) in grid_pos.items():
            x_spacing = max(self._MIN_X_SPACING, self._node_width(node) * self._SPACING_SCALE)
            scaled[node] = (x * x_spacing, y * y_spacing)
        return scaled

    def _set_axis_limits(self, ax, adjusted_pos):
        """Set axis limits that contain every node rectangle completely.

        Patches are clipped to the axes, so the padding has to cover half a node
        box on each side in addition to the relative margin around the centres.
        """
        if not adjusted_pos:
            return

        x_coords = [x for x, _ in adjusted_pos.values()]
        y_coords = [y for _, y in adjusted_pos.values()]
        x_min, x_max = min(x_coords), max(x_coords)
        y_min, y_max = min(y_coords), max(y_coords)

        half_width = max(self._node_width(node) for node in adjusted_pos) / 2
        half_height = self._NODE_HEIGHT / 2

        x_padding = ((x_max - x_min) * 0.1 if x_max != x_min else 5) + half_width
        y_padding = ((y_max - y_min) * 0.1 if y_max != y_min else 5) + half_height

        ax.set_xlim(x_min - x_padding, x_max + x_padding)
        ax.set_ylim(y_min - y_padding, y_max + y_padding)

    def _generate_edges(self, pipeline):
        """Generate ``(predecessor, step name)`` edges from the pipeline steps."""
        return [
            (predecessor, step.name)
            for step in pipeline.steps.values()
            for predecessor in step._predecessors
        ]

    @staticmethod
    def _boundary_scale(half_width, half_height, dx, dy, epsilon=0.001):
        """Return the fraction of ``(dx, dy)`` at which a ray leaves a node box.

        The ray starts at the box centre. Only directions with a non-negligible
        offset constrain the result; the caller guarantees at least one exists.
        """
        candidates = []
        if abs(dx) >= epsilon:
            candidates.append(half_width / abs(dx))
        if abs(dy) >= epsilon:
            candidates.append(half_height / abs(dy))
        return min(candidates)

    def _draw_edges(self, ax, graph, adjusted_pos):
        """Draw edges between nodes so that arrows start/end on the rectangle borders."""
        half_height = self._NODE_HEIGHT / 2

        for src, tgt in graph.edges():
            x0, y0 = adjusted_pos[src]
            x1, y1 = adjusted_pos[tgt]
            dx = x1 - x0
            dy = y1 - y0

            if abs(dx) < self._EPSILON and abs(dy) < self._EPSILON:
                # Nodes share a position (e.g. a self-reference): nothing to draw.
                continue

            scale_src = self._boundary_scale(self._node_width(src) / 2, half_height, dx, dy, self._EPSILON)
            scale_tgt = self._boundary_scale(self._node_width(tgt) / 2, half_height, dx, dy, self._EPSILON)

            if scale_src + scale_tgt >= 1:
                # The two boxes touch or overlap along this line; an arrow would
                # have no visible length or would point backwards.
                continue

            ax.annotate(
                "",
                xy=(x1 - dx * scale_tgt, y1 - dy * scale_tgt),
                xytext=(x0 + dx * scale_src, y0 + dy * scale_src),
                arrowprops={"arrowstyle": "->", "color": "gray", "lw": 2, "shrinkA": 0, "shrinkB": 0},
            )

    def _draw_nodes(self, ax, pipeline, adjusted_pos):
        """Draw nodes as rectangles with text labels."""
        node_height = self._NODE_HEIGHT

        for node, (x, y) in adjusted_pos.items():
            context = self._get_context_by_step_info(pipeline, node)
            # Unknown or missing statuses fall back to the "initialized" colour.
            fillcolor = self.context_colors.get(context, "lightgrey")
            node_width = self._node_width(node)

            rect = patches.Rectangle(
                (x - node_width / 2, y - node_height / 2),
                node_width,
                node_height,
                linewidth=1,
                edgecolor="black",
                facecolor=fillcolor,
                zorder=2,
            )
            ax.add_patch(rect)

            # _wrap_text separates lines with "<br>"; matplotlib needs newlines.
            wrapped_label = self._wrap_text(node).replace("<br>", "\n")
            ax.text(x, y, wrapped_label, ha="center", va="center", fontsize=10, weight="bold", zorder=3)

    def _add_legend(self, ax, adjusted_pos):
        """Add the colour legend below the lowest node, centred horizontally."""
        legend_text = "Success = Light Green; Waiting = Light Grey; Failed = Bright Red"

        if adjusted_pos:
            x_coords = [x for x, _ in adjusted_pos.values()]
            y_coords = [y for _, y in adjusted_pos.values()]
            legend_x = (min(x_coords) + max(x_coords)) / 2
            legend_y = min(y_coords) - abs(max(y_coords) - min(y_coords)) * 0.2 - 3
        else:
            legend_x, legend_y = 0, -5

        ax.text(
            legend_x,
            legend_y,
            legend_text,
            ha="center",
            va="center",
            fontsize=12,
            bbox={"boxstyle": "round,pad=0.3", "facecolor": "white", "alpha": 0.8},
        )

    def _determine_number_of_rows(self, nodes, max_row_length, max_nodes_in_row, pipeline):
        """Compute grid positions for all nodes.

        A dependency-level layout is tried first. If it raises, the simple
        row-based layout is used instead and the reason is logged. In both cases
        the result is passed through ``_post_process_grouping``.

        Args:
            nodes: Iterable of node names that need a position.
            max_row_length: Maximum summed name length per row (fallback layout only).
            max_nodes_in_row: Maximum number of nodes per row (fallback layout only).
            pipeline: The pipeline whose steps define the dependencies.

        Returns:
            Mapping of node name to ``(x, y)`` grid position.
        """
        graph = self._build_graph(pipeline)
        # Make sure every requested node gets a position, connected or not.
        graph.add_nodes_from(nodes)

        try:
            pos = self._create_hierarchical_layout(graph, pipeline)
        except Exception as err:  # deliberate best effort: plotting must not fail on layout
            self._console_logger.warning(f"Hierarchical layout failed ({err}); using simple row layout.")
            pos = self._create_simple_layout(nodes, max_row_length, max_nodes_in_row)

        return self._post_process_grouping(nodes, pos, pipeline)

    def _create_hierarchical_layout(self, graph, pipeline):
        """Create a layout based on dependency levels to show parallel branches clearly.

        ``pipeline`` is accepted for interface compatibility and is not used.
        """
        levels = self._calculate_dependency_levels(graph)
        level_groups = self._group_nodes_by_level(levels)
        return self._position_nodes_in_levels(level_groups)

    def _calculate_dependency_levels(self, graph):
        """Assign each node the index of the dependency level it belongs to.

        Level 0 holds nodes without predecessors; each following level holds the
        nodes whose predecessors have all been assigned already. If no such node
        exists (a cycle), all remaining nodes are placed on the current level.
        """
        levels = {}
        remaining_nodes = set(graph.nodes())
        current_level = 0

        while remaining_nodes:
            current_level_nodes = self._find_nodes_without_dependencies(graph, remaining_nodes)

            if not current_level_nodes:
                # Circular dependency: keep graph order so the result is stable.
                current_level_nodes = [node for node in graph.nodes() if node in remaining_nodes]

            for node in current_level_nodes:
                levels[node] = current_level
            remaining_nodes.difference_update(current_level_nodes)

            current_level += 1

        return levels

    def _find_nodes_without_dependencies(self, graph, remaining_nodes):
        """Find remaining nodes none of whose predecessors are still remaining.

        Nodes are returned in graph insertion order rather than set order, so the
        resulting layout does not depend on string hash randomisation.
        """
        return [
            node
            for node in graph.nodes()
            if node in remaining_nodes and set(graph.predecessors(node)).isdisjoint(remaining_nodes)
        ]

    def _group_nodes_by_level(self, levels):
        """Group nodes by their dependency level, keeping their relative order."""
        level_groups = {}
        for node, level in levels.items():
            level_groups.setdefault(level, []).append(node)
        return level_groups

    def _position_nodes_in_levels(self, level_groups):
        """Place each level in its own column and centre its nodes around y = 0."""
        pos = {}
        for level, nodes_in_level in level_groups.items():
            centre_offset = (len(nodes_in_level) - 1) / 2
            for i, node in enumerate(nodes_in_level):
                # Parallel nodes are stacked vertically; the first one ends up on top.
                pos[node] = (level, -(i - centre_offset))
        return pos

    def _create_simple_layout(self, nodes, max_row_length, max_nodes_in_row):
        """Fallback layout: fill rows in serpentine order.

        A new row is started when adding the next node would exceed
        ``max_row_length`` characters or ``max_nodes_in_row`` nodes. Rows
        alternate between left-to-right and right-to-left.
        """
        pos = {}
        current_row = []
        current_row_length = 0
        row_index = 0
        left_to_right = True

        for node in nodes:
            node_length = len(node)
            row_is_full = (current_row_length + node_length > max_row_length) or (
                len(current_row) >= max_nodes_in_row
            )
            # Only wrap when the row already holds something; otherwise an
            # over-long first name would leave an empty row behind.
            if current_row and row_is_full:
                self._position_row_nodes(pos, current_row, row_index, left_to_right)
                current_row = []
                current_row_length = 0
                row_index += 1
                left_to_right = not left_to_right

            current_row.append(node)
            current_row_length += node_length

        # Handle the last row
        self._position_row_nodes(pos, current_row, row_index, left_to_right)
        return pos

    def _position_row_nodes(self, pos, row_nodes, row_index, left_to_right):
        """Write positions for one row into ``pos``, left-to-right or right-to-left."""
        ordered = row_nodes if left_to_right else reversed(row_nodes)
        for column, row_node in enumerate(ordered):
            pos[row_node] = (column, -row_index)

    def _post_process_grouping(self, nodes, pos, pipeline):
        """Make room below every node that has more than one predecessor.

        For each such node, the nodes that follow it in ``pos`` are moved down by
        one row (see ``_shift_row_down``).
        """
        for node_name in nodes:
            predecessors = self._get_predecessors_by_step_info(pipeline, node_name)
            if predecessors is not None and len(predecessors) > 1:
                pos = self._shift_row_down(pos, node_name)
        return pos

    def _shift_row_down(self, pos, split_key):
        """Move every node that follows ``split_key`` in ``pos`` down by one row.

        The node ``split_key`` itself and all nodes before it keep their
        position. If ``split_key`` is not in ``pos``, nothing is moved. A new
        dict with the same key order is returned; ``pos`` is not modified.
        """
        shifted = {}
        found_split_key = False

        for key, value in pos.items():
            if found_split_key:
                shifted[key] = (value[0], value[1] - 1)
            else:
                shifted[key] = value
                found_split_key = key == split_key

        return shifted

    def _wrap_text(self, text, max_length=20):
        """Wrap text at ``max_length`` characters, joining the lines with ``<br>``."""
        if len(text) <= max_length:
            return text

        return "<br>".join(textwrap.wrap(text, width=max_length))

    def _get_context_by_step_info(self, pipeline, step_name):
        """Return the context status of the named step, or None if there is no such step."""
        for step in pipeline.steps.values():
            if step.name == step_name:
                return step.context.status
        return None

    def _get_predecessors_by_step_info(self, pipeline, step_name):
        """Return the predecessors of the named step, or None if there is no such step."""
        for step in pipeline.steps.values():
            if step.name == step_name:
                return step._predecessors
        return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloe-nessy
3
- Version: 0.3.16.3b0
3
+ Version: 0.3.16.5b0
4
4
  Summary: Your friendly datalake monster.
5
5
  Project-URL: homepage, https://initions.com/
6
6
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -50,17 +50,18 @@ cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1sy
50
50
  cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  cloe_nessy/models/mixins/read_instance_mixin.py,sha256=j5Y4aNWOh1jlskEaxNooZFJgPyxRmik00gAVLJnAaRs,4507
52
52
  cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
53
- cloe_nessy/models/templates/create_table.sql.j2,sha256=_nENxBHT3drRvZYttFgy5o7tWvN86aENgGy1yrSISj0,2435
53
+ cloe_nessy/models/templates/create_table.sql.j2,sha256=71JpUyUZ_ZYO2M0tfIrTXHR7JycypAGsELt2-2d3oO0,2479
54
54
  cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
55
55
  cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
56
56
  cloe_nessy/object_manager/table_manager.py,sha256=suHx56TYXagaJ2dVkvTP7vwSI4xgTqXNkHYBbYh2pd4,13913
57
57
  cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
58
58
  cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
59
- cloe_nessy/pipeline/pipeline.py,sha256=-1tJVs9rZf8CcwieH4IP7mqJZ6mL7bQUZ56TNKt8eO8,11154
59
+ cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
60
60
  cloe_nessy/pipeline/pipeline_action.py,sha256=S7IVFdmG12fRBzHuE_DiWn7qlMtApz6IloVd2Fj31Sg,1944
61
61
  cloe_nessy/pipeline/pipeline_config.py,sha256=BN3ZSbr6bC-X9edoh-n5vRfPHFMbgtAU7mQ3dBrcWO8,3131
62
62
  cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0YEg0zlQTz58,1874
63
63
  cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
64
+ cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
64
65
  cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
65
66
  cloe_nessy/pipeline/actions/__init__.py,sha256=9gjSQKLGrPcaYaJrTYZde8d4yNrN1SoXN_DDHq5KrvY,2600
66
67
  cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
@@ -95,6 +96,6 @@ cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEv
95
96
  cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
96
97
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
98
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
98
- cloe_nessy-0.3.16.3b0.dist-info/METADATA,sha256=xkQ6JykVWTv9VwFW9vwuY2jsjen3bpDVXBkZeSKGB40,3328
99
- cloe_nessy-0.3.16.3b0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
100
- cloe_nessy-0.3.16.3b0.dist-info/RECORD,,
99
+ cloe_nessy-0.3.16.5b0.dist-info/METADATA,sha256=k06Hu0tT4c7JkwmN7WmDVTBvZi8ypdB5Agxfty42CtQ,3328
100
+ cloe_nessy-0.3.16.5b0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
101
+ cloe_nessy-0.3.16.5b0.dist-info/RECORD,,