imsciences 0.9.7.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imsciences/__init__.py +2 -2
- imsciences/geo.py +173 -115
- imsciences/mmm.py +921 -395
- imsciences/pull.py +1952 -1154
- imsciences/unittesting.py +729 -478
- imsciences/vis.py +681 -125
- {imsciences-0.9.7.0.dist-info → imsciences-1.0.2.dist-info}/METADATA +1 -1
- imsciences-1.0.2.dist-info/RECORD +12 -0
- {imsciences-0.9.7.0.dist-info → imsciences-1.0.2.dist-info}/WHEEL +1 -1
- imsciences-0.9.7.0.dist-info/RECORD +0 -12
- {imsciences-0.9.7.0.dist-info → imsciences-1.0.2.dist-info}/LICENSE.txt +0 -0
- {imsciences-0.9.7.0.dist-info → imsciences-1.0.2.dist-info}/PKG-INFO-TomG-HP-290722 +0 -0
- {imsciences-0.9.7.0.dist-info → imsciences-1.0.2.dist-info}/top_level.txt +0 -0
imsciences/vis.py
CHANGED
|
@@ -1,179 +1,732 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
1
4
|
import pandas as pd
|
|
2
5
|
import plotly.express as px
|
|
3
|
-
import plotly.
|
|
6
|
+
import plotly.graph_objects as go
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class datavis:
|
|
10
|
+
def __init__(self):
    """Initialize DataVis with the built-in themes and select "default".

    ``self.themes`` maps a theme name to a dict with the keys ``template``,
    ``colorscale``, ``line_color``, ``background_color``, ``grid_color``,
    ``text_color``, ``font_family`` and ``font_size``.
    ``self.current_theme`` holds the name of the active entry.
    """
    self.themes = {
        "default": {
            "template": "plotly_white",
            "colorscale": "viridis",
            "line_color": "#1f77b4",
            "background_color": "white",
            "grid_color": "lightgray",
            "text_color": "black",
            "font_family": "Raleway, sans-serif",
            "font_size": 12,
        },
        "dark": {
            "template": "plotly_dark",
            "colorscale": "plasma",
            "line_color": "#f07b16",
            "background_color": "#2f3136",
            "grid_color": "#1cd416",
            "text_color": "white",
            "font_family": "Raleway, sans-serif",
            "font_size": 12,
        },
        "business": {
            "template": "plotly_white",
            "colorscale": "blues",
            "line_color": "#0e4272",
            "background_color": "white",
            "grid_color": "#e6e6e6",
            "text_color": "#921919",
            "font_family": "Raleway, sans-serif",
            "font_size": 11,
        },
        "scientific": {
            # Light template on purpose: this theme pairs a white plot
            # background with black text, which is unreadable on the dark
            # paper that the "plotly_dark" template brings with it.
            "template": "plotly_white",
            "colorscale": "rdylbu",
            "line_color": "#d62728",
            "background_color": "white",
            "grid_color": "#f0f0f0",
            "text_color": "black",
            "font_family": "Raleway, sans-serif",
            "font_size": 10,
        },
    }
    self.current_theme = "default"
|
|
55
|
+
|
|
56
|
+
def help(self, method=None, *, show_examples=True):
    """
    Print help for one method, or an overview of every method.

    Parameters
    ----------
    method : str, optional
        Name of the method to describe. When omitted (or otherwise falsy)
        the overview of all methods is printed instead.
    show_examples : bool, default True
        Forwarded to the printer; controls whether usage examples are shown.

    Usage:
    ------
    vis.help()  # Show all methods
    vis.help('plot_one')  # Show help for specific method
    vis.help('plot_chart', show_examples=False)  # Show help without examples

    """
    # No method requested: fall back to the overview and stop.
    if not method:
        self._show_overview_help(show_examples=show_examples)
        return

    self._show_method_help(method, show_examples=show_examples)
|
|
78
|
+
|
|
79
|
+
def _show_overview_help(self, *, show_examples=True):
    """
    Print an overview of the available methods to stdout.

    The output lists the current theme, the available theme names and, for
    each documented method, its description, parameter list and use case.

    Parameters
    ----------
    show_examples : bool, default True
        When True, a short "quick start" section is printed after the
        method list.

    Returns
    -------
    None
    """
    banner = "=" * 80

    print(banner)
    print("DataVis Class - Comprehensive Data Visualization Tool")
    print(banner)
    print(f"Current Theme: {self.current_theme}")
    print(f"Available Themes: {', '.join(self.themes.keys())}")
    print("\n📊 AVAILABLE METHODS:\n")

    # The "params" strings are shown to the user verbatim, so they must be
    # kept in sync with the real method signatures by hand.
    methods_info = [
        {
            "name": "plot_one",
            "description": "Plot a single time series from a DataFrame",
            "params": "df, column, date_column",
            "use_case": "Single metric tracking over time",
        },
        {
            "name": "plot_two",
            "description": "Compare two metrics from different DataFrames",
            "params": 'data_config, same_axis=True, title="Comparison Plot"',
            "use_case": "Comparative analysis of two time series",
        },
        {
            "name": "plot_chart",
            "description": "Create various chart types (line, bar, scatter, etc.)",
            "params": "data_config",
            "use_case": "Flexible charting with multiple chart types",
        },
        {
            "name": "plot_correlation",
            "description": "Generate correlation heatmaps",
            "params": 'df, columns=None, method="pearson", title=None',
            "use_case": "Analyze relationships between variables",
        },
        {
            "name": "plot_sankey",
            "description": "Create Sankey diagrams for flow visualization",
            "params": "df, source_col, target_col, value_col, title=None, color_mapping=None",
            "use_case": "Visualize flow/process data",
        },
        {
            "name": "set_theme",
            "description": "Set global theme for all charts",
            "params": "theme_name",
            "use_case": "Consistent styling across visualizations",
        },
        {
            "name": "help",
            "description": "Get detailed help for methods",
            "params": "method=None, show_examples=True",
            "use_case": "Learn how to use the visualization tools",
        },
    ]

    for position, entry in enumerate(methods_info, 1):
        print(f"{position}. {entry['name']}")
        print(f" 📝 Description: {entry['description']}")
        print(f" ⚙️ Parameters: {entry['params']}")
        print(f" 🎯 Use Case: {entry['use_case']}")
        print()

    if show_examples:
        print("💡 QUICK START EXAMPLES:")
        print(" vis.help('plot_one') # Get detailed help for plot_one")
        print(" vis.set_theme('dark') # Switch to dark theme")
        print(" vis.plot_one(df, 'sales', 'date') # Plot sales over time")
        print(" vis.plot_correlation(df) # Create correlation heatmap")
        print()

    print("🔧 For detailed help on any method, use: vis.help('method_name')")
    print(banner)
|
|
150
|
+
|
|
151
|
+
def _show_method_help(self, method_name, *, show_examples=True):
    """
    Print detailed help (docstring, signature, examples) for one method.

    Parameters
    ----------
    method_name : str
        Name of a callable attribute of this object.
    show_examples : bool, default True
        When True, the examples returned by ``_get_method_examples`` are
        printed after the signature (if there are any).

    Returns
    -------
    None
        If ``method_name`` does not name a callable attribute, a
        "not found" message and the list of public callables are printed
        instead of the help text.
    """
    # Only callables can be described: plain attributes such as ``themes``
    # would make inspect.signature() raise TypeError, and a non-string name
    # cannot be looked up at all.
    method = getattr(self, method_name, None) if isinstance(method_name, str) else None
    if not callable(method):
        available = [
            m
            for m in dir(self)
            if not m.startswith("_") and callable(getattr(self, m))
        ]
        print(f"❌ Method '{method_name}' not found!")
        print(f"Available methods: {available}")
        return

    rule = "=" * 60
    print(rule)
    print(f"📊 DETAILED HELP: {method_name}")
    print(rule)

    # Docstring (cleaned of indentation by inspect.getdoc)
    doc = inspect.getdoc(method)
    if doc:
        print(f"📝 {doc}")
        print()

    # Call signature as Python would display it
    sig = inspect.signature(method)
    print(f"🔧 Signature: {method_name}{sig}")
    print()

    # Method-specific examples
    if show_examples:
        examples = self._get_method_examples(method_name)
        if examples:
            print("💡 EXAMPLES:")
            for example in examples:
                print(f" {example}")
            print()

    print(rule)
|
|
187
|
+
|
|
188
|
+
def _get_method_examples(self, method_name):
    """
    Return the usage-example lines registered for ``method_name``.

    Each entry is one line of example text, ready to be printed. An empty
    list is returned for names that have no examples.
    """
    single_series = [
        "vis.plot_one(df, 'sales', 'date') # Plot sales over time",
        "vis.plot_one(stock_df, 'price', 'timestamp') # Stock price chart",
    ]
    two_series = [
        "config = {'df1': df1, 'col1': 'sales', 'df2': df2, 'col2': 'revenue', 'date_column': 'date'}",
        "vis.plot_two(config, same_axis=True) # Same y-axis",
        "vis.plot_two(config, same_axis=False) # Separate y-axes",
    ]
    generic_chart = [
        "config = {'df': df, 'date_col': 'date', 'value_cols': ['sales'], 'chart_type': 'line'}",
        "vis.plot_chart(config) # Line chart",
        "config['chart_type'] = 'bar' # Change to bar chart",
    ]
    correlation = [
        "vis.plot_correlation(df) # All numeric columns",
        "vis.plot_correlation(df, columns=['sales', 'profit', 'cost']) # Specific columns",
        "vis.plot_correlation(df, method='spearman') # Spearman correlation",
    ]
    sankey = [
        "# Basic multi-layer Sankey",
        "vis.plot_sankey(df, 'Source', 'Target', 'Value')",
        "",
        "# Sankey with custom colors and title",
        "color_map = {",
        " 'Brand Media': 'rgba(246, 107, 109, 0.6)',",
        " 'TV': 'rgba(246, 107, 109, 0.6)',",
        " 'default': 'rgba(175, 175, 175, 0.6)'",
        "}",
        "vis.plot_sankey(df, 'Source', 'Target', 'Value', title='Brand Media Effects', color_mapping=color_map)",
    ]
    theming = [
        "vis.set_theme('dark') # Switch to dark theme",
        "vis.set_theme('business') # Professional business theme",
        "vis.set_theme('scientific') # Scientific publication theme",
    ]

    catalogue = {
        "plot_one": single_series,
        "plot_two": two_series,
        "plot_chart": generic_chart,
        "plot_correlation": correlation,
        "plot_sankey": sankey,
        "set_theme": theming,
    }
    return catalogue.get(method_name, [])
|
|
229
|
+
|
|
230
|
+
def set_theme(self, theme_name):
    """
    Select the theme that subsequent charts are styled with.

    Parameters
    ----------
    theme_name : str
        Must be one of the keys of ``self.themes``
        ('default', 'dark', 'business', 'scientific').

    Returns
    -------
    None
        The choice is stored in ``self.current_theme`` and a confirmation
        line is printed.

    Raises
    ------
    ValueError
        If ``theme_name`` is not a known theme; the current theme is left
        unchanged.

    Examples
    --------
    vis.set_theme('dark')       # Dark theme with plasma colors
    vis.set_theme('business')   # Professional business theme
    vis.set_theme('scientific') # Scientific publication theme

    """
    if theme_name in self.themes:
        self.current_theme = theme_name
        print(f"✅ Theme set to: {theme_name}")
        return

    # Unknown name: tell the caller what would have been accepted.
    available_themes = ", ".join(self.themes)
    raise ValueError(
        f"Theme '{theme_name}' not found. Available themes: {available_themes}"
    )
|
|
259
|
+
|
|
260
|
+
def _apply_theme(self, fig):
    """
    Style ``fig`` with the current theme and return it.

    The figure's layout is updated in place with the theme's template,
    plot background colour and font, and both axes get a visible axis line
    plus the theme's grid colour. The y-axis is additionally set to
    ``rangemode="tozero"``.
    """
    palette = self.themes[self.current_theme]

    # Settings shared by the x- and y-axis.
    axis_style = {
        "showline": True,
        "linecolor": palette["text_color"],
        "gridcolor": palette["grid_color"],
    }
    font_style = {
        "family": palette["font_family"],
        "size": palette["font_size"],
        "color": palette["text_color"],
    }

    fig.update_layout(
        template=palette["template"],
        plot_bgcolor=palette["background_color"],
        font=font_style,
        xaxis=dict(axis_style),
        yaxis={**axis_style, "rangemode": "tozero"},
    )
    return fig
|
|
286
|
+
|
|
287
|
+
def plot_correlation(self, df, columns=None, method="pearson", title=None):
    """
    Create a correlation heatmap for numeric columns in a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame.
    columns : list, optional
        Columns to consider. Names that are not in ``df`` and columns that
        are not numeric are ignored. If None, every numeric column of
        ``df`` is used.
    method : str or callable, default 'pearson'
        Passed to ``DataFrame.corr``: 'pearson', 'kendall', 'spearman' or a
        callable.
    title : str, optional
        Custom title for the heatmap. Defaults to
        "<Method> Correlation Matrix".

    Returns
    -------
    plotly.graph_objects.Figure
        The correlation heatmap figure

    Raises
    ------
    ValueError
        If fewer than two numeric columns remain after filtering.

    Example:
    --------
    # Basic correlation heatmap
    fig = vis.plot_correlation(df)

    # Specific columns with Spearman correlation
    fig = vis.plot_correlation(df, columns=['sales', 'profit', 'cost'], method='spearman')

    """
    # Narrow to the requested columns first (unknown names are skipped),
    # then apply the same numeric filter in both cases so that e.g. int32
    # or float32 columns are treated alike whether or not ``columns`` is
    # given.
    if columns is None:
        candidates = df
    else:
        candidates = df[[col for col in columns if col in df.columns]]
    numeric_frame = candidates.select_dtypes(include=[np.number])

    # Minimum columns required for correlation
    min_correlation_cols = 2

    if numeric_frame.shape[1] < min_correlation_cols:
        error_msg = "Need at least 2 numeric columns for correlation analysis"
        raise ValueError(error_msg)

    # Calculate correlation matrix
    corr_matrix = numeric_frame.corr(method=method)

    theme = self.themes[self.current_theme]

    # ``method`` may be a callable, which has no .title(); fall back to its
    # function name for the default heading.
    if isinstance(method, str):
        method_label = method.title()
    else:
        method_label = getattr(method, "__name__", "Custom")

    # Create heatmap
    fig = px.imshow(
        corr_matrix,
        text_auto=True,
        aspect="auto",
        color_continuous_scale=theme["colorscale"],
        title=title or f"{method_label} Correlation Matrix",
    )

    # Apply theme
    fig = self._apply_theme(fig)

    # Draw the cell annotations in the theme's text colour
    fig.update_traces(textfont={"color": theme["text_color"]})

    return fig
|
|
354
|
+
|
|
355
|
+
def plot_sankey(self, df, source_col, target_col, value_col, **kwargs):
    """
    Create a multi-layer Sankey diagram from a single DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input DataFrame with source, target, and value columns
    source_col : str
        Column name for source nodes
    target_col : str
        Column name for target nodes
    value_col : str
        Column name for flow values (must be numeric)
    title : str, optional
        Custom title for the diagram
    color_mapping : dict, optional
        Dictionary mapping source/target names to link colors. A link takes
        the color of its source if the source is in the mapping, otherwise
        that of its target, otherwise the 'default' entry.
        Format: {'Brand Media': 'rgba(246, 107, 109, 0.6)', 'default': 'rgba(175, 175, 175, 0.6)'}
        Pass as keyword argument: color_mapping={...}
    **kwargs : dict
        Only ``title`` and ``color_mapping`` are read; other keyword
        arguments are ignored.

    Returns
    -------
    plotly.graph_objects.Figure
        The multi-layer Sankey diagram figure

    Raises
    ------
    ValueError
        If a required column is missing, the value column is not numeric,
        or no rows remain after dropping missing and zero values.

    DataFrame Format Requirements:
    -----------------------------
    Single DataFrame with all flow data:
    | Source      | Target    | Value |
    |-------------|-----------|-------|
    | Brand Media | TV        | 100   |
    | Brand Media | Radio     | 50    |
    | TV          | BU_North  | 60    |
    | TV          | BU_South  | 40    |
    | Radio       | BU_North  | 30    |

    Example:
    --------
    # Basic multi-layer Sankey
    fig = vis.plot_sankey(df, 'Source', 'Target', 'Value')

    # Sankey with custom colors
    color_map = {
        'Brand Media': 'rgba(246, 107, 109, 0.6)',
        'TV': 'rgba(246, 107, 109, 0.6)',
        'Radio': 'rgba(246, 107, 109, 0.6)',
        'default': 'rgba(175, 175, 175, 0.6)'
    }
    fig = vis.plot_sankey(df, 'Source', 'Target', 'Value',
                          title='Brand Media Effects', color_mapping=color_map)

    """
    # Extract keyword arguments
    title = kwargs.get("title")
    color_mapping = kwargs.get("color_mapping")

    # Validate required columns
    required_cols = [source_col, target_col, value_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        error_msg = f"Missing columns: {missing_cols}"
        raise ValueError(error_msg)

    # Ensure value column is numeric
    if not pd.api.types.is_numeric_dtype(df[value_col]):
        error_msg = f"Value column '{value_col}' must be numeric"
        raise ValueError(error_msg)

    # Work on a reduced copy without missing or zero-valued rows
    work_df = df[required_cols].dropna()
    work_df = work_df[work_df[value_col] != 0]

    if work_df.empty:
        error_msg = "No valid data rows found after removing missing/zero values"
        raise ValueError(error_msg)

    sources = work_df[source_col].tolist()
    targets = work_df[target_col].tolist()
    source_set = set(sources)
    target_set = set(targets)

    # Order the nodes left-to-right: pure sources, then nodes that are both
    # source and target, then pure targets. Within each group the nodes
    # keep their first-appearance order (dict.fromkeys de-duplicates while
    # preserving order), so the node list is the same on every run instead
    # of depending on set iteration order.
    unique_sources = list(dict.fromkeys(sources))
    layer_1_nodes = [node for node in unique_sources if node not in target_set]
    intermediate_nodes = [node for node in unique_sources if node in target_set]
    final_layer_nodes = [
        node for node in dict.fromkeys(targets) if node not in source_set
    ]

    all_nodes = layer_1_nodes + intermediate_nodes + final_layer_nodes
    node_dict = {node: i for i, node in enumerate(all_nodes)}

    # Links are expressed as indices into the node list
    source_indices = [node_dict[source] for source in sources]
    target_indices = [node_dict[target] for target in targets]
    values = work_df[value_col].tolist()

    if color_mapping:
        # One color per link: source's color, else target's, else default.
        fallback = color_mapping.get("default", "rgba(175, 175, 175, 0.6)")
        link_colors = [
            color_mapping[source]
            if source in color_mapping
            else color_mapping.get(target, fallback)
            for source, target in zip(sources, targets)
        ]
    else:
        # Single translucent color chosen to contrast with the theme
        link_colors = (
            "rgba(255,255,255,0.3)"
            if self.current_theme == "dark"
            else "rgba(0,0,0,0.3)"
        )

    theme = self.themes[self.current_theme]

    # Create Sankey diagram
    fig = go.Figure(
        data=[
            go.Sankey(
                node={
                    "pad": 15,
                    "thickness": 20,
                    "line": {"color": "black", "width": 0.5},
                    "label": all_nodes,
                    "color": theme["line_color"],
                },
                link={
                    "source": source_indices,
                    "target": target_indices,
                    "value": values,
                    "color": link_colors,
                },
            )
        ]
    )

    fig.update_layout(
        title_text=title or f"Sankey Diagram - {source_col} to {target_col}",
        font_size=theme["font_size"],
        font_color=theme["text_color"],
        paper_bgcolor=theme["background_color"],
    )

    return fig
|
|
506
|
+
|
|
26
507
|
def plot_one(self, df1, col1, date_column):
    """
    Plots specified column from a DataFrame with themed styling.

    Uses a specified date column as the X-axis. The input DataFrame is not
    modified: if the date column has to be converted to datetime, the
    conversion is done on a copy.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Input DataFrame
    col1 : str
        Column name from the DataFrame to plot
    date_column : str
        The name of the date column to use for the X-axis. Non-datetime
        values are parsed with ``dayfirst=True``.

    Returns
    -------
    plotly.graph_objects.Figure
        The line plot figure

    Raises
    ------
    ValueError
        If either column is missing, or the date column cannot be
        converted to datetime.

    """
    # Check if columns exist in the DataFrame
    if col1 not in df1.columns or date_column not in df1.columns:
        error_msg = "Column not found in DataFrame"
        raise ValueError(error_msg)

    # Convert the date column only when it is not already datetime
    if not pd.api.types.is_datetime64_any_dtype(df1[date_column]):
        try:
            parsed_dates = pd.to_datetime(df1[date_column], dayfirst=True)
        except (ValueError, TypeError) as e:
            error_msg = f"Error converting {date_column} to datetime: {e}"
            raise ValueError(error_msg) from e
        # Rebind to a copy so the caller's DataFrame keeps its original
        # column values.
        df1 = df1.copy()
        df1[date_column] = parsed_dates

    # Plotting using Plotly Express
    fig = px.line(df1, x=date_column, y=col1)

    # Apply theme
    fig = self._apply_theme(fig)

    return fig
|
|
65
548
|
|
|
66
|
-
def plot_two(self,
|
|
549
|
+
def plot_two(self, data_config, *, same_axis=True, title="Comparison Plot"):
    """
    Plots specified columns from two different DataFrames with themed styling.

    Both series are drawn as lines against ``date_column``. Rows whose date
    cannot be parsed are left out of the plot. The input DataFrames are not
    modified; parsing and filtering happen on copies.

    Parameters
    ----------
    data_config : dict
        Dictionary with keys: 'df1', 'col1', 'df2', 'col2', 'date_column'
    same_axis : bool, default True
        If True, plot both traces on the same y-axis; otherwise, use separate y-axes
    title : str, default "Comparison Plot"
        Custom title for the plot

    Returns
    -------
    plotly.graph_objects.Figure
        The comparison plot figure

    Raises
    ------
    KeyError
        If ``data_config`` lacks one of the required keys.
    ValueError
        If a value column or the date column is missing from either
        DataFrame.

    """
    # Extract parameters from config
    df1 = data_config["df1"]
    col1 = data_config["col1"]
    df2 = data_config["df2"]
    col2 = data_config["col2"]
    date_column = data_config["date_column"]

    # Validate inputs
    if col1 not in df1.columns or date_column not in df1.columns:
        error_msg = (
            f"Column {col1} or {date_column} not found in the first DataFrame."
        )
        raise ValueError(error_msg)
    if col2 not in df2.columns or date_column not in df2.columns:
        error_msg = (
            f"Column {col2} or {date_column} not found in the second DataFrame."
        )
        raise ValueError(error_msg)

    def _with_parsed_dates(frame):
        """Return a copy of frame with parsed dates; unparseable rows dropped."""
        # Copy first: coercing in place would overwrite the caller's
        # unparseable date values with NaT.
        prepared = frame.copy()
        prepared[date_column] = pd.to_datetime(
            prepared[date_column], errors="coerce"
        )
        return prepared.dropna(subset=[date_column])

    df1 = _with_parsed_dates(df1)
    df2 = _with_parsed_dates(df2)

    theme = self.themes[self.current_theme]

    # Create traces; only the first one is given the theme's line color
    trace1 = go.Scatter(
        x=df1[date_column],
        y=df1[col1],
        mode="lines",
        name=col1,
        yaxis="y1",
        line={"color": theme["line_color"]},
    )
    trace2 = go.Scatter(
        x=df2[date_column],
        y=df2[col2],
        mode="lines",
        name=col2,
        # The second series moves to its own axis when requested
        yaxis="y1" if same_axis else "y2",
    )

    # Create figure
    fig = go.Figure(data=[trace1, trace2])

    # Apply theme
    fig = self._apply_theme(fig)

    # Secondary y-axis on the right, drawn over the primary one
    if not same_axis:
        fig.update_layout(
            yaxis2={
                "title": f"{col2} (y2)",
                "overlaying": "y",
                "side": "right",
                "showline": True,
                "linecolor": theme["text_color"],
                "rangemode": "tozero",
            }
        )

    # Title and a horizontal legend centered above the plot area
    fig.update_layout(
        title=title,
        showlegend=True,
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "center",
            "x": 0.5,
        },
    )

    return fig
|
|
127
655
|
|
|
128
|
-
def plot_chart(self,
|
|
656
|
+
def plot_chart(self, data_config):
|
|
129
657
|
"""
|
|
130
|
-
Plot various types of charts using Plotly.
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
658
|
+
Plot various types of charts using Plotly with themed styling.
|
|
659
|
+
|
|
660
|
+
Parameters
|
|
661
|
+
----------
|
|
662
|
+
data_config : dict
|
|
663
|
+
Configuration dictionary with keys:
|
|
664
|
+
- df: DataFrame containing the data
|
|
665
|
+
- date_col: The name of the column with date information
|
|
666
|
+
- value_cols: List of columns to plot
|
|
667
|
+
- chart_type: Type of chart ('line', 'bar', 'scatter', etc.)
|
|
668
|
+
- title: Title of the chart
|
|
669
|
+
- x_title: Title of the x-axis
|
|
670
|
+
- y_title: Title of the y-axis
|
|
671
|
+
- kwargs: Additional keyword arguments
|
|
672
|
+
|
|
673
|
+
Returns
|
|
674
|
+
-------
|
|
675
|
+
plotly.graph_objects.Figure
|
|
676
|
+
The chart figure
|
|
677
|
+
|
|
144
678
|
"""
|
|
145
|
-
|
|
146
|
-
|
|
679
|
+
# Extract parameters with defaults
|
|
680
|
+
dataframe = data_config["df"]
|
|
681
|
+
date_col = data_config["date_col"]
|
|
682
|
+
value_cols = data_config["value_cols"]
|
|
683
|
+
chart_type = data_config.get("chart_type", "line")
|
|
684
|
+
title = data_config.get("title", "Chart")
|
|
685
|
+
x_title = data_config.get("x_title", "Date")
|
|
686
|
+
y_title = data_config.get("y_title", "Values")
|
|
687
|
+
kwargs = data_config.get("kwargs", {})
|
|
147
688
|
|
|
148
689
|
# Ensure the date column is in datetime format
|
|
149
|
-
|
|
690
|
+
dataframe[date_col] = pd.to_datetime(dataframe[date_col])
|
|
150
691
|
|
|
151
692
|
# Validate input columns
|
|
152
|
-
value_cols = [
|
|
693
|
+
value_cols = [
|
|
694
|
+
col for col in value_cols if col in dataframe.columns and col != date_col
|
|
695
|
+
]
|
|
153
696
|
if not value_cols:
|
|
154
|
-
|
|
697
|
+
error_msg = "No valid columns provided for plotting."
|
|
698
|
+
raise ValueError(error_msg)
|
|
155
699
|
|
|
156
700
|
# Initialize the figure
|
|
157
701
|
fig = go.Figure()
|
|
158
702
|
|
|
159
703
|
# Define a mapping for chart types to corresponding Plotly trace types
|
|
160
704
|
chart_trace_map = {
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
705
|
+
"line": lambda col: go.Scatter(
|
|
706
|
+
x=dataframe[date_col],
|
|
707
|
+
y=dataframe[col],
|
|
708
|
+
mode="lines",
|
|
709
|
+
name=col,
|
|
710
|
+
**kwargs,
|
|
711
|
+
),
|
|
712
|
+
"bar": lambda col: go.Bar(
|
|
713
|
+
x=dataframe[date_col], y=dataframe[col], name=col, **kwargs
|
|
714
|
+
),
|
|
715
|
+
"scatter": lambda col: go.Scatter(
|
|
716
|
+
x=dataframe[date_col],
|
|
717
|
+
y=dataframe[col],
|
|
718
|
+
mode="markers",
|
|
719
|
+
name=col,
|
|
720
|
+
**kwargs,
|
|
721
|
+
),
|
|
722
|
+
"area": lambda col: go.Scatter(
|
|
723
|
+
x=dataframe[date_col],
|
|
724
|
+
y=dataframe[col],
|
|
725
|
+
mode="lines",
|
|
726
|
+
fill="tozeroy",
|
|
727
|
+
name=col,
|
|
728
|
+
**kwargs,
|
|
170
729
|
),
|
|
171
|
-
'funnel': lambda col: go.Funnel(y=df[date_col], x=df[col], **kwargs),
|
|
172
|
-
'waterfall': lambda col: go.Waterfall(x=df[date_col], y=df[col], measure=df[value_cols[1]], **kwargs),
|
|
173
|
-
'scatter3d': lambda _: go.Scatter3d(
|
|
174
|
-
x=df[value_cols[0]], y=df[value_cols[1]], z=df[value_cols[2]],
|
|
175
|
-
mode='markers', **kwargs
|
|
176
|
-
)
|
|
177
730
|
}
|
|
178
731
|
|
|
179
732
|
# Generate traces for the selected chart type
|
|
@@ -182,15 +735,18 @@ class datavis:
|
|
|
182
735
|
trace = chart_trace_map[chart_type](col)
|
|
183
736
|
fig.add_trace(trace)
|
|
184
737
|
else:
|
|
185
|
-
|
|
738
|
+
error_msg = f"Unsupported chart type: {chart_type}"
|
|
739
|
+
raise ValueError(error_msg)
|
|
740
|
+
|
|
741
|
+
# Apply theme
|
|
742
|
+
fig = self._apply_theme(fig)
|
|
186
743
|
|
|
187
|
-
# Update the layout
|
|
744
|
+
# Update the layout
|
|
188
745
|
fig.update_layout(
|
|
189
746
|
title=title,
|
|
190
747
|
xaxis_title=x_title,
|
|
191
748
|
yaxis_title=y_title,
|
|
192
|
-
legend_title=
|
|
193
|
-
template='plotly_dark'
|
|
749
|
+
legend_title="Series",
|
|
194
750
|
)
|
|
195
751
|
|
|
196
|
-
return fig
|
|
752
|
+
return fig
|