kollabor 0.4.9__py3-none-any.whl → 0.4.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +2 -0
- agents/coder/__init__.py +0 -0
- agents/coder/agent.json +4 -0
- agents/coder/api-integration.md +2150 -0
- agents/coder/cli-pretty.md +765 -0
- agents/coder/code-review.md +1092 -0
- agents/coder/database-design.md +1525 -0
- agents/coder/debugging.md +1102 -0
- agents/coder/dependency-management.md +1397 -0
- agents/coder/git-workflow.md +1099 -0
- agents/coder/refactoring.md +1454 -0
- agents/coder/security-hardening.md +1732 -0
- agents/coder/system_prompt.md +1448 -0
- agents/coder/tdd.md +1367 -0
- agents/creative-writer/__init__.py +0 -0
- agents/creative-writer/agent.json +4 -0
- agents/creative-writer/character-development.md +1852 -0
- agents/creative-writer/dialogue-craft.md +1122 -0
- agents/creative-writer/plot-structure.md +1073 -0
- agents/creative-writer/revision-editing.md +1484 -0
- agents/creative-writer/system_prompt.md +690 -0
- agents/creative-writer/worldbuilding.md +2049 -0
- agents/data-analyst/__init__.py +30 -0
- agents/data-analyst/agent.json +4 -0
- agents/data-analyst/data-visualization.md +992 -0
- agents/data-analyst/exploratory-data-analysis.md +1110 -0
- agents/data-analyst/pandas-data-manipulation.md +1081 -0
- agents/data-analyst/sql-query-optimization.md +881 -0
- agents/data-analyst/statistical-analysis.md +1118 -0
- agents/data-analyst/system_prompt.md +928 -0
- agents/default/__init__.py +0 -0
- agents/default/agent.json +4 -0
- agents/default/dead-code.md +794 -0
- agents/default/explore-agent-system.md +585 -0
- agents/default/system_prompt.md +1448 -0
- agents/kollabor/__init__.py +0 -0
- agents/kollabor/analyze-plugin-lifecycle.md +175 -0
- agents/kollabor/analyze-terminal-rendering.md +388 -0
- agents/kollabor/code-review.md +1092 -0
- agents/kollabor/debug-mcp-integration.md +521 -0
- agents/kollabor/debug-plugin-hooks.md +547 -0
- agents/kollabor/debugging.md +1102 -0
- agents/kollabor/dependency-management.md +1397 -0
- agents/kollabor/git-workflow.md +1099 -0
- agents/kollabor/inspect-llm-conversation.md +148 -0
- agents/kollabor/monitor-event-bus.md +558 -0
- agents/kollabor/profile-performance.md +576 -0
- agents/kollabor/refactoring.md +1454 -0
- agents/kollabor/system_prompt copy.md +1448 -0
- agents/kollabor/system_prompt.md +757 -0
- agents/kollabor/trace-command-execution.md +178 -0
- agents/kollabor/validate-config.md +879 -0
- agents/research/__init__.py +0 -0
- agents/research/agent.json +4 -0
- agents/research/architecture-mapping.md +1099 -0
- agents/research/codebase-analysis.md +1077 -0
- agents/research/dependency-audit.md +1027 -0
- agents/research/performance-profiling.md +1047 -0
- agents/research/security-review.md +1359 -0
- agents/research/system_prompt.md +492 -0
- agents/technical-writer/__init__.py +0 -0
- agents/technical-writer/agent.json +4 -0
- agents/technical-writer/api-documentation.md +2328 -0
- agents/technical-writer/changelog-management.md +1181 -0
- agents/technical-writer/readme-writing.md +1360 -0
- agents/technical-writer/style-guide.md +1410 -0
- agents/technical-writer/system_prompt.md +653 -0
- agents/technical-writer/tutorial-creation.md +1448 -0
- core/__init__.py +0 -2
- core/application.py +343 -88
- core/cli.py +229 -10
- core/commands/menu_renderer.py +463 -59
- core/commands/registry.py +14 -9
- core/commands/system_commands.py +2461 -14
- core/config/loader.py +151 -37
- core/config/service.py +18 -6
- core/events/bus.py +29 -9
- core/events/executor.py +205 -75
- core/events/models.py +27 -8
- core/fullscreen/command_integration.py +20 -24
- core/fullscreen/components/__init__.py +10 -1
- core/fullscreen/components/matrix_components.py +1 -2
- core/fullscreen/components/space_shooter_components.py +654 -0
- core/fullscreen/plugin.py +5 -0
- core/fullscreen/renderer.py +52 -13
- core/fullscreen/session.py +52 -15
- core/io/__init__.py +29 -5
- core/io/buffer_manager.py +6 -1
- core/io/config_status_view.py +7 -29
- core/io/core_status_views.py +267 -347
- core/io/input/__init__.py +25 -0
- core/io/input/command_mode_handler.py +711 -0
- core/io/input/display_controller.py +128 -0
- core/io/input/hook_registrar.py +286 -0
- core/io/input/input_loop_manager.py +421 -0
- core/io/input/key_press_handler.py +502 -0
- core/io/input/modal_controller.py +1011 -0
- core/io/input/paste_processor.py +339 -0
- core/io/input/status_modal_renderer.py +184 -0
- core/io/input_errors.py +5 -1
- core/io/input_handler.py +211 -2452
- core/io/key_parser.py +7 -0
- core/io/layout.py +15 -3
- core/io/message_coordinator.py +111 -2
- core/io/message_renderer.py +129 -4
- core/io/status_renderer.py +147 -607
- core/io/terminal_renderer.py +97 -51
- core/io/terminal_state.py +21 -4
- core/io/visual_effects.py +816 -165
- core/llm/agent_manager.py +1063 -0
- core/llm/api_adapters/__init__.py +44 -0
- core/llm/api_adapters/anthropic_adapter.py +432 -0
- core/llm/api_adapters/base.py +241 -0
- core/llm/api_adapters/openai_adapter.py +326 -0
- core/llm/api_communication_service.py +167 -113
- core/llm/conversation_logger.py +322 -16
- core/llm/conversation_manager.py +556 -30
- core/llm/file_operations_executor.py +84 -32
- core/llm/llm_service.py +934 -103
- core/llm/mcp_integration.py +541 -57
- core/llm/message_display_service.py +135 -18
- core/llm/plugin_sdk.py +1 -2
- core/llm/profile_manager.py +1183 -0
- core/llm/response_parser.py +274 -56
- core/llm/response_processor.py +16 -3
- core/llm/tool_executor.py +6 -1
- core/logging/__init__.py +2 -0
- core/logging/setup.py +34 -6
- core/models/resume.py +54 -0
- core/plugins/__init__.py +4 -2
- core/plugins/base.py +127 -0
- core/plugins/collector.py +23 -161
- core/plugins/discovery.py +37 -3
- core/plugins/factory.py +6 -12
- core/plugins/registry.py +5 -17
- core/ui/config_widgets.py +128 -28
- core/ui/live_modal_renderer.py +2 -1
- core/ui/modal_actions.py +5 -0
- core/ui/modal_overlay_renderer.py +0 -60
- core/ui/modal_renderer.py +268 -7
- core/ui/modal_state_manager.py +29 -4
- core/ui/widgets/base_widget.py +7 -0
- core/updates/__init__.py +10 -0
- core/updates/version_check_service.py +348 -0
- core/updates/version_comparator.py +103 -0
- core/utils/config_utils.py +685 -526
- core/utils/plugin_utils.py +1 -1
- core/utils/session_naming.py +111 -0
- fonts/LICENSE +21 -0
- fonts/README.md +46 -0
- fonts/SymbolsNerdFont-Regular.ttf +0 -0
- fonts/SymbolsNerdFontMono-Regular.ttf +0 -0
- fonts/__init__.py +44 -0
- {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/METADATA +54 -4
- kollabor-0.4.15.dist-info/RECORD +228 -0
- {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/top_level.txt +2 -0
- plugins/agent_orchestrator/__init__.py +39 -0
- plugins/agent_orchestrator/activity_monitor.py +181 -0
- plugins/agent_orchestrator/file_attacher.py +77 -0
- plugins/agent_orchestrator/message_injector.py +135 -0
- plugins/agent_orchestrator/models.py +48 -0
- plugins/agent_orchestrator/orchestrator.py +403 -0
- plugins/agent_orchestrator/plugin.py +976 -0
- plugins/agent_orchestrator/xml_parser.py +191 -0
- plugins/agent_orchestrator_plugin.py +9 -0
- plugins/enhanced_input/box_styles.py +1 -0
- plugins/enhanced_input/color_engine.py +19 -4
- plugins/enhanced_input/config.py +2 -2
- plugins/enhanced_input_plugin.py +61 -11
- plugins/fullscreen/__init__.py +6 -2
- plugins/fullscreen/example_plugin.py +1035 -222
- plugins/fullscreen/setup_wizard_plugin.py +592 -0
- plugins/fullscreen/space_shooter_plugin.py +131 -0
- plugins/hook_monitoring_plugin.py +436 -78
- plugins/query_enhancer_plugin.py +66 -30
- plugins/resume_conversation_plugin.py +1494 -0
- plugins/save_conversation_plugin.py +98 -32
- plugins/system_commands_plugin.py +70 -56
- plugins/tmux_plugin.py +154 -78
- plugins/workflow_enforcement_plugin.py +94 -92
- system_prompt/default.md +952 -886
- core/io/input_mode_manager.py +0 -402
- core/io/modal_interaction_handler.py +0 -315
- core/io/raw_input_processor.py +0 -946
- core/storage/__init__.py +0 -5
- core/storage/state_manager.py +0 -84
- core/ui/widget_integration.py +0 -222
- core/utils/key_reader.py +0 -171
- kollabor-0.4.9.dist-info/RECORD +0 -128
- {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/WHEEL +0 -0
- {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/entry_points.txt +0 -0
- {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,992 @@
|
|
|
1
|
+
<!-- Data Visualization skill - create compelling charts and graphs with matplotlib/seaborn -->
|
|
2
|
+
|
|
3
|
+
data visualization mode: VISUAL STORYTELLING
|
|
4
|
+
|
|
5
|
+
when this skill is active, you create charts that communicate insights clearly
|
|
6
|
+
and effectively. this is a comprehensive guide to data visualization best
|
|
7
|
+
practices and techniques.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
PHASE 0: VISUALIZATION ENVIRONMENT VERIFICATION
|
|
11
|
+
|
|
12
|
+
before creating ANY visualizations, verify your tools are ready.
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
check matplotlib availability
|
|
16
|
+
|
|
17
|
+
<terminal>python -c "import matplotlib; print('matplotlib', matplotlib.__version__)"</terminal>
|
|
18
|
+
|
|
19
|
+
if matplotlib not available:
|
|
20
|
+
<terminal>pip install matplotlib</terminal>
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
check seaborn availability (recommended)
|
|
24
|
+
|
|
25
|
+
<terminal>python -c "import seaborn; print('seaborn', seaborn.__version__)" 2>/dev/null || echo "seaborn not installed"</terminal>
|
|
26
|
+
|
|
27
|
+
if seaborn not installed (highly recommended):
|
|
28
|
+
<terminal>pip install seaborn</terminal>
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
check matplotlib backends
|
|
32
|
+
|
|
33
|
+
<terminal>python -c "import matplotlib.pyplot as plt; print('backend:', plt.get_backend())"</terminal>
|
|
34
|
+
|
|
35
|
+
check available backends:
|
|
36
|
+
<terminal>python -c "import matplotlib; print('available:', matplotlib.rcsetup.all_backends)"</terminal>
|
|
37
|
+
|
|
38
|
+
for jupyter notebooks:
|
|
39
|
+
<terminal>python -c "import matplotlib.pyplot as plt; plt.ion(); print('interactive mode enabled')"</terminal>
|
|
40
|
+
|
|
41
|
+
for static images (script output):
|
|
42
|
+
<terminal>python -c "import matplotlib; matplotlib.use('Agg'); print('Agg backend configured')"</terminal>
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
check figure display environment
|
|
46
|
+
|
|
47
|
+
<terminal>echo $DISPLAY</terminal>
|
|
48
|
+
|
|
49
|
+
<terminal>python -c "import os; print('JUPYTER_NOTEBOOK:', 'JPY_PARENT_PID' in os.environ)"</terminal>
|
|
50
|
+
|
|
51
|
+
<terminal>python -c "import matplotlib.pyplot as plt; plt.figure(); print('figure creation works')"</terminal>
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
verify output directory
|
|
55
|
+
|
|
56
|
+
<terminal>ls -la plots/ 2>/dev/null || { mkdir -p plots && echo "created plots/ directory"; }</terminal>
|
|
57
|
+
|
|
58
|
+
<terminal>ls -la figures/ 2>/dev/null || { mkdir -p figures && echo "created figures/ directory"; }</terminal>
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
check common data libraries
|
|
62
|
+
|
|
63
|
+
<terminal>python -c "import pandas; print('pandas', pandas.__version__)" 2>/dev/null || echo "pandas not installed"</terminal>
|
|
64
|
+
<terminal>python -c "import numpy; print('numpy', numpy.__version__)" 2>/dev/null || echo "numpy not installed"</terminal>
|
|
65
|
+
|
|
66
|
+
if missing:
|
|
67
|
+
<terminal>pip install pandas numpy</terminal>
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
PHASE 1: VISUALIZATION FUNDAMENTALS
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
understand your audience and purpose
|
|
74
|
+
|
|
75
|
+
before creating any chart, answer these questions:
|
|
76
|
+
|
|
77
|
+
who will see this visualization?
|
|
78
|
+
- technical audience (developers, data scientists)
|
|
79
|
+
- business stakeholders (managers, executives)
|
|
80
|
+
- general audience (customers, public)
|
|
81
|
+
- mixed audience
|
|
82
|
+
|
|
83
|
+
what is the purpose?
|
|
84
|
+
- exploration (discovering patterns)
|
|
85
|
+
- explanation (communicating findings)
|
|
86
|
+
- persuasion (convincing action)
|
|
87
|
+
- monitoring (tracking metrics)
|
|
88
|
+
|
|
89
|
+
what action should the viewer take?
|
|
90
|
+
- make a decision
|
|
91
|
+
- understand a trend
|
|
92
|
+
- compare options
|
|
93
|
+
- spot anomalies
|
|
94
|
+
|
|
95
|
+
what data complexity can they handle?
|
|
96
|
+
- simple metrics and comparisons
|
|
97
|
+
- multivariate relationships
|
|
98
|
+
- statistical distributions
|
|
99
|
+
- time series with seasonality
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
choose the right chart type
|
|
103
|
+
|
|
104
|
+
based on your analysis goal:
|
|
105
|
+
|
|
106
|
+
comparing values:
|
|
107
|
+
[ok] bar chart - compare categories
|
|
108
|
+
[ok] column chart - compare categories (vertical)
|
|
109
|
+
[ok] grouped bars - compare multiple series
|
|
110
|
+
[ok] stacked bars - show part-to-whole
|
|
111
|
+
|
|
112
|
+
showing distribution:
|
|
113
|
+
[ok] histogram - frequency distribution
|
|
114
|
+
[ok] density plot - smooth distribution
|
|
115
|
+
[ok] box plot - quartiles and outliers
|
|
116
|
+
[ok] violin plot - distribution + box plot
|
|
117
|
+
[ok] ridgeline plot - multiple distributions
|
|
118
|
+
|
|
119
|
+
showing relationships:
|
|
120
|
+
[ok] scatter plot - two variables
|
|
121
|
+
[ok] line chart - trends over time
|
|
122
|
+
[ok] bubble chart - three variables
|
|
123
|
+
[ok] heat map - correlation matrix
|
|
124
|
+
|
|
125
|
+
showing composition:
|
|
126
|
+
[ok] pie chart - part-to-whole (avoid if >5 categories)
|
|
127
|
+
[ok] stacked bar - composition over time
|
|
128
|
+
[ok] area chart - composition over time
|
|
129
|
+
[ok] treemap - hierarchical composition
|
|
130
|
+
|
|
131
|
+
showing geospatial:
|
|
132
|
+
[ok] choropleth map - values by region
|
|
133
|
+
[ok] bubble map - locations with magnitude
|
|
134
|
+
[ok] flow map - movement/connections
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
chart selection decision tree
|
|
138
|
+
|
|
139
|
+
is it time series data?
|
|
140
|
+
yes -> line chart (if few series) or small multiples (if many)
|
|
141
|
+
no -> continue
|
|
142
|
+
|
|
143
|
+
are you comparing categories?
|
|
144
|
+
yes -> horizontal bar chart (if labels are long) or column chart
|
|
145
|
+
no -> continue
|
|
146
|
+
|
|
147
|
+
are you showing distribution?
|
|
148
|
+
yes -> histogram (simple) or box plot (with outliers)
|
|
149
|
+
no -> continue
|
|
150
|
+
|
|
151
|
+
are you showing correlation?
|
|
152
|
+
yes -> scatter plot (2 variables) or heat map (many variables)
|
|
153
|
+
no -> continue
|
|
154
|
+
|
|
155
|
+
are you showing part-to-whole?
|
|
156
|
+
yes -> pie chart (if <=5 categories) or stacked bar (if time series)
|
|
157
|
+
no -> reconsider data and goals
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
PHASE 2: PLOT SETUP AND CONFIGURATION
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
basic plot template
|
|
164
|
+
|
|
165
|
+
import matplotlib.pyplot as plt
|
|
166
|
+
import seaborn as sns
|
|
167
|
+
import pandas as pd
|
|
168
|
+
import numpy as np
|
|
169
|
+
|
|
170
|
+
# set style
|
|
171
|
+
sns.set_style("whitegrid")
|
|
172
|
+
plt.figure(figsize=(12, 6))
|
|
173
|
+
|
|
174
|
+
# create your plot
|
|
175
|
+
# ... plotting code ...
|
|
176
|
+
|
|
177
|
+
# add labels and title
|
|
178
|
+
plt.xlabel("X-axis label")
|
|
179
|
+
plt.ylabel("Y-axis label")
|
|
180
|
+
plt.title("Descriptive Title Here")
|
|
181
|
+
|
|
182
|
+
# add grid
|
|
183
|
+
plt.grid(True, alpha=0.3)
|
|
184
|
+
|
|
185
|
+
# adjust layout
|
|
186
|
+
plt.tight_layout()
|
|
187
|
+
|
|
188
|
+
# save or show
|
|
189
|
+
plt.savefig("plots/output.png", dpi=300, bbox_inches="tight")
|
|
190
|
+
# plt.show()
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
configure matplotlib defaults
|
|
194
|
+
|
|
195
|
+
import matplotlib.pyplot as plt
|
|
196
|
+
|
|
197
|
+
# set global style
|
|
198
|
+
plt.style.use('seaborn-v0_8-whitegrid')
|
|
199
|
+
|
|
200
|
+
# configure defaults
|
|
201
|
+
plt.rcParams.update({
|
|
202
|
+
'figure.figsize': (12, 6),
|
|
203
|
+
'font.size': 11,
|
|
204
|
+
'axes.labelsize': 12,
|
|
205
|
+
'axes.titlesize': 14,
|
|
206
|
+
'xtick.labelsize': 10,
|
|
207
|
+
'ytick.labelsize': 10,
|
|
208
|
+
'legend.fontsize': 10,
|
|
209
|
+
'figure.dpi': 100,
|
|
210
|
+
'savefig.dpi': 300,
|
|
211
|
+
'axes.grid': True,
|
|
212
|
+
'grid.alpha': 0.3,
|
|
213
|
+
})
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
seaborn style options
|
|
217
|
+
|
|
218
|
+
import seaborn as sns
|
|
219
|
+
|
|
220
|
+
# available styles
|
|
221
|
+
# 'darkgrid', 'whitegrid', 'dark', 'white', 'ticks'
|
|
222
|
+
|
|
223
|
+
# set style
|
|
224
|
+
sns.set_style("whitegrid")
|
|
225
|
+
|
|
226
|
+
# set color palette
|
|
227
|
+
sns.set_palette("husl")
|
|
228
|
+
|
|
229
|
+
# available palettes:
|
|
230
|
+
# 'deep', 'muted', 'pastel', 'bright', 'dark', 'colorblind'
|
|
231
|
+
# 'husl', 'hls', 'Set1', 'Set2', 'Set3'
|
|
232
|
+
# 'Blues', 'Reds', 'Greens', etc. (sequential)
|
|
233
|
+
# 'RdBu', 'RdYlBu', etc. (diverging)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
color palette best practices
|
|
237
|
+
|
|
238
|
+
categorical data (5-7 categories):
|
|
239
|
+
sns.set_palette("husl", n_colors=len(categories))
|
|
240
|
+
|
|
241
|
+
sequential data (low to high):
|
|
242
|
+
sns.set_palette("Blues")
|
|
243
|
+
|
|
244
|
+
diverging data (neutral + extremes):
|
|
245
|
+
sns.set_palette("RdBu_r")
|
|
246
|
+
|
|
247
|
+
accessible colors:
|
|
248
|
+
sns.set_palette("colorblind")
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
PHASE 3: BASIC CHART TYPES
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
bar charts (categorical comparisons)
|
|
255
|
+
|
|
256
|
+
import matplotlib.pyplot as plt
|
|
257
|
+
import seaborn as sns
|
|
258
|
+
import pandas as pd
|
|
259
|
+
|
|
260
|
+
# simple bar chart
|
|
261
|
+
plt.figure(figsize=(10, 6))
|
|
262
|
+
sns.barplot(data=df, x='category', y='value')
|
|
263
|
+
plt.title("Sales by Category")
|
|
264
|
+
plt.xlabel("Category")
|
|
265
|
+
plt.ylabel("Sales ($)")
|
|
266
|
+
plt.xticks(rotation=45)
|
|
267
|
+
plt.tight_layout()
|
|
268
|
+
plt.savefig("plots/bar_chart.png", dpi=300)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# horizontal bar chart (better for long labels)
|
|
272
|
+
plt.figure(figsize=(10, 6))
|
|
273
|
+
sns.barplot(data=df, y='category', x='value')
|
|
274
|
+
plt.title("Sales by Category")
|
|
275
|
+
plt.xlabel("Sales ($)")
|
|
276
|
+
plt.ylabel("Category")
|
|
277
|
+
plt.tight_layout()
|
|
278
|
+
plt.savefig("plots/horizontal_bar_chart.png", dpi=300)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# grouped bar chart (multiple series)
|
|
282
|
+
plt.figure(figsize=(12, 6))
|
|
283
|
+
sns.barplot(data=df, x='category', y='value', hue='year')
|
|
284
|
+
plt.title("Sales by Category and Year")
|
|
285
|
+
plt.xlabel("Category")
|
|
286
|
+
plt.ylabel("Sales ($)")
|
|
287
|
+
plt.legend(title="Year")
|
|
288
|
+
plt.tight_layout()
|
|
289
|
+
plt.savefig("plots/grouped_bar_chart.png", dpi=300)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
histograms (distributions)
|
|
293
|
+
|
|
294
|
+
import matplotlib.pyplot as plt
|
|
295
|
+
import seaborn as sns
|
|
296
|
+
|
|
297
|
+
# basic histogram
|
|
298
|
+
plt.figure(figsize=(10, 6))
|
|
299
|
+
sns.histplot(data=df, x='value', bins=30)
|
|
300
|
+
plt.title("Distribution of Values")
|
|
301
|
+
plt.xlabel("Value")
|
|
302
|
+
plt.ylabel("Frequency")
|
|
303
|
+
plt.tight_layout()
|
|
304
|
+
plt.savefig("plots/histogram.png", dpi=300)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# histogram with kde curve
|
|
308
|
+
plt.figure(figsize=(10, 6))
|
|
309
|
+
sns.histplot(data=df, x='value', bins=30, kde=True)
|
|
310
|
+
plt.title("Distribution of Values with KDE")
|
|
311
|
+
plt.xlabel("Value")
|
|
312
|
+
plt.ylabel("Frequency")
|
|
313
|
+
plt.tight_layout()
|
|
314
|
+
plt.savefig("plots/histogram_kde.png", dpi=300)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
# multiple histograms
|
|
318
|
+
plt.figure(figsize=(10, 6))
|
|
319
|
+
sns.histplot(data=df, x='value', hue='category', bins=30, alpha=0.5)
|
|
320
|
+
plt.title("Distribution by Category")
|
|
321
|
+
plt.xlabel("Value")
|
|
322
|
+
plt.ylabel("Frequency")
|
|
323
|
+
plt.tight_layout()
|
|
324
|
+
plt.savefig("plots/histogram_multiple.png", dpi=300)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
box plots (distributions with outliers)
|
|
328
|
+
|
|
329
|
+
import matplotlib.pyplot as plt
|
|
330
|
+
import seaborn as sns
|
|
331
|
+
|
|
332
|
+
# single box plot
|
|
333
|
+
plt.figure(figsize=(10, 6))
|
|
334
|
+
sns.boxplot(data=df, y='value')
|
|
335
|
+
plt.title("Distribution of Values")
|
|
336
|
+
plt.ylabel("Value")
|
|
337
|
+
plt.tight_layout()
|
|
338
|
+
plt.savefig("plots/boxplot.png", dpi=300)
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
# box plot by category
|
|
342
|
+
plt.figure(figsize=(12, 6))
|
|
343
|
+
sns.boxplot(data=df, x='category', y='value')
|
|
344
|
+
plt.title("Distribution by Category")
|
|
345
|
+
plt.xlabel("Category")
|
|
346
|
+
plt.ylabel("Value")
|
|
347
|
+
plt.xticks(rotation=45)
|
|
348
|
+
plt.tight_layout()
|
|
349
|
+
plt.savefig("plots/boxplot_category.png", dpi=300)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# box plot with outliers highlighted
|
|
353
|
+
plt.figure(figsize=(12, 6))
|
|
354
|
+
sns.boxplot(data=df, x='category', y='value', showfliers=True)
|
|
355
|
+
plt.title("Distribution by Category with Outliers")
|
|
356
|
+
plt.xlabel("Category")
|
|
357
|
+
plt.ylabel("Value")
|
|
358
|
+
plt.xticks(rotation=45)
|
|
359
|
+
plt.tight_layout()
|
|
360
|
+
plt.savefig("plots/boxplot_outliers.png", dpi=300)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
scatter plots (relationships)
|
|
364
|
+
|
|
365
|
+
import matplotlib.pyplot as plt
|
|
366
|
+
import seaborn as sns
|
|
367
|
+
|
|
368
|
+
# basic scatter plot
|
|
369
|
+
plt.figure(figsize=(10, 6))
|
|
370
|
+
sns.scatterplot(data=df, x='variable_x', y='variable_y')
|
|
371
|
+
plt.title("Relationship between X and Y")
|
|
372
|
+
plt.xlabel("Variable X")
|
|
373
|
+
plt.ylabel("Variable Y")
|
|
374
|
+
plt.tight_layout()
|
|
375
|
+
plt.savefig("plots/scatter.png", dpi=300)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
# scatter with hue (color by category)
|
|
379
|
+
plt.figure(figsize=(10, 6))
|
|
380
|
+
sns.scatterplot(data=df, x='variable_x', y='variable_y', hue='category')
|
|
381
|
+
plt.title("Relationship by Category")
|
|
382
|
+
plt.xlabel("Variable X")
|
|
383
|
+
plt.ylabel("Variable Y")
|
|
384
|
+
plt.legend(title="Category")
|
|
385
|
+
plt.tight_layout()
|
|
386
|
+
plt.savefig("plots/scatter_hue.png", dpi=300)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# scatter with size (third variable)
|
|
390
|
+
plt.figure(figsize=(10, 6))
|
|
391
|
+
sns.scatterplot(data=df, x='variable_x', y='variable_y',
|
|
392
|
+
hue='category', size='variable_z')
|
|
393
|
+
plt.title("Multi-variable Relationship")
|
|
394
|
+
plt.xlabel("Variable X")
|
|
395
|
+
plt.ylabel("Variable Y")
|
|
396
|
+
plt.legend(title="Category")
|
|
397
|
+
plt.tight_layout()
|
|
398
|
+
plt.savefig("plots/scatter_size.png", dpi=300)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
line charts (time series)
|
|
402
|
+
|
|
403
|
+
import matplotlib.pyplot as plt
|
|
404
|
+
import seaborn as sns
|
|
405
|
+
|
|
406
|
+
# single line chart
|
|
407
|
+
plt.figure(figsize=(14, 6))
|
|
408
|
+
sns.lineplot(data=df, x='date', y='value')
|
|
409
|
+
plt.title("Value Over Time")
|
|
410
|
+
plt.xlabel("Date")
|
|
411
|
+
plt.ylabel("Value")
|
|
412
|
+
plt.tight_layout()
|
|
413
|
+
plt.savefig("plots/line_chart.png", dpi=300)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# multiple lines
|
|
417
|
+
plt.figure(figsize=(14, 6))
|
|
418
|
+
sns.lineplot(data=df, x='date', y='value', hue='category')
|
|
419
|
+
plt.title("Values Over Time by Category")
|
|
420
|
+
plt.xlabel("Date")
|
|
421
|
+
plt.ylabel("Value")
|
|
422
|
+
plt.legend(title="Category")
|
|
423
|
+
plt.tight_layout()
|
|
424
|
+
plt.savefig("plots/line_chart_multiple.png", dpi=300)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
# line chart with confidence interval
|
|
428
|
+
plt.figure(figsize=(14, 6))
|
|
429
|
+
sns.lineplot(data=df, x='date', y='value', errorbar=('ci', 95))
|
|
430
|
+
plt.title("Value Over Time with 95% CI")
|
|
431
|
+
plt.xlabel("Date")
|
|
432
|
+
plt.ylabel("Value")
|
|
433
|
+
plt.tight_layout()
|
|
434
|
+
plt.savefig("plots/line_chart_ci.png", dpi=300)
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
PHASE 4: ADVANCED VISUALIZATION TECHNIQUES
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
small multiples (facet grids)
|
|
441
|
+
|
|
442
|
+
import matplotlib.pyplot as plt
|
|
443
|
+
import seaborn as sns
|
|
444
|
+
|
|
445
|
+
# facet grid by category
|
|
446
|
+
g = sns.FacetGrid(df, col='category', col_wrap=3,
|
|
447
|
+
height=4, aspect=1.2)
|
|
448
|
+
g.map(sns.histplot, 'value', bins=20)
|
|
449
|
+
g.fig.suptitle("Distribution by Category", y=1.02)
|
|
450
|
+
plt.tight_layout()
|
|
451
|
+
plt.savefig("plots/facet_grid.png", dpi=300)
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
# facet grid with multiple variables
|
|
455
|
+
g = sns.FacetGrid(df, row='category1', col='category2',
|
|
456
|
+
height=4, aspect=1.2)
|
|
457
|
+
g.map(sns.scatterplot, 'x', 'y')
|
|
458
|
+
g.fig.suptitle("Scatter Plots by Category", y=1.02)
|
|
459
|
+
plt.tight_layout()
|
|
460
|
+
plt.savefig("plots/facet_grid_2d.png", dpi=300)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
# pair plot (all pairwise relationships)
|
|
464
|
+
sns.pairplot(df, hue='category', diag_kind='hist')
|
|
465
|
+
plt.suptitle("Pairwise Relationships", y=1.02)
|
|
466
|
+
plt.tight_layout()
|
|
467
|
+
plt.savefig("plots/pair_plot.png", dpi=300)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
heat maps (correlation matrices)
|
|
471
|
+
|
|
472
|
+
import matplotlib.pyplot as plt
|
|
473
|
+
import seaborn as sns
|
|
474
|
+
import pandas as pd
|
|
475
|
+
|
|
476
|
+
# correlation heatmap
|
|
477
|
+
plt.figure(figsize=(10, 8))
|
|
478
|
+
correlation_matrix = df.corr(numeric_only=True)
|
|
479
|
+
sns.heatmap(correlation_matrix, annot=True, fmt='.2f',
|
|
480
|
+
cmap='coolwarm', center=0,
|
|
481
|
+
square=True, linewidths=0.5)
|
|
482
|
+
plt.title("Correlation Matrix")
|
|
483
|
+
plt.tight_layout()
|
|
484
|
+
plt.savefig("plots/correlation_heatmap.png", dpi=300)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
# heatmap with custom palette
|
|
488
|
+
plt.figure(figsize=(12, 8))
|
|
489
|
+
sns.heatmap(correlation_matrix, annot=True, fmt='.2f',
|
|
490
|
+
cmap='RdBu_r', center=0, vmin=-1, vmax=1,
|
|
491
|
+
cbar_kws={'label': 'Correlation'},
|
|
492
|
+
square=True, linewidths=0.5)
|
|
493
|
+
plt.title("Correlation Matrix")
|
|
494
|
+
plt.tight_layout()
|
|
495
|
+
plt.savefig("plots/correlation_heatmap_custom.png", dpi=300)
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
violin plots (detailed distributions)
|
|
499
|
+
|
|
500
|
+
import matplotlib.pyplot as plt
|
|
501
|
+
import seaborn as sns
|
|
502
|
+
|
|
503
|
+
# single violin plot
|
|
504
|
+
plt.figure(figsize=(8, 6))
|
|
505
|
+
sns.violinplot(data=df, y='value')
|
|
506
|
+
plt.title("Distribution of Values")
|
|
507
|
+
plt.ylabel("Value")
|
|
508
|
+
plt.tight_layout()
|
|
509
|
+
plt.savefig("plots/violinplot.png", dpi=300)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
# violin plot by category
|
|
513
|
+
plt.figure(figsize=(12, 6))
|
|
514
|
+
sns.violinplot(data=df, x='category', y='value')
|
|
515
|
+
plt.title("Distribution by Category")
|
|
516
|
+
plt.xlabel("Category")
|
|
517
|
+
plt.ylabel("Value")
|
|
518
|
+
plt.xticks(rotation=45)
|
|
519
|
+
plt.tight_layout()
|
|
520
|
+
plt.savefig("plots/violinplot_category.png", dpi=300)
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
# split violin plot
|
|
524
|
+
plt.figure(figsize=(12, 6))
|
|
525
|
+
sns.violinplot(data=df, x='category', y='value',
|
|
526
|
+
hue='subcategory', split=True)
|
|
527
|
+
plt.title("Distribution by Category and Subcategory")
|
|
528
|
+
plt.xlabel("Category")
|
|
529
|
+
plt.ylabel("Value")
|
|
530
|
+
plt.legend(title="Subcategory")
|
|
531
|
+
plt.xticks(rotation=45)
|
|
532
|
+
plt.tight_layout()
|
|
533
|
+
plt.savefig("plots/violinplot_split.png", dpi=300)
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
time series visualization
|
|
537
|
+
|
|
538
|
+
import matplotlib.pyplot as plt
|
|
539
|
+
import seaborn as sns
|
|
540
|
+
|
|
541
|
+
# time series with trend line
|
|
542
|
+
plt.figure(figsize=(14, 6))
|
|
543
|
+
sns.lineplot(data=df, x='date', y='value', label='Value')
|
|
544
|
+
sns.regplot(data=df, x='date', y='value', scatter=False,
|
|
545
|
+
label='Trend', color='red')
|
|
546
|
+
plt.title("Value Over Time with Trend")
|
|
547
|
+
plt.xlabel("Date")
|
|
548
|
+
plt.ylabel("Value")
|
|
549
|
+
plt.legend()
|
|
550
|
+
plt.tight_layout()
|
|
551
|
+
plt.savefig("plots/time_series_trend.png", dpi=300)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
# time series with moving average
|
|
555
|
+
df['moving_avg'] = df['value'].rolling(window=7).mean()
|
|
556
|
+
|
|
557
|
+
plt.figure(figsize=(14, 6))
|
|
558
|
+
sns.lineplot(data=df, x='date', y='value',
|
|
559
|
+
label='Value', alpha=0.6)
|
|
560
|
+
sns.lineplot(data=df, x='date', y='moving_avg',
|
|
561
|
+
label='7-day Moving Avg', linewidth=2)
|
|
562
|
+
plt.title("Value Over Time with Moving Average")
|
|
563
|
+
plt.xlabel("Date")
|
|
564
|
+
plt.ylabel("Value")
|
|
565
|
+
plt.legend()
|
|
566
|
+
plt.tight_layout()
|
|
567
|
+
plt.savefig("plots/time_series_ma.png", dpi=300)
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
# time series with seasonality decomposition
|
|
571
|
+
from statsmodels.tsa.seasonal import seasonal_decompose
|
|
572
|
+
|
|
573
|
+
df.set_index('date', inplace=True)
|
|
574
|
+
decomposition = seasonal_decompose(df['value'], model='additive',
|
|
575
|
+
period=365)
|
|
576
|
+
|
|
577
|
+
fig, axes = plt.subplots(4, 1, figsize=(14, 10))
|
|
578
|
+
decomposition.observed.plot(ax=axes[0], title='Observed')
|
|
579
|
+
decomposition.trend.plot(ax=axes[1], title='Trend')
|
|
580
|
+
decomposition.seasonal.plot(ax=axes[2], title='Seasonal')
|
|
581
|
+
decomposition.resid.plot(ax=axes[3], title='Residual')
|
|
582
|
+
plt.tight_layout()
|
|
583
|
+
plt.savefig("plots/time_series_decomposition.png", dpi=300)
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
PHASE 5: CUSTOMIZATION AND STYLING
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
annotations and text
|
|
590
|
+
|
|
591
|
+
import matplotlib.pyplot as plt
|
|
592
|
+
import seaborn as sns
|
|
593
|
+
|
|
594
|
+
fig, ax = plt.subplots(figsize=(10, 6))
|
|
595
|
+
|
|
596
|
+
sns.barplot(data=df, x='category', y='value', ax=ax)
|
|
597
|
+
|
|
598
|
+
# add value labels on bars
|
|
599
|
+
for i, v in enumerate(df['value']):
|
|
600
|
+
ax.text(i, v, f'{v:.1f}', ha='center', va='bottom')
|
|
601
|
+
|
|
602
|
+
# add annotation for max value
|
|
603
|
+
max_idx = df['value'].idxmax()
|
|
604
|
+
max_cat = df.loc[max_idx, 'category']
|
|
605
|
+
max_val = df.loc[max_idx, 'value']
|
|
606
|
+
ax.annotate(f'Max: {max_val}',
|
|
607
|
+
xy=(max_idx, max_val),
|
|
608
|
+
xytext=(max_idx, max_val * 1.1),
|
|
609
|
+
arrowprops=dict(arrowstyle='->', color='red'))
|
|
610
|
+
|
|
611
|
+
plt.title("Sales by Category with Annotations")
|
|
612
|
+
plt.xlabel("Category")
|
|
613
|
+
plt.ylabel("Sales ($)")
|
|
614
|
+
plt.xticks(rotation=45)
|
|
615
|
+
plt.tight_layout()
|
|
616
|
+
plt.savefig("plots/annotated_bars.png", dpi=300)
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
custom legends
|
|
620
|
+
|
|
621
|
+
import matplotlib.pyplot as plt
|
|
622
|
+
import seaborn as sns
|
|
623
|
+
|
|
624
|
+
fig, ax = plt.subplots(figsize=(10, 6))
|
|
625
|
+
|
|
626
|
+
sns.lineplot(data=df, x='date', y='value',
|
|
627
|
+
hue='category', ax=ax)
|
|
628
|
+
|
|
629
|
+
# customize legend
|
|
630
|
+
ax.legend(title='Category',
|
|
631
|
+
bbox_to_anchor=(1.05, 1),
|
|
632
|
+
loc='upper left',
|
|
633
|
+
ncol=1,
|
|
634
|
+
frameon=True,
|
|
635
|
+
shadow=True)
|
|
636
|
+
|
|
637
|
+
plt.title("Values Over Time")
|
|
638
|
+
plt.xlabel("Date")
|
|
639
|
+
plt.ylabel("Value")
|
|
640
|
+
plt.tight_layout()
|
|
641
|
+
plt.savefig("plots/custom_legend.png", dpi=300)
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
custom axes and limits
|
|
645
|
+
|
|
646
|
+
import matplotlib.pyplot as plt
|
|
647
|
+
import seaborn as sns
|
|
648
|
+
|
|
649
|
+
fig, ax = plt.subplots(figsize=(10, 6))
|
|
650
|
+
|
|
651
|
+
sns.scatterplot(data=df, x='x', y='y', hue='category', ax=ax)
|
|
652
|
+
|
|
653
|
+
# set custom limits
|
|
654
|
+
ax.set_xlim(0, 100)
|
|
655
|
+
ax.set_ylim(0, 100)
|
|
656
|
+
|
|
657
|
+
# log scale (data and axis limits must be positive; a lower limit of 0 cannot be shown on a log axis)
|
|
658
|
+
ax.set_xscale('log')
|
|
659
|
+
ax.set_yscale('log')
|
|
660
|
+
|
|
661
|
+
# custom ticks
|
|
662
|
+
ax.set_xticks([1, 10, 100])
|
|
663
|
+
ax.set_yticks([1, 10, 100])
|
|
664
|
+
|
|
665
|
+
plt.title("Log-scale Scatter Plot")
|
|
666
|
+
plt.xlabel("X (log scale)")
|
|
667
|
+
plt.ylabel("Y (log scale)")
|
|
668
|
+
plt.tight_layout()
|
|
669
|
+
plt.savefig("plots/custom_axes.png", dpi=300)
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
subplots and figure layout
|
|
673
|
+
|
|
674
|
+
import matplotlib.pyplot as plt
|
|
675
|
+
import seaborn as sns
|
|
676
|
+
|
|
677
|
+
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
|
|
678
|
+
|
|
679
|
+
# subplot 1: histogram
|
|
680
|
+
sns.histplot(data=df, x='value', bins=30, ax=axes[0, 0])
|
|
681
|
+
axes[0, 0].set_title('Distribution')
|
|
682
|
+
|
|
683
|
+
# subplot 2: box plot
|
|
684
|
+
sns.boxplot(data=df, x='category', y='value', ax=axes[0, 1])
|
|
685
|
+
axes[0, 1].set_title('By Category')
|
|
686
|
+
axes[0, 1].tick_params(axis='x', rotation=45)
|
|
687
|
+
|
|
688
|
+
# subplot 3: scatter plot
|
|
689
|
+
sns.scatterplot(data=df, x='x', y='y', ax=axes[1, 0])
|
|
690
|
+
axes[1, 0].set_title('Relationship')
|
|
691
|
+
|
|
692
|
+
# subplot 4: time series
|
|
693
|
+
sns.lineplot(data=df, x='date', y='value', ax=axes[1, 1])
|
|
694
|
+
axes[1, 1].set_title('Over Time')
|
|
695
|
+
|
|
696
|
+
plt.tight_layout()
|
|
697
|
+
plt.savefig("plots/subplots.png", dpi=300)
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
PHASE 6: EXPORTING AND FORMATTING
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
high-quality output formats
|
|
704
|
+
|
|
705
|
+
import matplotlib.pyplot as plt
|
|
706
|
+
|
|
707
|
+
# PNG (lossless, transparency support)
|
|
708
|
+
plt.savefig("plots/output.png",
|
|
709
|
+
dpi=300,
|
|
710
|
+
bbox_inches='tight',
|
|
711
|
+
transparent=False,
|
|
712
|
+
facecolor='white')
|
|
713
|
+
|
|
714
|
+
# PDF (vector, publication quality)
|
|
715
|
+
plt.savefig("plots/output.pdf",
|
|
716
|
+
bbox_inches='tight',
|
|
717
|
+
transparent=False,
|
|
718
|
+
facecolor='white')
|
|
719
|
+
|
|
720
|
+
# SVG (vector, web-friendly)
|
|
721
|
+
plt.savefig("plots/output.svg",
|
|
722
|
+
bbox_inches='tight',
|
|
723
|
+
transparent=False,
|
|
724
|
+
facecolor='white')
|
|
725
|
+
|
|
726
|
+
# EPS (vector, LaTeX friendly)
|
|
727
|
+
plt.savefig("plots/output.eps",
|
|
728
|
+
bbox_inches='tight',
|
|
729
|
+
transparent=False,
|
|
730
|
+
facecolor='white')
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
resolution and size guidelines
|
|
734
|
+
|
|
735
|
+
for web display:
|
|
736
|
+
dpi: 72-100
|
|
737
|
+
figsize: (10, 6) or (12, 6)
|
|
738
|
+
format: PNG
|
|
739
|
+
|
|
740
|
+
for presentations:
|
|
741
|
+
dpi: 150-200
|
|
742
|
+
figsize: (12, 7) or (14, 8)
|
|
743
|
+
format: PNG or PDF
|
|
744
|
+
|
|
745
|
+
for publications:
|
|
746
|
+
dpi: 300-600
|
|
747
|
+
figsize: (8, 5) or (10, 6)
|
|
748
|
+
format: PDF, EPS, or SVG
|
|
749
|
+
|
|
750
|
+
for posters:
|
|
751
|
+
dpi: 300+
|
|
752
|
+
figsize: (20, 15) or larger
|
|
753
|
+
format: PDF or PNG
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
PHASE 7: INTERACTIVE VISUALIZATION
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
plotly interactive charts
|
|
760
|
+
|
|
761
|
+
import plotly.express as px
|
|
762
|
+
import plotly.graph_objects as go
|
|
763
|
+
|
|
764
|
+
# install if needed
|
|
765
|
+
# pip install plotly
|
|
766
|
+
|
|
767
|
+
# interactive scatter plot
|
|
768
|
+
fig = px.scatter(df, x='x', y='y', color='category',
|
|
769
|
+
hover_data=['value'], title="Interactive Scatter")
|
|
770
|
+
fig.write_html("plots/interactive_scatter.html")
|
|
771
|
+
fig.show()
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
# interactive line chart
|
|
775
|
+
fig = px.line(df, x='date', y='value', color='category',
|
|
776
|
+
title="Interactive Time Series")
|
|
777
|
+
fig.write_html("plots/interactive_line.html")
|
|
778
|
+
fig.show()
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
# interactive histogram
|
|
782
|
+
fig = px.histogram(df, x='value', color='category',
|
|
783
|
+
nbins=30, title="Interactive Distribution")
|
|
784
|
+
fig.write_html("plots/interactive_histogram.html")
|
|
785
|
+
fig.show()
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
PHASE 8: DATA VISUALIZATION CHECKLIST
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
pre-visualization checklist
|
|
792
|
+
|
|
793
|
+
[ ] understand audience and purpose
|
|
794
|
+
[ ] define the message you want to communicate
|
|
795
|
+
[ ] choose appropriate chart type for data and goal
|
|
796
|
+
[ ] verify data quality and completeness
|
|
797
|
+
[ ] handle missing values appropriately
|
|
798
|
+
[ ] ensure data types are correct
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
design checklist
|
|
802
|
+
|
|
803
|
+
[ ] axis labels are clear and descriptive
|
|
804
|
+
[ ] title communicates main insight
|
|
805
|
+
[ ] color palette is accessible and appropriate
|
|
806
|
+
[ ] legend is positioned to not obscure data
|
|
807
|
+
[ ] font size is readable at output size
|
|
808
|
+
[ ] aspect ratio preserves data proportions
|
|
809
|
+
[ ] grid lines aid reading without distraction
|
|
810
|
+
[ ] annotations add value, not clutter
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
accuracy checklist
|
|
814
|
+
|
|
815
|
+
[ ] y-axis starts at zero (unless intentionally broken)
|
|
816
|
+
[ ] scales are appropriate for data range
|
|
817
|
+
[ ] error bars/CI shown where appropriate
|
|
818
|
+
[ ] sample size indicated for small datasets
|
|
819
|
+
[ ] outliers are not arbitrarily removed
|
|
820
|
+
[ ] time series uses consistent intervals
|
|
821
|
+
[ ] geographic projections are accurate
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
accessibility checklist
|
|
825
|
+
|
|
826
|
+
[ ] color-blind friendly palette
|
|
827
|
+
[ ] high contrast (minimum 4.5:1 for text)
|
|
828
|
+
[ ] patterns in addition to colors
|
|
829
|
+
[ ] alt text provided for web use
|
|
830
|
+
[ ] scalable vector format for large displays
|
|
831
|
+
[ ] annotations are readable
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
PHASE 9: COMMON VISUALIZATION MISTAKES
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
mistake: misleading y-axis
|
|
838
|
+
|
|
839
|
+
wrong:
|
|
840
|
+
y-axis doesn't start at zero, exaggerating differences
|
|
841
|
+
different scales used for comparison
|
|
842
|
+
|
|
843
|
+
correct:
|
|
844
|
+
start y-axis at zero for bar/column charts
|
|
845
|
+
use consistent scales for comparison
|
|
846
|
+
if breaking axis is necessary, clearly mark it
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
mistake: too much data in one chart
|
|
850
|
+
|
|
851
|
+
wrong:
|
|
852
|
+
20+ categories in a single pie chart
|
|
853
|
+
50+ time series in one line chart
|
|
854
|
+
all variables in one scatter plot matrix
|
|
855
|
+
|
|
856
|
+
correct:
|
|
857
|
+
aggregate small categories into "other"
|
|
858
|
+
use small multiples for many series
|
|
859
|
+
filter to most important variables
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
mistake: 3d charts for 2d data
|
|
863
|
+
|
|
864
|
+
wrong:
|
|
865
|
+
3d pie charts
|
|
866
|
+
3d bar charts
|
|
867
|
+
3d line charts
|
|
868
|
+
|
|
869
|
+
correct:
|
|
870
|
+
use 2d charts - they're more accurate and readable
|
|
871
|
+
3d only adds value for actual 3d data
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
mistake: rainbow color palettes
|
|
875
|
+
|
|
876
|
+
wrong:
|
|
877
|
+
using all colors of the rainbow
|
|
878
|
+
colors that don't have perceptual ordering
|
|
879
|
+
|
|
880
|
+
correct:
|
|
881
|
+
use sequential palettes for ordered data
|
|
882
|
+
use diverging palettes for data with meaningful midpoint
|
|
883
|
+
use categorical palettes for nominal data
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
mistake: decorative over functional
|
|
887
|
+
|
|
888
|
+
wrong:
|
|
889
|
+
excessive styling that obscures data
|
|
890
|
+
fancy fonts that are hard to read
|
|
891
|
+
effects that don't add insight
|
|
892
|
+
|
|
893
|
+
correct:
|
|
894
|
+
form follows function
|
|
895
|
+
every visual element should convey information
|
|
896
|
+
keep it simple and clear
|
|
897
|
+
|
|
898
|
+
|
|
899
|
+
PHASE 10: VISUALIZATION RULES (MANDATORY)
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
while this skill is active, these rules are MANDATORY:
|
|
903
|
+
|
|
904
|
+
[1] ALWAYS START WITH DATA EXPLORATION
|
|
905
|
+
never visualize without understanding the data
|
|
906
|
+
check distributions, missing values, outliers first
|
|
907
|
+
|
|
908
|
+
[2] CHOOSE THE RIGHT CHART TYPE
|
|
909
|
+
match chart to data type and analysis goal
|
|
910
|
+
don't force data into inappropriate visualizations
|
|
911
|
+
|
|
912
|
+
[3] LABEL EVERYTHING CLEARLY
|
|
913
|
+
axis labels, title, legend - all must be descriptive
|
|
914
|
+
assume reader knows nothing about the context
|
|
915
|
+
|
|
916
|
+
[4] USE ACCESSIBLE COLORS
|
|
917
|
+
color-blind friendly palettes
|
|
918
|
+
sufficient contrast
|
|
919
|
+
multiple cues (color + pattern) when possible
|
|
920
|
+
|
|
921
|
+
[5] PRESERVE DATA PROPORTIONS
|
|
922
|
+
y-axis starts at zero for comparison charts
|
|
923
|
+
don't distort with broken axes
|
|
924
|
+
maintain aspect ratio for spatial data
|
|
925
|
+
|
|
926
|
+
[6] HANDLE OUTLIERS APPROPRIATELY
|
|
927
|
+
show outliers in distributions
|
|
928
|
+
don't hide them
|
|
929
|
+
explain if they're excluded
|
|
930
|
+
|
|
931
|
+
[7] PROVIDE CONTEXT
|
|
932
|
+
sample size, time period, data source
|
|
933
|
+
confidence intervals where appropriate
|
|
934
|
+
methodology notes
|
|
935
|
+
|
|
936
|
+
[8] AVOID CHART JUNK
|
|
937
|
+
remove 3d effects, unnecessary borders
|
|
938
|
+
simplify grid lines
|
|
939
|
+
every element should serve a purpose
|
|
940
|
+
|
|
941
|
+
[9] TEST AT OUTPUT SIZE
|
|
942
|
+
verify readability at intended display size
|
|
943
|
+
adjust font sizes and figure dimensions
|
|
944
|
+
ensure legends don't overlap
|
|
945
|
+
|
|
946
|
+
[10] TELL A STORY
|
|
947
|
+
highlight key insights
|
|
948
|
+
guide viewer to important patterns
|
|
949
|
+
use annotations strategically
|
|
950
|
+
|
|
951
|
+
|
|
952
|
+
FINAL REMINDERS
|
|
953
|
+
|
|
954
|
+
|
|
955
|
+
clarity over cleverness
|
|
956
|
+
|
|
957
|
+
the goal is communication, not decoration.
|
|
958
|
+
if a chart doesn't communicate, it's failed.
|
|
959
|
+
|
|
960
|
+
|
|
961
|
+
less is more
|
|
962
|
+
|
|
963
|
+
remove everything that doesn't add information.
|
|
964
|
+
simplify, simplify, simplify.
|
|
965
|
+
|
|
966
|
+
|
|
967
|
+
data first, visualization second
|
|
968
|
+
|
|
969
|
+
understand the data before visualizing.
|
|
970
|
+
visualization should reveal insights,
|
|
971
|
+
not create them.
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
test with your audience
|
|
975
|
+
|
|
976
|
+
show drafts to intended audience.
|
|
977
|
+
get feedback on clarity and effectiveness.
|
|
978
|
+
iterate based on feedback.
|
|
979
|
+
|
|
980
|
+
|
|
981
|
+
document your choices
|
|
982
|
+
|
|
983
|
+
why this chart type?
|
|
984
|
+
why these colors?
|
|
985
|
+
why this aggregation?
|
|
986
|
+
why this filtering?
|
|
987
|
+
|
|
988
|
+
future viewers will thank you.
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
now create visualizations that inform,
|
|
992
|
+
provide insight, and inspire action.
|