charty 0.2.6 → 0.2.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/charty.gemspec +2 -1
  3. data/examples/bar_plot.rb +19 -0
  4. data/examples/box_plot.rb +17 -0
  5. data/examples/scatter_plot.rb +17 -0
  6. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  7. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  8. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  9. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  10. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  11. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  12. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  13. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  14. data/lib/charty.rb +2 -0
  15. data/lib/charty/backends/plotly.rb +127 -24
  16. data/lib/charty/backends/plotly_helpers/html_renderer.rb +203 -0
  17. data/lib/charty/backends/plotly_helpers/notebook_renderer.rb +89 -0
  18. data/lib/charty/backends/plotly_helpers/plotly_renderer.rb +121 -0
  19. data/lib/charty/backends/pyplot.rb +74 -0
  20. data/lib/charty/backends/unicode_plot.rb +9 -9
  21. data/lib/charty/cache_dir.rb +27 -0
  22. data/lib/charty/iruby_helper.rb +18 -0
  23. data/lib/charty/plot_methods.rb +82 -6
  24. data/lib/charty/plotters.rb +3 -0
  25. data/lib/charty/plotters/abstract_plotter.rb +56 -16
  26. data/lib/charty/plotters/bar_plotter.rb +39 -0
  27. data/lib/charty/plotters/categorical_plotter.rb +9 -1
  28. data/lib/charty/plotters/distribution_plotter.rb +180 -0
  29. data/lib/charty/plotters/histogram_plotter.rb +244 -0
  30. data/lib/charty/plotters/line_plotter.rb +38 -5
  31. data/lib/charty/plotters/scatter_plotter.rb +4 -2
  32. data/lib/charty/statistics.rb +9 -0
  33. data/lib/charty/table.rb +30 -23
  34. data/lib/charty/table_adapters/base_adapter.rb +88 -0
  35. data/lib/charty/table_adapters/daru_adapter.rb +41 -1
  36. data/lib/charty/table_adapters/hash_adapter.rb +59 -1
  37. data/lib/charty/table_adapters/pandas_adapter.rb +49 -1
  38. data/lib/charty/vector.rb +29 -1
  39. data/lib/charty/vector_adapters.rb +16 -0
  40. data/lib/charty/vector_adapters/pandas_adapter.rb +10 -1
  41. data/lib/charty/version.rb +1 -1
  42. metadata +39 -15
@@ -10,3 +10,6 @@ require_relative "plotters/vector_plotter"
10
10
  require_relative "plotters/relational_plotter"
11
11
  require_relative "plotters/scatter_plotter"
12
12
  require_relative "plotters/line_plotter"
13
+
14
+ require_relative "plotters/distribution_plotter"
15
+ require_relative "plotters/histogram_plotter"
@@ -35,6 +35,8 @@ module Charty
35
35
  end
36
36
 
37
37
  def data=(data)
38
+ # TODO: Convert a Charty::Vector to a Charty::Table so that
39
+ # the Charty::Vector is handled as a wide form data
38
40
  @data = case data
39
41
  when nil, Charty::Table
40
42
  data
@@ -81,6 +83,24 @@ module Charty
81
83
  end
82
84
  end
83
85
 
86
# Optional text overrides for the plot: the two axis labels and the title.
# Each one stays nil until a value is assigned.
attr_reader :x_label, :y_label, :title

# Stores the x-axis label after validating it with check_string
# (nil is accepted).
def x_label=(val)
  @x_label = check_string(val, :x_label, allow_nil: true)
end

# Stores the y-axis label after validating it with check_string
# (nil is accepted).
def y_label=(val)
  @y_label = check_string(val, :y_label, allow_nil: true)
end

# Stores the plot title after validating it with check_string
# (nil is accepted).
def title=(val)
  @title = check_string(val, :title, allow_nil: true)
end
103
+
84
104
  private def substitute_options(options)
85
105
  options.each do |key, val|
86
106
  send("#{key}=", val)
@@ -138,6 +158,27 @@ module Charty
138
158
  end
139
159
  end
140
160
 
161
# Normalizes a label-like option to a String.
#
# Symbols are converted with #to_s, nil is passed through when `allow_nil`
# is true, and every other value goes through String.try_convert.
#
# Raises ArgumentError, carrying the caller's backtrace, when the value
# cannot be converted.  `name` is only used to build that message.
private def check_string(value, name, allow_nil: false)
  return value.to_s if value.is_a?(Symbol)
  return nil if allow_nil && value.nil?

  converted = String.try_convert(value)
  return converted unless converted.nil?

  # Wrap the value in a one-element array: with a bare Array on the
  # right-hand side, String#% would consume its elements as the format
  # arguments and report the wrong value (or fail on an empty array).
  raise ArgumentError,
        "`#{name}` must be convertible to String: %p" % [value],
        caller
end
181
+
141
182
  private def variable_type(vector, boolean_type=:numeric)
142
183
  if vector.numeric?
143
184
  :numeric
@@ -181,15 +222,6 @@ module Charty
181
222
  data = processed ? processed_data : plot_data
182
223
  data = data.drop_na if drop_na
183
224
 
184
- levels = var_levels.dup
185
-
186
- [:x, :y].each do |axis|
187
- levels[axis] = plot_data[axis].categorical_order()
188
- if processed
189
- # TODO: perform inverse conversion of axis scaling here
190
- end
191
- end
192
-
193
225
  if not grouping_vars.empty?
194
226
  grouped = data.group_by(grouping_vars, sort: false)
195
227
  grouped.each_group do |group_key, group_data|
@@ -213,16 +245,19 @@ module Charty
213
245
 
214
246
  def save(filename, **kwargs)
215
247
  backend = Backends.current
216
- backend.begin_figure
217
- render_plot(backend, **kwargs)
248
+ call_render_plot(backend, notebook: false, **kwargs)
218
249
  backend.save(filename, **kwargs)
219
250
  end
220
251
 
221
252
  def render(notebook: false, **kwargs)
222
253
  backend = Backends.current
254
+ call_render_plot(backend, notebook: notebook, **kwargs)
255
+ backend.render(notebook: notebook, **kwargs)
256
+ end
257
+
258
+ private def call_render_plot(backend, notebook: false, **kwargs)
223
259
  backend.begin_figure
224
260
  render_plot(backend, notebook: notebook, **kwargs)
225
- backend.render(notebook: notebook, **kwargs)
226
261
  end
227
262
 
228
263
  private def render_plot(*, **)
@@ -231,12 +266,17 @@ module Charty
231
266
  end
232
267
 
233
268
  def to_iruby
234
- render(notebook: iruby_notebook?)
269
+ render(notebook: IRubyHelper.iruby_notebook?)
235
270
  end
236
271
 
237
- private def iruby_notebook?
238
- return false unless defined?(IRuby)
239
- true # TODO: Check the server is notebook or not
272
# Returns the MIME bundle produced by the current backend for this plot.
# A backend that does not respond to render_mimebundle yields an empty
# Hash, and nothing is drawn in that case.
def to_iruby_mimebundle(include: [], exclude: [])
  backend = Backends.current
  return {} unless backend.respond_to?(:render_mimebundle)

  call_render_plot(backend, notebook: true)
  backend.render_mimebundle(include: include, exclude: exclude)
end
241
281
  end
242
282
  end
@@ -42,6 +42,12 @@ module Charty
42
42
  @cap_size = check_number(cap_size, :cap_size, allow_nil: true)
43
43
  end
44
44
 
45
# Whether the value axis should use a log scale (read by annotate_axes).
attr_reader :log

# Stores the flag after validating it with check_boolean.
def log=(val)
  checked = check_boolean(val, :log)
  @log = checked
end
50
+
45
51
  private def render_plot(backend, **)
46
52
  draw_bars(backend)
47
53
  annotate_axes(backend)
@@ -81,6 +87,39 @@ module Charty
81
87
  end
82
88
  end
83
89
 
90
# Extends the inherited axis annotation.  When `log` is enabled, the value
# axis (y for the :v orientation, x otherwise) is switched to a log scale
# and given limits derived from the estimations and confidence intervals.
private def annotate_axes(backend)
  super

  return unless self.log

  lower, upper = @estimations.minmax
  if @plot_colors
    # Here @conf_int is used directly as a [low, high] pair.
    unless @conf_int.empty?
      lower = [lower, @conf_int[0]].min
      upper = [upper, @conf_int[1]].max
    end
  else
    # Here @conf_int is a list of pairs, some of which may be empty.
    ci_lows = Util.filter_map(@conf_int) { |ci| ci[0] unless ci.empty? }
    ci_highs = Util.filter_map(@conf_int) { |ci| ci[1] unless ci.empty? }
    lower = [lower, ci_lows.min].min unless ci_lows.empty?
    upper = [upper, ci_highs.max].max unless ci_highs.empty?
  end

  # The limits passed to the backend are base-10 exponents.
  # NOTE(review): Math.log10 is reached whenever the minimum is <= 1; a zero
  # or negative minimum has no finite logarithm -- confirm that inputs are
  # always positive when `log` is enabled.
  lower = lower > 1 ? 0 : Math.log10(lower).floor
  upper = Math.log10(upper).ceil

  if self.orient == :v
    backend.set_yscale(:log)
    backend.set_ylim(lower, upper)
  else
    backend.set_xscale(:log)
    backend.set_xlim(lower, upper)
  end
end
122
+
84
123
  private def setup_estimations
85
124
  if @color_names.nil?
86
125
  setup_estimations_with_single_color_group
@@ -134,6 +134,7 @@ module Charty
134
134
  order = @order # TODO: supply order via parameter
135
135
  unless order
136
136
  order = @data.column_names.select do |cn|
137
+ # TODO: Use Charty::Vector#numeric?
137
138
  @data[cn].all? {|x| Float(x, exception: false) }
138
139
  end
139
140
  end
@@ -230,6 +231,7 @@ module Charty
230
231
  end
231
232
  return :h
232
233
  end
234
+
233
235
  case orient
234
236
  when :v
235
237
  if require_numeric && y_type != :numeric
@@ -263,7 +265,9 @@ module Charty
263
265
  private def group_long_form(vals, groups, group_order)
264
266
  grouped_vals = vals.group_by(groups)
265
267
 
266
- plot_data = group_order.map {|g| grouped_vals[g] || [] }
268
+ plot_data = group_order.map do |g|
269
+ grouped_vals[g] || Charty::Vector.new([])
270
+ end
267
271
 
268
272
  if vals.respond_to?(:name)
269
273
  value_label = vals.name
@@ -347,11 +351,15 @@ module Charty
347
351
  end
348
352
 
349
353
  private def annotate_axes(backend)
354
+ backend.set_title(self.title) if self.title
355
+
350
356
  if orient == :v
351
357
  xlabel, ylabel = @group_label, @value_label
352
358
  else
353
359
  xlabel, ylabel = @value_label, @group_label
354
360
  end
361
+ xlabel = self.x_label if self.x_label
362
+ ylabel = self.y_label if self.y_label
355
363
  backend.set_xlabel(xlabel) unless xlabel.nil?
356
364
  backend.set_ylabel(ylabel) unless ylabel.nil?
357
365
 
@@ -0,0 +1,180 @@
1
module Charty
  module Plotters
    # Base class for plotters that visualize the distribution of values.
    #
    # On construction it resolves the requested variables into a
    # Charty::Table (plot_data), remembers the source name of every variable
    # (variables), and records the variable type of each plot_data column
    # (var_types).
    class DistributionPlotter < AbstractPlotter
      # Mapping used when the input is flat (a single Charty::Vector):
      # variable name => which part of the vector feeds it.
      def flat_structure
        {
          x: :@values
        }
      end

      # Mapping used for wide-form tables: variable name => melted column
      # that feeds it.
      def wide_structure
        {
          x: :@values,
          color: :@columns
        }
      end

      # `variables` is a Hash from which :x, :y and :color are taken; all
      # remaining options are passed on to the superclass.
      def initialize(data:, variables:, **options, &block)
        x, y, color = variables.values_at(:x, :y, :color)
        super(x, y, color, data: data, **options, &block)

        setup_variables
      end

      attr_reader :weights

      def weights=(val)
        @weights = check_dimension(val, :weights)
      end

      attr_reader :color_norm

      # Only nil is accepted for now.
      #
      # Raises NotImplementedError for any other value.
      def color_norm=(val)
        unless val.nil?
          raise NotImplementedError,
                "Specifying color_norm is not supported yet"
        end

        # Store the (nil) value so the reader is backed by an instance
        # variable that has actually been assigned.
        @color_norm = val
      end

      attr_reader :legend

      def legend=(val)
        @legend = check_legend(val)
      end

      private def check_legend(val)
        check_boolean(val, :legend)
      end

      # `variables` is declared only here; a second, redundant declaration
      # would merely redefine the same reader.
      attr_reader :input_format, :plot_data, :variables, :var_types

      # This should be the same as one in RelationalPlotter
      # TODO: move this to AbstractPlotter and refactor with CategoricalPlotter
      #
      # Chooses the wide-form path when neither x nor y was given and the
      # long-form path otherwise, then classifies every plot_data column.
      private def setup_variables
        if x.nil? && y.nil?
          @input_format = :wide
          setup_variables_with_wide_form_dataset
        else
          @input_format = :long
          setup_variables_with_long_form_dataset
        end

        @var_types = @plot_data.columns.map { |k|
          [k, variable_type(@plot_data[k], :categorical)]
        }.to_h
      end

      # Builds plot_data/variables from a flat vector or a wide-form table.
      #
      # Raises ArgumentError when `color` was given, since it cannot be
      # assigned in wide-form data.
      private def setup_variables_with_wide_form_dataset
        unless color.nil?
          raise ArgumentError,
                "Unable to assign the following variables in wide-form data: color"
        end

        if data.nil? || data.empty?
          @plot_data = Charty::Table.new({})
          @variables = {}
          return
        end

        if data.is_a?(Charty::Vector)
          setup_variables_with_flat_vector
        else
          setup_variables_with_wide_table
        end
      end

      # Flat input: fills :x/:y from the vector's index or values according
      # to flat_structure.
      private def setup_variables_with_flat_vector
        columns = {}
        @variables = {}

        [:x, :y].each do |var|
          case self.flat_structure[var]
          when :@index
            columns[var] = data.index.to_a
            @variables[var] = data.index.name
          when :@values
            columns[var] = data.to_a
            @variables[var] = data.name
          end
        end

        @plot_data = Charty::Table.new(columns)
      end

      # Wide input: keeps the numeric columns, melts them into
      # :@columns/:@values, and exposes them under the names listed in
      # wide_structure.
      private def setup_variables_with_wide_table
        numeric_columns = @data.column_names.select do |cn|
          @data[cn].numeric?
        end
        wide_data = @data[numeric_columns]

        melt_params = {var_name: :@columns, value_name: :@values }
        # The structure stores :@index as a *value*; checking the keys for
        # :index could never succeed.
        # NOTE(review): no structure in this class maps to :@index, so this
        # branch is only reachable from a subclass -- confirm melt accepts
        # id_vars before relying on it.
        if self.wide_structure.value?(:@index)
          melt_params[:id_vars] = :@index
        end

        @plot_data = wide_data.melt(**melt_params)
        @variables = {}
        self.wide_structure.each do |var, attr|
          @plot_data[var] = @plot_data[attr]

          @variables[var] = case attr
                            when :@columns
                              wide_data.columns.name
                            when :@index
                              wide_data.index.name
                            else
                              nil
                            end
        end

        @plot_data = @plot_data[self.wide_structure.keys]
      end

      # Builds plot_data/variables from explicitly named variables.  Each of
      # x, y, color and weights may be a column name of `data` or a
      # Charty::Vector.
      #
      # Raises ArgumentError for any other kind of value.
      private def setup_variables_with_long_form_dataset
        if data.nil? || data.empty?
          @plot_data = Charty::Table.new({})
          @variables = {}
          return
        end

        plot_data = {}
        variables = {}

        {
          x: self.x,
          y: self.y,
          color: self.color,
          weights: self.weights
        }.each do |key, val|
          next if val.nil?

          if data.column?(val)
            plot_data[key] = data[val]
            variables[key] = val
          elsif val.is_a?(Charty::Vector)
            plot_data[key] = val
            variables[key] = val.name
          else
            raise ArgumentError,
                  "Could not interpret value %p for parameter %p" % [val, key]
          end
        end

        @plot_data = Charty::Table.new(plot_data)
        # Keep only the variables whose column has at least one non-null value.
        @variables = variables.select do |var, name|
          @plot_data[var].notnull.any?
        end
      end

      private def map_color(palette: nil, order: nil, norm: nil)
        @color_mapper = ColorMapper.new(self, palette, order, norm)
      end

      private def map_size(sizes: nil, order: nil, norm: nil)
        @size_mapper = SizeMapper.new(self, sizes, order, norm)
      end

      private def map_style(markers: nil, dashes: nil, order: nil)
        @style_mapper = StyleMapper.new(self, markers, dashes, order)
      end
    end
  end
end
@@ -0,0 +1,244 @@
1
module Charty
  module Plotters
    # Draws histograms.  Only univariate histograms with the :count
    # statistic and :layer stacking are implemented; the remaining options
    # are validated and rejected.
    class HistogramPlotter < DistributionPlotter
      # True when exactly one of :x and :y has been assigned.
      def univariate?
        self.variables.key?(:x) != self.variables.key?(:y)
      end

      # The single assigned variable, :x or :y.
      #
      # Raises TypeError when the plot is not univariate.
      def univariate_variable
        unless univariate?
          raise TypeError, "This is not a univariate plot"
        end
        ([:x, :y] & self.variables.keys)[0]
      end

      attr_reader :stat

      def stat=(val)
        @stat = check_stat(val)
      end

      # Accepts :count (or "count") and returns it as a Symbol.
      #
      # Raises ArgumentError for the statistics that are recognized but not
      # implemented yet, and for any other value.
      private def check_stat(val)
        case val
        when :count, "count"
          val.to_sym
        when :frequency, "frequency",
             :density, "density",
             :probability, "probability"
          raise ArgumentError,
                "%p for `stat` is not supported yet" % [val],
                caller
        else
          # The value is wrapped in an array so that an Array argument is
          # printed as a whole instead of being spread over the format.
          raise ArgumentError,
                "Invalid value for `stat` (%p)" % [val],
                caller
        end
      end

      attr_reader :bins

      def bins=(val)
        @bins = check_bins(val)
      end

      # Accepts :auto (or "auto") or an Integer number of bins.
      #
      # Raises ArgumentError for any other value.
      private def check_bins(val)
        case val
        when :auto, "auto"
          val.to_sym
        when Integer
          val
        else
          raise ArgumentError,
                "Invalid value for `bins` (%p)" % [val],
                caller
        end
      end

      # TODO: bin_width

      attr_reader :bin_range

      def bin_range=(val)
        @bin_range = check_bin_range(val)
      end

      # Accepts nil, a Range, or a two-element Array whose items pass
      # check_number.
      #
      # Raises ArgumentError for arrays of any other length and for values
      # of any other kind.
      private def check_bin_range(val)
        case val
        when nil, Range
          return val
        when Array
          if val.length == 2
            val.each_with_index do |v, i|
              check_number(v, "bin_range[#{i}]")
            end
            return val
          else
            amount = val.length < 2 ? "few" : "many"
            raise ArgumentError,
                  "Too #{amount} items in `bin_range` array (%p for 2)" % val.length
          end
        else
          raise ArgumentError,
                "Invalid value for `bin_range` " +
                "(%p for a range or a pair of numbers)" % [val]
        end
      end

      # TODO: discrete
      # TODO: cumulative

      attr_reader :common_bins

      def common_bins=(val)
        @common_bins = check_boolean(val, :common_bins)
      end

      # TODO: common_norm

      attr_reader :multiple

      def multiple=(val)
        @multiple = check_multiple(val)
      end

      # Accepts :layer (or "layer") and returns it as a Symbol.
      #
      # Raises NotImplementedError for :dodge, :stack and :fill, and
      # ArgumentError for any other value.
      private def check_multiple(val)
        case val
        when :layer, "layer"
          val.to_sym
        when :dodge, "dodge",
             :stack, "stack",
             :fill, "fill"
          val = val.to_sym
          raise NotImplementedError,
                "%p for `multiple` is not supported yet" % [val],
                caller
        else
          raise ArgumentError,
                "Invalid value for `multiple` (%p)" % [val],
                caller
        end
      end

      # TODO: element
      # TODO: fill
      # TODO: shrink

      attr_reader :kde

      # Always raises NotImplementedError, whatever value is given.
      def kde=(val)
        raise NotImplementedError, "kde is not supported yet"
      end

      attr_reader :kde_params

      # Always raises NotImplementedError, whatever value is given.
      def kde_params=(val)
        raise NotImplementedError, "kde_params is not supported yet"
      end

      # TODO: thresh
      # TODO: pthresh
      # TODO: pmax
      # TODO: cbar
      # TODO: cbar_params
      # TODO: x_log_scale
      # TODO: y_log_scale

      private def render_plot(backend, **)
        draw_univariate_histogram(backend)
        annotate_axes(backend)
      end

      # Computes one histogram per color subset and hands each of them to
      # the backend's univariate_histogram.
      private def draw_univariate_histogram(backend)
        map_color(palette: palette, order: color_order, norm: color_norm)

        key_color = self.key_color
        if key_color.nil? && !self.variables.key?(:color)
          # No explicit color and no color variable: fall back to the first
          # color of the configured (or default) palette.
          resolved_palette = case self.palette
                             when Palette
                               self.palette
                             when nil
                               Palette.default
                             else
                               Palette[self.palette]
                             end
          key_color = resolved_palette[0]
        end

        # TODO: calculate histogram here and use bar plot to visualize
        data_variable = self.univariate_variable

        # Read the bins before branching on common_bins.  The per-subset
        # histogram below needs them in both cases; assigning this local
        # only inside the branch would leave it nil when common_bins is
        # false.
        bins = self.bins
        common_bin_edges = nil

        if common_bins
          all_data = processed_data.drop_na
          all_observations = all_data[data_variable].to_a

          bins = 10 if self.variables.key?(:color) && bins == :auto

          case bins
          when Integer
            case bin_range
            when Range
              start = bin_range.begin
              stop = bin_range.end
            when Array
              start, stop = bin_range.minmax
            end
            # Any bound that bin_range did not supply comes from the data.
            data_range = all_observations.minmax
            start ||= data_range[0]
            stop ||= data_range[1]
            if start == stop
              # Widen a zero-width range so the edges are distinct.
              start -= 0.5
              stop += 0.5
            end
            common_bin_edges = Linspace.new(start .. stop, bins + 1).map(&:to_f)
          else
            params = {}
            params[:weights] = all_data[:weights].to_a if all_data.column?(:weights)
            h = Statistics.histogram(all_observations, bins, **params)
            common_bin_edges = h.edges
          end
        end

        alpha = self.variables.key?(:color) ? 0.5 : 0.75

        each_subset([:color], processed: true) do |sub_vars, sub_data|
          observations = sub_data[data_variable].drop_na.to_a
          params = {}
          params[:weights] = sub_data[:weights].to_a if sub_data.column?(:weights)
          params[:edges] = common_bin_edges if common_bin_edges
          hist = Statistics.histogram(observations, bins, **params)

          name = sub_vars[:color]
          backend.univariate_histogram(hist, name, data_variable, stat,
                                       alpha, name, key_color, @color_mapper,
                                       multiple, :bars, true, 1r)
        end
      end

      # Sets the title, the axis labels and (when a color variable exists)
      # the legend.  Labels and legend are only set for univariate plots;
      # the axis that carries no data variable is labelled with the
      # capitalized statistic name unless an explicit label was given.
      private def annotate_axes(backend)
        backend.set_title(self.title) if self.title

        if univariate?
          xlabel = self.x_label || self.variables[:x]
          ylabel = self.y_label || self.variables[:y]
          case self.univariate_variable
          when :x
            ylabel ||= self.stat.to_s.capitalize
          else
            xlabel ||= self.stat.to_s.capitalize
          end
          backend.set_ylabel(ylabel) if ylabel
          backend.set_xlabel(xlabel) if xlabel

          if self.variables.key?(:color)
            backend.legend(loc: :best, title: self.variables[:color])
          end
        end
      end
    end
  end
end