charty 0.2.3 → 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -23
  3. data/.github/workflows/nmatrix.yml +67 -0
  4. data/.github/workflows/pycall.yml +86 -0
  5. data/Gemfile +18 -0
  6. data/README.md +172 -4
  7. data/Rakefile +4 -5
  8. data/charty.gemspec +10 -6
  9. data/examples/sample_images/hist_gruff.png +0 -0
  10. data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
  11. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  12. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  13. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  14. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  15. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  16. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  17. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  18. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  19. data/lib/charty.rb +8 -1
  20. data/lib/charty/backends/bokeh.rb +2 -2
  21. data/lib/charty/backends/google_charts.rb +1 -1
  22. data/lib/charty/backends/gruff.rb +14 -3
  23. data/lib/charty/backends/plotly.rb +731 -32
  24. data/lib/charty/backends/plotly_helpers/html_renderer.rb +203 -0
  25. data/lib/charty/backends/plotly_helpers/notebook_renderer.rb +87 -0
  26. data/lib/charty/backends/plotly_helpers/plotly_renderer.rb +121 -0
  27. data/lib/charty/backends/pyplot.rb +514 -66
  28. data/lib/charty/backends/rubyplot.rb +1 -1
  29. data/lib/charty/cache_dir.rb +27 -0
  30. data/lib/charty/dash_pattern_generator.rb +57 -0
  31. data/lib/charty/index.rb +213 -0
  32. data/lib/charty/iruby_helper.rb +18 -0
  33. data/lib/charty/linspace.rb +1 -1
  34. data/lib/charty/plot_methods.rb +283 -8
  35. data/lib/charty/plotter.rb +2 -2
  36. data/lib/charty/plotters.rb +11 -0
  37. data/lib/charty/plotters/abstract_plotter.rb +186 -16
  38. data/lib/charty/plotters/bar_plotter.rb +189 -7
  39. data/lib/charty/plotters/box_plotter.rb +64 -11
  40. data/lib/charty/plotters/categorical_plotter.rb +272 -40
  41. data/lib/charty/plotters/count_plotter.rb +7 -0
  42. data/lib/charty/plotters/distribution_plotter.rb +143 -0
  43. data/lib/charty/plotters/estimation_support.rb +84 -0
  44. data/lib/charty/plotters/histogram_plotter.rb +186 -0
  45. data/lib/charty/plotters/line_plotter.rb +300 -0
  46. data/lib/charty/plotters/random_support.rb +25 -0
  47. data/lib/charty/plotters/relational_plotter.rb +635 -0
  48. data/lib/charty/plotters/scatter_plotter.rb +80 -0
  49. data/lib/charty/plotters/vector_plotter.rb +6 -0
  50. data/lib/charty/statistics.rb +96 -2
  51. data/lib/charty/table.rb +160 -15
  52. data/lib/charty/table_adapters.rb +2 -0
  53. data/lib/charty/table_adapters/active_record_adapter.rb +17 -9
  54. data/lib/charty/table_adapters/base_adapter.rb +166 -0
  55. data/lib/charty/table_adapters/daru_adapter.rb +39 -3
  56. data/lib/charty/table_adapters/datasets_adapter.rb +13 -2
  57. data/lib/charty/table_adapters/hash_adapter.rb +141 -16
  58. data/lib/charty/table_adapters/narray_adapter.rb +25 -6
  59. data/lib/charty/table_adapters/nmatrix_adapter.rb +15 -5
  60. data/lib/charty/table_adapters/pandas_adapter.rb +163 -0
  61. data/lib/charty/util.rb +28 -0
  62. data/lib/charty/vector.rb +69 -0
  63. data/lib/charty/vector_adapters.rb +187 -0
  64. data/lib/charty/vector_adapters/array_adapter.rb +101 -0
  65. data/lib/charty/vector_adapters/daru_adapter.rb +163 -0
  66. data/lib/charty/vector_adapters/narray_adapter.rb +182 -0
  67. data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
  68. data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
  69. data/lib/charty/vector_adapters/pandas_adapter.rb +199 -0
  70. data/lib/charty/version.rb +1 -1
  71. metadata +92 -25
@@ -0,0 +1,80 @@
1
+ module Charty
2
+ module Plotters
3
+ class ScatterPlotter < RelationalPlotter
4
+ def initialize(data: nil, variables: {}, **options, &block)
5
+ x, y, color, style, size = variables.values_at(:x, :y, :color, :style, :size)
6
+ super(x, y, color, style, size, data: data, **options, &block)
7
+ end
8
+
9
+ attr_reader :alpha
10
+
11
+ def alpha=(val)
12
+ case val
13
+ when nil, :auto, 0..1
14
+ @alpha = val
15
+ when "auto"
16
+ @alpha = val.to_sym
17
+ when Numeric
18
+ raise ArgumentError,
19
+ "the given alpha is out of bounds " +
20
+ "(%p for nil, :auto, or number 0..1)" % val
21
+ else
22
+ raise ArgumentError,
23
+ "invalid value of alpha " +
24
+ "(%p for nil, :auto, or number in 0..1)" % val
25
+ end
26
+ end
27
+
28
+ attr_reader :line_width, :edge_color
29
+
30
+ def line_width=(val)
31
+ @line_width = check_number(val, :line_width, allow_nil: true)
32
+ end
33
+
34
+ def edge_color=(val)
35
+ @line_width = check_color(val, :edge_color, allow_nil: true)
36
+ end
37
+
38
+ private def render_plot(backend, **)
39
+ draw_points(backend)
40
+ annotate_axes(backend)
41
+ end
42
+
43
+ private def draw_points(backend)
44
+ map_color(palette: palette, order: color_order, norm: color_norm)
45
+ map_size(sizes: sizes, order: size_order, norm: size_norm)
46
+ map_style(markers: markers, order: style_order)
47
+
48
+ data = @plot_data.drop_na
49
+
50
+ # TODO: should pass key_color to the backend's scatter method.
51
+ # In the pyplot backend, it is passed as the color parameter.
52
+
53
+ x = data[:x]
54
+ y = data[:y]
55
+ color = data[:color] if @variables.key?(:color)
56
+ style = data[:style] if @variables.key?(:style)
57
+ size = data[:size] if @variables.key?(:size)
58
+
59
+ # TODO: pass key_color here once the backends accept it.
60
+ backend.scatter(
61
+ x, y, @variables,
62
+ color: color, color_mapper: @color_mapper,
63
+ style: style, style_mapper: @style_mapper,
64
+ size: size, size_mapper: @size_mapper
65
+ )
66
+
67
+ if legend
68
+ backend.add_scatter_plot_legend(@variables, @color_mapper, @size_mapper, @style_mapper, legend)
69
+ end
70
+ end
71
+
72
+ private def annotate_axes(backend)
73
+ xlabel = self.variables[:x]
74
+ ylabel = self.variables[:y]
75
+ backend.set_xlabel(xlabel) unless xlabel.nil?
76
+ backend.set_ylabel(ylabel) unless ylabel.nil?
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,6 @@
1
+ module Charty
2
+ module Plotters
3
+ class VectorPlotter < AbstractPlotter
4
+ end
5
+ end
6
+ end
@@ -7,8 +7,12 @@ module Charty
7
7
  enum.mean
8
8
  end
9
9
 
10
- def self.stdev(enum)
11
- enum.stdev
10
+ def self.stdev(enum, population: false)
11
+ enum.stdev(population: population)
12
+ end
13
+
14
+ def self.histogram(ary, *args, **kwargs)
15
+ ary.histogram(*args, **kwargs)
12
16
  end
13
17
  rescue LoadError
14
18
  def self.mean(enum)
@@ -24,6 +28,96 @@ module Charty
24
28
  var = xs.map {|x| (x - mean)**2 }.sum / (n - ddof)
25
29
  Math.sqrt(var)
26
30
  end
31
+
32
+ def self.histogram(ary, *args, **kwargs)
33
+ raise NotImplementedError,
34
+ "histogram is currently supported only with enumerable-statistics"
35
+ end
36
+ end
37
+
38
+ def self.bootstrap(vector, n_boot: 2000, func: :mean, units: nil, random: nil)
39
+ n = vector.size
40
+ random = Charty::Plotters::RandomSupport.check_random(random)
41
+ func = Charty::Plotters::EstimationSupport.check_estimator(func)
42
+
43
+ if units
44
+ return structured_bootstrap(vector, n_boot, units, func, random)
45
+ end
46
+
47
+ if defined?(Pandas::Series) || defined?(Numpy::NDArray)
48
+ boot_dist = bootstrap_optimized_for_pycall(vector, n_boot, random, func)
49
+ return boot_dist if boot_dist
50
+ end
51
+
52
+ boot_dist = Array.new(n_boot) do |i|
53
+ resampler = Array.new(n) { random.rand(n) }
54
+
55
+ w ||= vector.values_at(*resampler)
56
+
57
+ case func
58
+ when :mean
59
+ mean(w)
60
+ end
61
+ end
62
+
63
+ boot_dist
64
+ end
65
+
66
+ private_class_method def self.bootstrap_optimized_for_pycall(vector, n_boot, random, func)
67
+ case
68
+ when vector.is_a?(Charty::Vector)
69
+ bootstrap_optimized_for_pycall(vector.data, n_boot, random, func)
70
+
71
+ when defined?(Pandas::Series) && vector.is_a?(Pandas::Series) || vector.is_a?(Numpy::NDArray)
72
+ # numpy is also available when pandas is available
73
+ n = vector.size
74
+ resampler = Numpy.empty(n, dtype: Numpy.intp)
75
+ Array.new(n_boot) do |i|
76
+ # TODO: Use Numo and MemoryView to reduce execution time
77
+ # resampler = Numo::Int64.new(n).rand(n)
78
+ # w = Numpy.take(vector, resampler)
79
+ n.times {|i| resampler[i] = random.rand(n) }
80
+ w = vector.take(resampler)
81
+
82
+ case func
83
+ when :mean
84
+ w.mean
85
+ end
86
+ end
87
+ end
88
+ end
89
+
90
+ private_class_method def self.structured_bootstrap(vector, n_boot, units, func, random)
91
+ raise NotImplementedError,
92
+ "structured bootstrapping has not been supported yet"
93
+ end
94
+
95
+ def self.bootstrap_ci(*vectors, width, n_boot: 2000, func: :mean, units: nil, random: nil)
96
+ boot = bootstrap(*vectors, n_boot: n_boot, func: func, units: units, random: random)
97
+ q = [50 - width / 2, 50 + width / 2]
98
+ if boot.respond_to?(:percentile)
99
+ boot.percentile(q)
100
+ else
101
+ percentile(boot, q)
102
+ end
103
+ end
104
+
105
+ # TODO: optimize with introselect algorithm
106
+ def self.percentile(a, q)
107
+ return mean(a) if a.size == 0
108
+
109
+ a = a.sort
110
+ n = a.size
111
+ q.map do |x|
112
+ x = n * (x / 100.0)
113
+ i = x.floor
114
+ if i == n-1
115
+ a[i]
116
+ else
117
+ t = x - i
118
+ (1-t)*a[i] + t*a[i+1]
119
+ end
120
+ end
27
121
  end
28
122
  end
29
123
  end
data/lib/charty/table.rb CHANGED
@@ -21,34 +21,67 @@ module Charty
21
21
  else
22
22
  @adapter = adapter_class.new(data, **kwargs)
23
23
  end
24
+
25
+ @column_cache = {}
24
26
  end
25
27
 
26
28
  attr_reader :adapter
27
29
 
30
+ def_delegators :adapter, :length, :column_length
31
+
32
+ def_delegators :adapter, :columns, :columns=
33
+ def_delegators :adapter, :index, :index=
34
+
28
35
  def_delegator :@adapter, :column_names
36
+
37
+ def column?(name)
38
+ return true if column_names.include?(name)
39
+
40
+ case name
41
+ when String
42
+ column_names.include?(name.to_sym)
43
+ when Symbol
44
+ column_names.include?(name.to_s)
45
+ else
46
+ false
47
+ end
48
+ end
49
+
29
50
  def_delegator :@adapter, :data, :raw_data
30
51
 
31
- def columns
32
- @column_accessor ||= ColumnAccessor.new(@adapter)
52
+ def ==(other)
53
+ return true if equal?(other)
54
+
55
+ case other
56
+ when Charty::Table
57
+ adapter == other.adapter
58
+ else
59
+ super
60
+ end
61
+ end
62
+
63
+ def empty?
64
+ length == 0
33
65
  end
34
66
 
35
- def [](*args)
36
- n_args = args.length
37
- case n_args
38
- when 1
39
- row = nil
40
- column = args[0]
41
- @adapter[row, column]
42
- when 2
43
- row = args[0]
44
- column = args[1]
45
- @adapter[row, column]
67
+ def [](key)
68
+ key = case key
69
+ when Symbol
70
+ key
71
+ else
72
+ String.try_convert(key).to_sym
73
+ end
74
+ if @column_cache.key?(key)
75
+ @column_cache[key]
46
76
  else
47
- message = "wrong number of arguments (given #{n_args}, expected 1..2)"
48
- raise ArgumentError, message
77
+ @column_cache[key] = @adapter[nil, key]
49
78
  end
50
79
  end
51
80
 
81
+ def group_by(grouper, sort: true, drop_na: true)
82
+ adapter.group_by(self, grouper, sort, drop_na)
83
+ end
84
+
52
85
  def to_a(x=nil, y=nil, z=nil)
53
86
  case
54
87
  when defined?(Daru::DataFrame) && table.kind_of?(Daru::DataFrame)
@@ -81,5 +114,117 @@ module Charty
81
114
  i += 1
82
115
  end
83
116
  end
117
+
118
+ def drop_na
119
+ @adapter.drop_na || self
120
+ end
121
+
122
+ def_delegator :adapter, :sort_values
123
+
124
+ def_delegator :adapter, :reset_index
125
+
126
+ class GroupByBase
127
+ end
128
+
129
+ class HashGroupBy < GroupByBase
130
+ def initialize(table, grouper, sort, drop_na)
131
+ @table = table
132
+ @grouper = check_grouper(grouper)
133
+ init_groups(sort, drop_na)
134
+ end
135
+
136
+ private def check_grouper(grouper)
137
+ case grouper
138
+ when Symbol, String, Array
139
+ # TODO check column existence
140
+ return grouper
141
+ when Charty::Vector
142
+ if @table.length != grouper.length
143
+ raise ArgumentError,
144
+ "Wrong number of items in grouper array " +
145
+ "(%p for %p)" % [val.length, @table.length]
146
+ end
147
+ return grouper
148
+ when ->(x) { x.respond_to?(:call) }
149
+ raise NotImplementedError,
150
+ "A callable grouper is unsupported"
151
+ else
152
+ raise ArgumentError,
153
+ "Unable to recognize the value for `grouper`: %p" % val
154
+ end
155
+ end
156
+
157
+ private def init_groups(sort, drop_na)
158
+ case @grouper
159
+ when Symbol, String
160
+ column = @table[@grouper]
161
+ @indices = (0 ... @table.length).group_by do |i|
162
+ column.data[i]
163
+ end
164
+ when Array
165
+ @indices = (0 ... @table.length).group_by { |i|
166
+ @grouper.map {|j| @table[j].data[i] }
167
+ }
168
+ when Charty::Vector
169
+ @indices = (0 ... @table.length).group_by do |i|
170
+ @grouper.data[i]
171
+ end
172
+ end
173
+
174
+ if drop_na
175
+ case @grouper
176
+ when Array
177
+ @indices.reject! {|key, | key.any? {|k| Util.missing?(k) } }
178
+ else
179
+ @indices.reject! {|key, | Util.missing?(key) }
180
+ end
181
+ end
182
+
183
+ if sort
184
+ @indices = @indices.sort_by {|key, | key }.to_h
185
+ end
186
+ end
187
+
188
+ def indices
189
+ @indices.dup
190
+ end
191
+
192
+ def group_keys
193
+ @indices.keys
194
+ end
195
+
196
+ def each_group_key(&block)
197
+ @indices.each_key(&block)
198
+ end
199
+
200
+ def apply(*args, &block)
201
+ Charty::Table.new(
202
+ each_group.map { |_key, table|
203
+ block.call(table, *args)
204
+ },
205
+ index: Charty::Index.new(@indices.keys, name: @grouper)
206
+ )
207
+ end
208
+
209
+ def each_group
210
+ return enum_for(__method__) unless block_given?
211
+
212
+ @indices.each_key do |key|
213
+ yield(key, self[key])
214
+ end
215
+ end
216
+
217
+ def [](key)
218
+ return nil unless @indices.key?(key)
219
+
220
+ index = @indices[key]
221
+ Charty::Table.new(
222
+ @table.column_names.map {|col|
223
+ [col, @table[col].values_at(*index)]
224
+ }.to_h,
225
+ index: index
226
+ )
227
+ end
228
+ end
84
229
  end
85
230
  end
@@ -15,9 +15,11 @@ module Charty
15
15
  end
16
16
  end
17
17
 
18
+ require_relative 'table_adapters/base_adapter'
18
19
  require_relative 'table_adapters/hash_adapter'
19
20
  require_relative 'table_adapters/narray_adapter'
20
21
  require_relative 'table_adapters/datasets_adapter'
21
22
  require_relative 'table_adapters/daru_adapter'
22
23
  require_relative 'table_adapters/active_record_adapter'
23
24
  require_relative 'table_adapters/nmatrix_adapter'
25
+ require_relative 'table_adapters/pandas_adapter'
@@ -1,10 +1,8 @@
1
1
  module Charty
2
2
  module TableAdapters
3
- class ActiveRecordAdapter
3
+ class ActiveRecordAdapter < BaseAdapter
4
4
  TableAdapters.register(:active_record, self)
5
5
 
6
- include Enumerable
7
-
8
6
  def self.supported?(data)
9
7
  defined?(ActiveRecord::Relation) && data.is_a?(ActiveRecord::Relation)
10
8
  end
@@ -12,17 +10,27 @@ module Charty
12
10
  def initialize(data)
13
11
  @data = check_type(ActiveRecord::Relation, data, :data)
14
12
  @column_names = @data.column_names.freeze
15
- @columns = nil
13
+ self.columns = Index.new(@column_names)
14
+ self.index = RangeIndex.new(0 ... length)
16
15
  end
17
16
 
18
- attr_reader :column_names, :data
17
+ attr_reader :data, :column_names
18
+
19
+ def_delegators :data, :size
20
+
21
+ alias length size
22
+
23
+ def column_length
24
+ column_names.length
25
+ end
19
26
 
20
27
  def [](row, column)
21
- fetch_records unless @columns
28
+ fetch_records unless @columns_cache
22
29
  if row
23
- @columns[resolve_column_index(column)][row]
30
+ @columns_cache[resolve_column_index(column)][row]
24
31
  else
25
- @columns[resolve_column_index(column)]
32
+ column_data = @columns_cache[resolve_column_index(column)]
33
+ Vector.new(column_data, index: index, name: column)
26
34
  end
27
35
  end
28
36
 
@@ -43,7 +51,7 @@ module Charty
43
51
  end
44
52
 
45
53
  private def fetch_records
46
- @columns = @data.pluck(*column_names).transpose
54
+ @columns_cache = @data.pluck(*column_names).transpose
47
55
  end
48
56
 
49
57
  private def check_type(type, data, name)