charty 0.2.1 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -23
  3. data/.github/workflows/nmatrix.yml +67 -0
  4. data/.github/workflows/pycall.yml +86 -0
  5. data/Dockerfile.dev +9 -1
  6. data/Gemfile +18 -0
  7. data/README.md +177 -9
  8. data/Rakefile +4 -5
  9. data/charty.gemspec +10 -5
  10. data/examples/palette.rb +1 -1
  11. data/examples/sample_images/hist_gruff.png +0 -0
  12. data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
  13. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  14. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  15. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  16. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  17. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  18. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  19. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  20. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  21. data/lib/charty.rb +9 -2
  22. data/lib/charty/backends.rb +1 -0
  23. data/lib/charty/backends/bokeh.rb +2 -2
  24. data/lib/charty/backends/google_charts.rb +1 -1
  25. data/lib/charty/backends/gruff.rb +14 -3
  26. data/lib/charty/backends/plotly.rb +731 -32
  27. data/lib/charty/backends/plotly_helpers/html_renderer.rb +203 -0
  28. data/lib/charty/backends/plotly_helpers/notebook_renderer.rb +86 -0
  29. data/lib/charty/backends/plotly_helpers/plotly_renderer.rb +121 -0
  30. data/lib/charty/backends/pyplot.rb +515 -67
  31. data/lib/charty/backends/rubyplot.rb +1 -1
  32. data/lib/charty/backends/unicode_plot.rb +79 -0
  33. data/lib/charty/cache_dir.rb +27 -0
  34. data/lib/charty/dash_pattern_generator.rb +57 -0
  35. data/lib/charty/index.rb +213 -0
  36. data/lib/charty/iruby_helper.rb +18 -0
  37. data/lib/charty/linspace.rb +1 -1
  38. data/lib/charty/plot_methods.rb +283 -8
  39. data/lib/charty/plotter.rb +2 -2
  40. data/lib/charty/plotters.rb +11 -0
  41. data/lib/charty/plotters/abstract_plotter.rb +188 -18
  42. data/lib/charty/plotters/bar_plotter.rb +189 -7
  43. data/lib/charty/plotters/box_plotter.rb +64 -11
  44. data/lib/charty/plotters/categorical_plotter.rb +272 -40
  45. data/lib/charty/plotters/count_plotter.rb +7 -0
  46. data/lib/charty/plotters/distribution_plotter.rb +143 -0
  47. data/lib/charty/plotters/estimation_support.rb +84 -0
  48. data/lib/charty/plotters/histogram_plotter.rb +182 -0
  49. data/lib/charty/plotters/line_plotter.rb +300 -0
  50. data/lib/charty/plotters/random_support.rb +25 -0
  51. data/lib/charty/plotters/relational_plotter.rb +635 -0
  52. data/lib/charty/plotters/scatter_plotter.rb +80 -0
  53. data/lib/charty/plotters/vector_plotter.rb +6 -0
  54. data/lib/charty/statistics.rb +96 -2
  55. data/lib/charty/table.rb +160 -15
  56. data/lib/charty/table_adapters.rb +2 -0
  57. data/lib/charty/table_adapters/active_record_adapter.rb +17 -9
  58. data/lib/charty/table_adapters/base_adapter.rb +166 -0
  59. data/lib/charty/table_adapters/daru_adapter.rb +39 -3
  60. data/lib/charty/table_adapters/datasets_adapter.rb +13 -2
  61. data/lib/charty/table_adapters/hash_adapter.rb +141 -16
  62. data/lib/charty/table_adapters/narray_adapter.rb +25 -6
  63. data/lib/charty/table_adapters/nmatrix_adapter.rb +15 -5
  64. data/lib/charty/table_adapters/pandas_adapter.rb +163 -0
  65. data/lib/charty/util.rb +28 -0
  66. data/lib/charty/vector.rb +69 -0
  67. data/lib/charty/vector_adapters.rb +187 -0
  68. data/lib/charty/vector_adapters/array_adapter.rb +101 -0
  69. data/lib/charty/vector_adapters/daru_adapter.rb +163 -0
  70. data/lib/charty/vector_adapters/narray_adapter.rb +182 -0
  71. data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
  72. data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
  73. data/lib/charty/vector_adapters/pandas_adapter.rb +199 -0
  74. data/lib/charty/version.rb +1 -1
  75. metadata +105 -24
  76. data/lib/charty/palette.rb +0 -235
@@ -0,0 +1,80 @@
1
+ module Charty
2
+ module Plotters
3
# Plotter that draws the +x+ and +y+ variables as points, with optional
# +color+, +style+ and +size+ variables forwarded to the backend.
class ScatterPlotter < RelationalPlotter
  # Unpacks the +variables+ hash into the positional arguments passed to
  # the superclass initializer; +data+, the remaining options and the
  # block are forwarded unchanged.
  def initialize(data: nil, variables: {}, **options, &block)
    x, y, color, style, size = variables.values_at(:x, :y, :color, :style, :size)
    super(x, y, color, style, size, data: data, **options, &block)
  end

  attr_reader :alpha

  # Accepts nil, :auto, the string "auto" (stored as :auto) or a value
  # covered by the range 0..1.
  #
  # Raises ArgumentError for a Numeric outside 0..1 and for any other
  # kind of value.
  def alpha=(val)
    case val
    when nil, :auto, 0..1
      @alpha = val
    when "auto"
      @alpha = val.to_sym
    when Numeric
      # The value is wrapped in an Array so that an Array-like value is
      # printed whole instead of being expanded into format arguments.
      raise ArgumentError,
            "the given alpha is out of bounds " +
            "(%p for nil, :auto, or number 0..1)" % [val]
    else
      raise ArgumentError,
            "invalid value of alpha " +
            "(%p for nil, :auto, or number in 0..1)" % [val]
    end
  end

  attr_reader :line_width, :edge_color

  # Stores the value returned by check_number (nil is allowed).
  def line_width=(val)
    @line_width = check_number(val, :line_width, allow_nil: true)
  end

  # Stores the value returned by check_color (nil is allowed).
  #
  # The result is written to @edge_color, the variable read by the
  # edge_color reader. Writing it to @line_width would clobber the line
  # width and leave edge_color permanently nil.
  def edge_color=(val)
    @edge_color = check_color(val, :edge_color, allow_nil: true)
  end

  # Draws the points first, then labels the axes.
  private def render_plot(backend, **)
    draw_points(backend)
    annotate_axes(backend)
  end

  # Sets up the color, size and style mappings, passes the rows returned
  # by @plot_data.drop_na to backend.scatter and adds a legend when
  # +legend+ is truthy.
  private def draw_points(backend)
    map_color(palette: palette, order: color_order, norm: color_norm)
    map_size(sizes: sizes, order: size_order, norm: size_norm)
    map_style(markers: markers, order: style_order)

    data = @plot_data.drop_na

    # TODO: should pass key_color to backend's scatter method.
    #       In pyplot backend, it is passed as color parameter.

    x = data[:x]
    y = data[:y]
    # Optional variables stay nil unless they were assigned.
    color = data[:color] if @variables.key?(:color)
    style = data[:style] if @variables.key?(:style)
    size = data[:size] if @variables.key?(:size)

    # TODO: key_color
    backend.scatter(
      x, y, @variables,
      color: color, color_mapper: @color_mapper,
      style: style, style_mapper: @style_mapper,
      size: size, size_mapper: @size_mapper
    )

    if legend
      backend.add_scatter_plot_legend(@variables, @color_mapper, @size_mapper, @style_mapper, legend)
    end
  end

  # Uses the :x and :y entries of +variables+ as axis labels, skipping
  # an axis whose entry is nil.
  private def annotate_axes(backend)
    xlabel = self.variables[:x]
    ylabel = self.variables[:y]
    backend.set_xlabel(xlabel) unless xlabel.nil?
    backend.set_ylabel(ylabel) unless ylabel.nil?
  end
end
79
+ end
80
+ end
@@ -0,0 +1,6 @@
1
+ module Charty
2
+ module Plotters
3
# Subclass of AbstractPlotter that adds no behavior of its own.
# NOTE(review): presumably a placeholder for vector-based plotters - confirm.
class VectorPlotter < AbstractPlotter; end
5
+ end
6
+ end
@@ -7,8 +7,12 @@ module Charty
7
7
  enum.mean
8
8
  end
9
9
 
10
- def self.stdev(enum)
11
- enum.stdev
10
# Standard deviation of +enum+, computed by the collection's own +stdev+
# method; the +population+ flag is passed through unchanged.
def self.stdev(enum, population: false)
  enum.stdev(population: population)
end
13
+
14
# Histogram of +ary+, computed by the collection's own +histogram+
# method; every extra positional and keyword argument is forwarded as-is.
def self.histogram(ary, *args, **kwargs)
  ary.histogram(*args, **kwargs)
end
13
17
  rescue LoadError
14
18
  def self.mean(enum)
@@ -24,6 +28,96 @@ module Charty
24
28
  var = xs.map {|x| (x - mean)**2 }.sum / (n - ddof)
25
29
  Math.sqrt(var)
26
30
  end
31
+
32
# Variant of +histogram+ that always raises NotImplementedError; the
# arguments are accepted only to mirror the delegating signature.
def self.histogram(ary, *args, **kwargs)
  message = "histogram is currently supported only with enumerable-statistics"
  raise NotImplementedError, message
end
36
+ end
37
+
38
# Builds a bootstrap distribution: +n_boot+ resamples (with replacement,
# same size as +vector+) reduced with +func+.
#
# vector - collection responding to +size+ and +values_at+.
# n_boot - number of resamples.
# func   - estimator, validated by EstimationSupport.check_estimator;
#          only :mean yields a value here, any other result gives nil
#          entries.
# units  - when truthy the work is handed to structured_bootstrap.
# random - validated by RandomSupport.check_random; must respond to
#          +rand(n)+ afterwards.
#
# Returns an Array with +n_boot+ entries, or whatever the PyCall
# specialisation returns when it handles +vector+.
def self.bootstrap(vector, n_boot: 2000, func: :mean, units: nil, random: nil)
  sample_size = vector.size
  random = Charty::Plotters::RandomSupport.check_random(random)
  func = Charty::Plotters::EstimationSupport.check_estimator(func)

  return structured_bootstrap(vector, n_boot, units, func, random) if units

  # Try the PyCall-specific path only when one of its container classes
  # is loaded; a nil result means it did not recognise +vector+.
  if defined?(Pandas::Series) || defined?(Numpy::NDArray)
    fast_dist = bootstrap_optimized_for_pycall(vector, n_boot, random, func)
    return fast_dist if fast_dist
  end

  Array.new(n_boot) do
    picks = Array.new(sample_size) { random.rand(sample_size) }
    resampled = vector.values_at(*picks)

    case func
    when :mean
      mean(resampled)
    end
  end
end
65
+
66
# Bootstrap resampling specialised for PyCall-backed containers.
#
# vector - a Charty::Vector (unwrapped via +data+ and retried), a
#          Pandas::Series or a Numpy::NDArray.
# n_boot - number of resamples.
# random - object responding to +rand(n)+.
# func   - estimator; only :mean yields a value, anything else gives nil
#          entries.
#
# Returns an Array with +n_boot+ entries, or nil when +vector+ is none of
# the supported containers.
private_class_method def self.bootstrap_optimized_for_pycall(vector, n_boot, random, func)
  if vector.is_a?(Charty::Vector)
    return bootstrap_optimized_for_pycall(vector.data, n_boot, random, func)
  end

  # Both constants are guarded so that a missing library means
  # "not handled" (nil) rather than a NameError.
  pandas_series = defined?(Pandas::Series) && vector.is_a?(Pandas::Series)
  numpy_array = defined?(Numpy::NDArray) && vector.is_a?(Numpy::NDArray)
  return nil unless pandas_series || numpy_array

  # numpy is also available when pandas is available
  n = vector.size
  # One index buffer is allocated up front and refilled for each resample.
  resampler = Numpy.empty(n, dtype: Numpy.intp)
  Array.new(n_boot) do
    # TODO: Use Numo and MemoryView to reduce execution time
    # resampler = Numo::Int64.new(n).rand(n)
    # w = Numpy.take(vector, resampler)
    n.times { |j| resampler[j] = random.rand(n) }
    w = vector.take(resampler)

    case func
    when :mean
      w.mean
    end
  end
end
89
+
90
# Placeholder for bootstrapping with +units+: always raises
# NotImplementedError, whatever the arguments.
private_class_method def self.structured_bootstrap(vector, n_boot, units, func, random)
  message = "structured bootstrapping has not been supported yet"
  raise NotImplementedError, message
end
94
+
95
# Confidence interval taken from a bootstrap distribution.
#
# vectors - forwarded to +bootstrap+.
# width   - width of the central interval in percent (e.g. 95 selects the
#           2.5th and 97.5th percentiles).
# n_boot, func, units, random - forwarded to +bootstrap+.
#
# Returns the result of the distribution's own +percentile+ method when
# it has one, otherwise the result of Statistics.percentile.
def self.bootstrap_ci(*vectors, width, n_boot: 2000, func: :mean, units: nil, random: nil)
  boot = bootstrap(*vectors, n_boot: n_boot, func: func, units: units, random: random)
  # Float division: with an Integer width such as 95, `width / 2` would
  # truncate to 47 and select the 3rd/97th percentiles instead.
  half_width = width / 2.0
  q = [50 - half_width, 50 + half_width]
  if boot.respond_to?(:percentile)
    boot.percentile(q)
  else
    percentile(boot, q)
  end
end
104
+
105
+ # TODO: optimize with introselect algorithm
106
# Percentiles of +a+ by linear interpolation between neighbouring sorted
# values.
#
# a - collection responding to +size+ and +sort+.
# q - enumerable of percentages; each one is mapped to the fractional
#     position n * q / 100 in the sorted values.
#
# Returns an Array with one value per entry of +q+. For an empty +a+ the
# result of mean(a) is returned as-is.
def self.percentile(a, q)
  return mean(a) if a.size == 0

  sorted = a.sort
  n = sorted.size
  last = n - 1
  q.map do |pct|
    pos = n * (pct / 100.0)
    i = pos.floor
    # Positions at or past the last element (pct == 100 gives i == n) are
    # clamped to it instead of reading beyond the end of the array.
    if i >= last
      sorted[last]
    else
      t = pos - i
      (1 - t) * sorted[i] + t * sorted[i + 1]
    end
  end
end
28
122
  end
29
123
  end
data/lib/charty/table.rb CHANGED
@@ -21,34 +21,67 @@ module Charty
21
21
  else
22
22
  @adapter = adapter_class.new(data, **kwargs)
23
23
  end
24
+
25
+ @column_cache = {}
24
26
  end
25
27
 
26
28
  attr_reader :adapter
27
29
 
30
+ def_delegators :adapter, :length, :column_length
31
+
32
+ def_delegators :adapter, :columns, :columns=
33
+ def_delegators :adapter, :index, :index=
34
+
28
35
  def_delegator :@adapter, :column_names
36
+
37
# Tells whether +name+ is one of column_names, treating a String and the
# Symbol with the same spelling as interchangeable. Any other kind of
# +name+ that is not listed literally gives false.
def column?(name)
  if column_names.include?(name)
    true
  elsif name.is_a?(String)
    column_names.include?(name.to_sym)
  elsif name.is_a?(Symbol)
    column_names.include?(name.to_s)
  else
    false
  end
end
49
+
29
50
  def_delegator :@adapter, :data, :raw_data
30
51
 
31
- def columns
32
- @column_accessor ||= ColumnAccessor.new(@adapter)
52
# Equality: the same object is always equal; another Charty::Table is
# equal when the two adapters compare equal; anything else is left to
# the inherited implementation.
def ==(other)
  return true if equal?(other)

  case other
  when Charty::Table then adapter == other.adapter
  else super
  end
end
62
+
63
# True when +length+ is zero.
def empty?
  length == 0
end
34
66
 
35
- def [](*args)
36
- n_args = args.length
37
- case n_args
38
- when 1
39
- row = nil
40
- column = args[0]
41
- @adapter[row, column]
42
- when 2
43
- row = args[0]
44
- column = args[1]
45
- @adapter[row, column]
67
# Looks up a column. +key+ is normalised to a Symbol (non-Symbol keys go
# through String.try_convert first) and the value obtained from the
# adapter is memoised in @column_cache under that Symbol.
def [](key)
  name = key.is_a?(Symbol) ? key : String.try_convert(key).to_sym
  @column_cache.fetch(name) do
    @column_cache[name] = @adapter[nil, name]
  end
end
51
80
 
81
# Hands grouping over to the adapter, passing this table, the +grouper+
# and the two flags positionally.
def group_by(grouper, sort: true, drop_na: true)
  adapter.group_by(self, grouper, sort, drop_na)
end
84
+
52
85
  def to_a(x=nil, y=nil, z=nil)
53
86
  case
54
87
  when defined?(Daru::DataFrame) && table.kind_of?(Daru::DataFrame)
@@ -81,5 +114,117 @@ module Charty
81
114
  i += 1
82
115
  end
83
116
  end
117
+
118
# Returns what the adapter's +drop_na+ returns, or this table itself
# when that result is nil or false.
def drop_na
  cleaned = @adapter.drop_na
  cleaned || self
end
121
+
122
+ def_delegator :adapter, :sort_values
123
+
124
+ def_delegator :adapter, :reset_index
125
+
126
# Common superclass of the group-by result objects.
class GroupByBase
end

# Group-by result that keeps, for every group key, the row positions of
# the table that belong to that group.
class HashGroupBy < GroupByBase
  # table   - table to group; must respond to +length+ and +[]+.
  # grouper - column name (Symbol/String), Array of column names, or a
  #           Charty::Vector with one entry per row.
  # sort    - when truthy the groups are ordered by key.
  # drop_na - when truthy, groups whose key is missing are discarded.
  def initialize(table, grouper, sort, drop_na)
    @table = table
    @grouper = check_grouper(grouper)
    init_groups(sort, drop_na)
  end

  # Validates +grouper+ and returns it unchanged.
  #
  # Raises ArgumentError for a Charty::Vector whose length differs from
  # the table's and for unrecognised values; raises NotImplementedError
  # for callables.
  private def check_grouper(grouper)
    case grouper
    when Symbol, String, Array
      # TODO check column existence
      return grouper
    when Charty::Vector
      if @table.length != grouper.length
        raise ArgumentError,
              "Wrong number of items in grouper array " +
              "(%p for %p)" % [grouper.length, @table.length]
      end
      return grouper
    when ->(x) { x.respond_to?(:call) }
      raise NotImplementedError,
            "A callable grouper is unsupported"
    else
      # Wrapped in an Array so the value is always a single format
      # argument.
      raise ArgumentError,
            "Unable to recognize the value for `grouper`: %p" % [grouper]
    end
  end

  # Builds @indices, a Hash from group key to the Array of row positions
  # in that group. With an Array grouper the key is the Array of the
  # row's values in the listed columns.
  private def init_groups(sort, drop_na)
    case @grouper
    when Symbol, String
      column = @table[@grouper]
      @indices = (0 ... @table.length).group_by do |i|
        column.data[i]
      end
    when Array
      @indices = (0 ... @table.length).group_by { |i|
        @grouper.map { |j| @table[j].data[i] }
      }
    when Charty::Vector
      @indices = (0 ... @table.length).group_by do |i|
        @grouper.data[i]
      end
    end

    if drop_na
      case @grouper
      when Array
        # A composite key is dropped as soon as one component is missing.
        @indices.reject! { |key, | key.any? { |k| Util.missing?(k) } }
      else
        @indices.reject! { |key, | Util.missing?(key) }
      end
    end

    if sort
      @indices = @indices.sort_by { |key, | key }.to_h
    end
  end

  # Shallow copy of the key => row positions Hash.
  def indices
    @indices.dup
  end

  # Group keys in their stored order.
  def group_keys
    @indices.keys
  end

  # Yields every group key.
  def each_group_key(&block)
    @indices.each_key(&block)
  end

  # Calls the block with each group's table followed by +args+ and wraps
  # the collected results in a new table indexed by the group keys.
  def apply(*args, &block)
    Charty::Table.new(
      each_group.map { |_key, table|
        block.call(table, *args)
      },
      index: Charty::Index.new(@indices.keys, name: @grouper)
    )
  end

  # Yields each group key together with the table of that group; returns
  # an Enumerator when no block is given.
  def each_group
    return enum_for(__method__) unless block_given?

    @indices.each_key do |key|
      yield(key, self[key])
    end
  end

  # Table holding the rows of the group +key+ (indexed by their original
  # row positions), or nil for an unknown key.
  def [](key)
    return nil unless @indices.key?(key)

    index = @indices[key]
    Charty::Table.new(
      @table.column_names.map { |col|
        [col, @table[col].values_at(*index)]
      }.to_h,
      index: index
    )
  end
end
84
229
  end
85
230
  end
@@ -15,9 +15,11 @@ module Charty
15
15
  end
16
16
  end
17
17
 
18
+ require_relative 'table_adapters/base_adapter'
18
19
  require_relative 'table_adapters/hash_adapter'
19
20
  require_relative 'table_adapters/narray_adapter'
20
21
  require_relative 'table_adapters/datasets_adapter'
21
22
  require_relative 'table_adapters/daru_adapter'
22
23
  require_relative 'table_adapters/active_record_adapter'
23
24
  require_relative 'table_adapters/nmatrix_adapter'
25
+ require_relative 'table_adapters/pandas_adapter'
@@ -1,10 +1,8 @@
1
1
  module Charty
2
2
  module TableAdapters
3
- class ActiveRecordAdapter
3
+ class ActiveRecordAdapter < BaseAdapter
4
4
  TableAdapters.register(:active_record, self)
5
5
 
6
- include Enumerable
7
-
8
6
  def self.supported?(data)
9
7
  defined?(ActiveRecord::Relation) && data.is_a?(ActiveRecord::Relation)
10
8
  end
@@ -12,17 +10,27 @@ module Charty
12
10
  def initialize(data)
13
11
  @data = check_type(ActiveRecord::Relation, data, :data)
14
12
  @column_names = @data.column_names.freeze
15
- @columns = nil
13
+ self.columns = Index.new(@column_names)
14
+ self.index = RangeIndex.new(0 ... length)
16
15
  end
17
16
 
18
- attr_reader :column_names, :data
17
+ attr_reader :data, :column_names
18
+
19
+ def_delegators :data, :size
20
+
21
+ alias length size
22
+
23
# Number of entries in column_names.
def column_length
  column_names.size
end
19
26
 
20
27
  def [](row, column)
21
- fetch_records unless @columns
28
+ fetch_records unless @columns_cache
22
29
  if row
23
- @columns[resolve_column_index(column)][row]
30
+ @columns_cache[resolve_column_index(column)][row]
24
31
  else
25
- @columns[resolve_column_index(column)]
32
+ column_data = @columns_cache[resolve_column_index(column)]
33
+ Vector.new(column_data, index: index, name: column)
26
34
  end
27
35
  end
28
36
 
@@ -43,7 +51,7 @@ module Charty
43
51
  end
44
52
 
45
53
  private def fetch_records
46
- @columns = @data.pluck(*column_names).transpose
54
+ @columns_cache = @data.pluck(*column_names).transpose
47
55
  end
48
56
 
49
57
  private def check_type(type, data, name)