charty 0.2.0 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +71 -0
  3. data/.github/workflows/nmatrix.yml +67 -0
  4. data/.github/workflows/pycall.yml +86 -0
  5. data/Dockerfile.dev +9 -1
  6. data/Gemfile +18 -0
  7. data/README.md +177 -9
  8. data/Rakefile +4 -5
  9. data/charty.gemspec +10 -4
  10. data/examples/Gemfile +1 -0
  11. data/examples/active_record.ipynb +1 -1
  12. data/examples/daru.ipynb +1 -1
  13. data/examples/iris_dataset.ipynb +1 -1
  14. data/examples/nmatrix.ipynb +1 -1
  15. data/examples/{numo-narray.ipynb → numo_narray.ipynb} +1 -1
  16. data/examples/palette.rb +71 -0
  17. data/examples/sample.png +0 -0
  18. data/examples/sample_images/hist_gruff.png +0 -0
  19. data/examples/sample_pyplot.ipynb +40 -38
  20. data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
  21. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  22. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  23. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  24. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  25. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  26. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  27. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  28. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  29. data/lib/charty.rb +13 -1
  30. data/lib/charty/backend_methods.rb +8 -0
  31. data/lib/charty/backends.rb +26 -1
  32. data/lib/charty/backends/bokeh.rb +31 -31
  33. data/lib/charty/backends/{google_chart.rb → google_charts.rb} +75 -33
  34. data/lib/charty/backends/gruff.rb +14 -3
  35. data/lib/charty/backends/plotly.rb +774 -9
  36. data/lib/charty/backends/pyplot.rb +611 -34
  37. data/lib/charty/backends/rubyplot.rb +2 -2
  38. data/lib/charty/backends/unicode_plot.rb +79 -0
  39. data/lib/charty/dash_pattern_generator.rb +57 -0
  40. data/lib/charty/index.rb +213 -0
  41. data/lib/charty/linspace.rb +1 -1
  42. data/lib/charty/plot_methods.rb +254 -0
  43. data/lib/charty/plotter.rb +10 -10
  44. data/lib/charty/plotters.rb +12 -0
  45. data/lib/charty/plotters/abstract_plotter.rb +243 -0
  46. data/lib/charty/plotters/bar_plotter.rb +201 -0
  47. data/lib/charty/plotters/box_plotter.rb +79 -0
  48. data/lib/charty/plotters/categorical_plotter.rb +380 -0
  49. data/lib/charty/plotters/count_plotter.rb +7 -0
  50. data/lib/charty/plotters/estimation_support.rb +84 -0
  51. data/lib/charty/plotters/line_plotter.rb +300 -0
  52. data/lib/charty/plotters/random_support.rb +25 -0
  53. data/lib/charty/plotters/relational_plotter.rb +635 -0
  54. data/lib/charty/plotters/scatter_plotter.rb +80 -0
  55. data/lib/charty/plotters/vector_plotter.rb +6 -0
  56. data/lib/charty/statistics.rb +114 -0
  57. data/lib/charty/table.rb +161 -15
  58. data/lib/charty/table_adapters.rb +2 -0
  59. data/lib/charty/table_adapters/active_record_adapter.rb +17 -9
  60. data/lib/charty/table_adapters/base_adapter.rb +166 -0
  61. data/lib/charty/table_adapters/daru_adapter.rb +41 -3
  62. data/lib/charty/table_adapters/datasets_adapter.rb +17 -2
  63. data/lib/charty/table_adapters/hash_adapter.rb +143 -16
  64. data/lib/charty/table_adapters/narray_adapter.rb +25 -6
  65. data/lib/charty/table_adapters/nmatrix_adapter.rb +15 -5
  66. data/lib/charty/table_adapters/pandas_adapter.rb +163 -0
  67. data/lib/charty/util.rb +28 -0
  68. data/lib/charty/vector.rb +69 -0
  69. data/lib/charty/vector_adapters.rb +187 -0
  70. data/lib/charty/vector_adapters/array_adapter.rb +101 -0
  71. data/lib/charty/vector_adapters/daru_adapter.rb +163 -0
  72. data/lib/charty/vector_adapters/narray_adapter.rb +182 -0
  73. data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
  74. data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
  75. data/lib/charty/vector_adapters/pandas_adapter.rb +199 -0
  76. data/lib/charty/version.rb +1 -1
  77. metadata +121 -22
  78. data/.travis.yml +0 -10
@@ -0,0 +1,80 @@
1
+ module Charty
2
+ module Plotters
3
+ class ScatterPlotter < RelationalPlotter
4
# Builds a scatter plotter from a variable mapping.
#
# The :x, :y, :color, :style, and :size entries of +variables+ are looked
# up and handed to the superclass positionally, in that order.
#
# @param data [Object, nil] data source, forwarded to the superclass
# @param variables [Hash] variable assignments keyed by role
# @param options [Hash] remaining keyword options, forwarded unchanged
def initialize(data: nil, variables: {}, **options, &block)
  positional = %i[x y color style size].map { |role| variables[role] }
  super(*positional, data: data, **options, &block)
end
8
+
9
+ attr_reader :alpha
10
+
11
# Sets the opacity used for the points.
#
# Accepted values are nil, :auto (the string "auto" is stored as :auto),
# or a number within 0..1.
#
# @param val [nil, Symbol, String, Numeric] the requested alpha
# @raise [ArgumentError] when val is a number outside 0..1, or any other
#   unsupported value
def alpha=(val)
  case val
  when nil, :auto, 0..1
    @alpha = val
  when "auto"
    @alpha = val.to_sym
  when Numeric
    raise ArgumentError,
          "the given alpha is out of bounds " +
          "(%p for nil, :auto, or number 0..1)" % [val]
  else
    # val is wrapped in an array so that an Array value is formatted as a
    # single argument instead of being expanded into format arguments.
    raise ArgumentError,
          "invalid value of alpha " +
          "(%p for nil, :auto, or number in 0..1)" % [val]
  end
end
27
+
28
+ attr_reader :line_width, :edge_color
29
+
30
# Stores the line width after passing it through check_number
# (nil is allowed).
def line_width=(val)
  validated = check_number(val, :line_width, allow_nil: true)
  @line_width = validated
end
33
+
34
# Stores the edge color after passing it through check_color
# (nil is allowed).
#
# The value is kept in @edge_color, which is what the edge_color reader
# returns; @line_width is left untouched.
def edge_color=(val)
  @edge_color = check_color(val, :edge_color, allow_nil: true)
end
37
+
38
# Renders the plot on +backend+: the points are drawn first, then the axes
# are labelled. Any keyword arguments are accepted and ignored.
private def render_plot(backend, **)
  draw_points(backend)
  annotate_axes(backend)
end
42
+
43
# Sets up the color, size, and style mappings, sends the point data to the
# backend's scatter method, and adds a legend when +legend+ is truthy.
private def draw_points(backend)
  map_color(palette: palette, order: color_order, norm: color_norm)
  map_size(sizes: sizes, order: size_order, norm: size_norm)
  map_style(markers: markers, order: style_order)

  # NOTE(review): drop_na presumably removes rows with missing values;
  # confirm against the table implementation.
  plot_data = @plot_data.drop_na

  # TODO: should pass key_color to backend's scatter method.
  #       In pyplot backend, it is passed as color parameter.

  x = plot_data[:x]
  y = plot_data[:y]

  # Each optional column is read only when the variable was assigned;
  # otherwise nil is passed to the backend.
  color, style, size = %i[color style size].map do |role|
    plot_data[role] if @variables.key?(role)
  end

  # TODO: key_color
  backend.scatter(
    x, y, @variables,
    color: color, color_mapper: @color_mapper,
    style: style, style_mapper: @style_mapper,
    size: size, size_mapper: @size_mapper
  )

  return unless legend

  backend.add_scatter_plot_legend(@variables, @color_mapper, @size_mapper, @style_mapper, legend)
end
71
+
72
# Labels the x and y axes of +backend+ with the :x and :y entries of
# +variables+; an axis whose entry is nil is left unlabelled.
private def annotate_axes(backend)
  x_label = variables[:x]
  y_label = variables[:y]
  backend.set_xlabel(x_label) unless x_label.nil?
  backend.set_ylabel(y_label) unless y_label.nil?
end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,6 @@
1
+ module Charty
2
+ module Plotters
3
# Subclass of AbstractPlotter that currently adds no behaviour of its own.
class VectorPlotter < AbstractPlotter
end
5
+ end
6
+ end
@@ -0,0 +1,114 @@
1
+ module Charty
2
+ module Statistics
3
begin
  require "enumerable/statistics"

  # Mean of +enum+, delegated to the enumerable-statistics extension.
  def self.mean(enum)
    enum.mean
  end

  # Standard deviation of +enum+, delegated to enumerable-statistics.
  # With population: true the population form is requested.
  def self.stdev(enum, population: false)
    enum.stdev(population: population)
  end
rescue LoadError
  # Pure-Ruby fallbacks, defined when enumerable-statistics cannot be loaded.

  # Arithmetic mean of +enum+ as a Float.
  def self.mean(enum)
    values = enum.to_a
    values.sum / values.length.to_f
  end

  # Standard deviation of +enum+. The sum of squared deviations is divided
  # by n when population is true and by n - 1 otherwise.
  def self.stdev(enum, population: false)
    values = enum.to_a
    count = values.length
    center = values.sum.to_f / count
    divisor = population ? count : count - 1
    variance = values.sum { |value| (value - center)**2 } / divisor
    Math.sqrt(variance)
  end
end
28
+
29
# Builds a bootstrap distribution of an estimator over +vector+.
#
# Each of the +n_boot+ iterations draws vector.size indices with
# replacement from +random+ and evaluates the estimator on the selected
# values. Only the :mean estimator is evaluated here; for any other value
# returned by check_estimator the entry is nil.
#
# @param vector [#size, #values_at] the values to resample
# @param n_boot [Integer] number of resamples
# @param func [Object] estimator, validated by EstimationSupport.check_estimator
# @param units [Object, nil] when truthy, structured_bootstrap is used instead
# @param random [Object, nil] random source, validated by RandomSupport.check_random
# @return [Array] one estimate per resample (or whatever the PyCall fast
#   path returns when it applies)
def self.bootstrap(vector, n_boot: 2000, func: :mean, units: nil, random: nil)
  n = vector.size
  random = Charty::Plotters::RandomSupport.check_random(random)
  func = Charty::Plotters::EstimationSupport.check_estimator(func)

  return structured_bootstrap(vector, n_boot, units, func, random) if units

  # Try the PyCall-backed fast path; it returns nil for unsupported inputs.
  if defined?(Pandas::Series) || defined?(Numpy::NDArray)
    optimized = bootstrap_optimized_for_pycall(vector, n_boot, random, func)
    return optimized if optimized
  end

  Array.new(n_boot) do
    picks = Array.new(n) { random.rand(n) }
    sample = vector.values_at(*picks)
    mean(sample) if :mean == func
  end
end
56
+
57
# Bootstrap fast path for data held in Pandas/Numpy objects.
#
# A Charty::Vector is unwrapped to its underlying data first. For a
# Pandas::Series or Numpy::NDArray, an index buffer is refilled with random
# indices on every iteration and the estimator is applied to the taken
# values. Returns nil for any other kind of input.
private_class_method def self.bootstrap_optimized_for_pycall(vector, n_boot, random, func)
  if vector.is_a?(Charty::Vector)
    return bootstrap_optimized_for_pycall(vector.data, n_boot, random, func)
  end

  # numpy is also available when pandas is available
  is_series = defined?(Pandas::Series) && vector.is_a?(Pandas::Series)
  return unless is_series || vector.is_a?(Numpy::NDArray)

  n = vector.size
  resampler = Numpy.empty(n, dtype: Numpy.intp)
  Array.new(n_boot) do
    # TODO: Use Numo and MemoryView to reduce execution time
    # resampler = Numo::Int64.new(n).rand(n)
    # w = Numpy.take(vector, resampler)
    n.times { |slot| resampler[slot] = random.rand(n) }
    sample = vector.take(resampler)

    sample.mean if :mean == func
  end
end
80
+
81
# Placeholder for bootstrapping with sampling units; always raises.
#
# @raise [NotImplementedError] unconditionally
private_class_method def self.structured_bootstrap(vector, n_boot, units, func, random)
  message = "structured bootstrapping has not been supported yet"
  raise NotImplementedError, message
end
85
+
86
# Computes a bootstrap confidence interval.
#
# The bootstrap distribution is produced by +bootstrap+, and the interval
# bounds are its (50 - width/2)-th and (50 + width/2)-th percentiles.
#
# @param vectors [Array] the data passed on to +bootstrap+
# @param width [Numeric] interval width in percent (e.g. 95)
# @param n_boot [Integer] number of resamples, forwarded to +bootstrap+
# @param func [Object] estimator, forwarded to +bootstrap+
# @param units [Object, nil] forwarded to +bootstrap+
# @param random [Object, nil] forwarded to +bootstrap+
# @return the result of the percentile computation for the two bounds
def self.bootstrap_ci(*vectors, width, n_boot: 2000, func: :mean, units: nil, random: nil)
  boot = bootstrap(*vectors, n_boot: n_boot, func: func, units: units, random: random)

  # Divide as a Float: an Integer width such as 95 must give 2.5 and 97.5,
  # not the truncated 3 and 97.
  half_width = width / 2.0
  q = [50 - half_width, 50 + half_width]

  if boot.respond_to?(:percentile)
    boot.percentile(q)
  else
    percentile(boot, q)
  end
end
95
+
96
# Computes percentiles of +a+ by linear interpolation between the two
# closest ranks of the sorted values.
#
# A percentage p is mapped to the fractional position (n - 1) * p / 100 in
# the sorted values, so 0 gives the minimum, 50 the median, and 100 the
# maximum.
#
# @param a [Array<Numeric>] the values (sorted internally; not modified)
# @param q [Array<Numeric>] percentages in 0..100
# @return [Array<Numeric>] one percentile per entry of +q+; when +a+ is
#   empty, the result of mean(a) is returned instead
#
# TODO: optimize with introselect algorithm
def self.percentile(a, q)
  return mean(a) if a.size == 0

  a = a.sort
  last = a.size - 1
  q.map do |x|
    pos = last * (x / 100.0)
    i = pos.floor
    if i == last
      # Reached only at the top of the range; there is no right neighbour.
      a[i]
    else
      t = pos - i
      (1 - t) * a[i] + t * a[i + 1]
    end
  end
end
113
+ end
114
+ end
data/lib/charty/table.rb CHANGED
@@ -21,33 +21,67 @@ module Charty
21
21
  else
22
22
  @adapter = adapter_class.new(data, **kwargs)
23
23
  end
24
+
25
+ @column_cache = {}
24
26
  end
25
27
 
26
28
  attr_reader :adapter
27
29
 
30
+ def_delegators :adapter, :length, :column_length
31
+
32
+ def_delegators :adapter, :columns, :columns=
33
+ def_delegators :adapter, :index, :index=
34
+
28
35
  def_delegator :@adapter, :column_names
29
36
 
30
- def columns
31
- @column_accessor ||= ColumnAccessor.new(@adapter)
37
# Tells whether the table has a column called +name+.
#
# The name is matched as given first; a String is then retried as a Symbol
# and a Symbol as a String. Any other kind of name that is not found
# yields false.
def column?(name)
  return true if column_names.include?(name)
  return column_names.include?(name.to_sym) if String === name
  return column_names.include?(name.to_s) if Symbol === name

  false
end
49
+
50
+ def_delegator :@adapter, :data, :raw_data
51
+
52
# Equality: the same object is always equal; another Charty::Table is
# equal when the two adapters compare equal; anything else is deferred to
# the inherited implementation.
def ==(other)
  return true if equal?(other)
  return super unless Charty::Table === other

  adapter == other.adapter
end
62
+
63
# True when the table's length is zero.
def empty?
  length == 0
end
33
66
 
34
- def [](*args)
35
- n_args = args.length
36
- case n_args
37
- when 1
38
- row = nil
39
- column = args[0]
40
- @adapter[row, column]
41
- when 2
42
- row = args[0]
43
- column = args[1]
44
- @adapter[row, column]
67
+ def [](key)
68
+ key = case key
69
+ when Symbol
70
+ key
71
+ else
72
+ String.try_convert(key).to_sym
73
+ end
74
+ if @column_cache.key?(key)
75
+ @column_cache[key]
45
76
  else
46
- message = "wrong number of arguments (given #{n_args}, expected 1..2)"
47
- raise ArgumentError, message
77
+ @column_cache[key] = @adapter[nil, key]
48
78
  end
49
79
  end
50
80
 
81
# Groups the table by +grouper+ by delegating to the adapter, which
# receives this table, the grouper, and the two flags positionally.
#
# @param grouper [Object] what to group by; interpreted by the adapter
# @param sort [Boolean] forwarded to the adapter (default true)
# @param drop_na [Boolean] forwarded to the adapter (default true)
def group_by(grouper, sort: true, drop_na: true)
  adapter.group_by(self, grouper, sort, drop_na)
end
84
+
51
85
  def to_a(x=nil, y=nil, z=nil)
52
86
  case
53
87
  when defined?(Daru::DataFrame) && table.kind_of?(Daru::DataFrame)
@@ -80,5 +114,117 @@ module Charty
80
114
  i += 1
81
115
  end
82
116
  end
117
+
118
# Returns the result of the adapter's drop_na, or this table itself when
# the adapter returns nil or false.
def drop_na
  cleaned = @adapter.drop_na
  cleaned || self
end
121
+
122
+ def_delegator :adapter, :sort_values
123
+
124
+ def_delegator :adapter, :reset_index
125
+
126
# Common superclass of the group-by implementations.
class GroupByBase
end

# Group-by implementation that records, for every group key, the row
# positions belonging to that group in a Hash.
class HashGroupBy < GroupByBase
  # @param table [Charty::Table] the table whose rows are grouped
  # @param grouper [Symbol, String, Array, Charty::Vector] a column name, a
  #   list of column names, or a vector with one entry per row
  # @param sort [Boolean] whether the groups are ordered by key
  # @param drop_na [Boolean] whether groups with a missing key are removed
  # @raise [ArgumentError] when the grouper is not recognized or a vector
  #   grouper's length differs from the table's
  # @raise [NotImplementedError] when the grouper is callable
  def initialize(table, grouper, sort, drop_na)
    @table = table
    @grouper = check_grouper(grouper)
    init_groups(sort, drop_na)
  end

  # Validates +grouper+ and returns it unchanged when it is supported.
  private def check_grouper(grouper)
    case grouper
    when Symbol, String, Array
      # TODO check column existence
      return grouper
    when Charty::Vector
      if @table.length != grouper.length
        raise ArgumentError,
              "Wrong number of items in grouper array " +
              "(%p for %p)" % [grouper.length, @table.length]
      end
      return grouper
    when ->(x) { x.respond_to?(:call) }
      raise NotImplementedError,
            "A callable grouper is unsupported"
    else
      # The value is wrapped in an array so it is always formatted as a
      # single argument.
      raise ArgumentError,
            "Unable to recognize the value for `grouper`: %p" % [grouper]
    end
  end

  # Builds @indices, a Hash from group key to the row positions of the
  # group. With an Array grouper the key is the array of the row's values
  # in the listed columns.
  private def init_groups(sort, drop_na)
    case @grouper
    when Symbol, String
      column = @table[@grouper]
      @indices = (0 ... @table.length).group_by do |i|
        column.data[i]
      end
    when Array
      @indices = (0 ... @table.length).group_by { |i|
        @grouper.map {|j| @table[j].data[i] }
      }
    when Charty::Vector
      @indices = (0 ... @table.length).group_by do |i|
        @grouper.data[i]
      end
    end

    if drop_na
      case @grouper
      when Array
        # A compound key is dropped when any of its parts is missing.
        @indices.reject! {|key, | key.any? {|k| Util.missing?(k) } }
      else
        @indices.reject! {|key, | Util.missing?(key) }
      end
    end

    if sort
      @indices = @indices.sort_by {|key, | key }.to_h
    end
  end

  # Returns a shallow copy of the key => row positions Hash.
  def indices
    @indices.dup
  end

  # Returns the group keys, in group order.
  def group_keys
    @indices.keys
  end

  # Yields every group key in group order.
  def each_group_key(&block)
    @indices.each_key(&block)
  end

  # Calls the block with each group's table followed by +args+, and builds
  # a new table from the results, indexed by the group keys.
  def apply(*args, &block)
    Charty::Table.new(
      each_group.map { |_key, table|
        block.call(table, *args)
      },
      index: Charty::Index.new(@indices.keys, name: @grouper)
    )
  end

  # Yields each group key together with the table of that group's rows.
  # Returns an Enumerator when no block is given.
  def each_group
    return enum_for(__method__) unless block_given?

    @indices.each_key do |key|
      yield(key, self[key])
    end
  end

  # Returns a table holding the rows of the group +key+, or nil when there
  # is no such group. The row positions become the new table's index.
  def [](key)
    return nil unless @indices.key?(key)

    index = @indices[key]
    Charty::Table.new(
      @table.column_names.map {|col|
        [col, @table[col].values_at(*index)]
      }.to_h,
      index: index
    )
  end
end
83
229
  end
84
230
  end
@@ -15,9 +15,11 @@ module Charty
15
15
  end
16
16
  end
17
17
 
18
+ require_relative 'table_adapters/base_adapter'
18
19
  require_relative 'table_adapters/hash_adapter'
19
20
  require_relative 'table_adapters/narray_adapter'
20
21
  require_relative 'table_adapters/datasets_adapter'
21
22
  require_relative 'table_adapters/daru_adapter'
22
23
  require_relative 'table_adapters/active_record_adapter'
23
24
  require_relative 'table_adapters/nmatrix_adapter'
25
+ require_relative 'table_adapters/pandas_adapter'
@@ -1,10 +1,8 @@
1
1
  module Charty
2
2
  module TableAdapters
3
- class ActiveRecordAdapter
3
+ class ActiveRecordAdapter < BaseAdapter
4
4
  TableAdapters.register(:active_record, self)
5
5
 
6
- include Enumerable
7
-
8
6
  def self.supported?(data)
9
7
  defined?(ActiveRecord::Relation) && data.is_a?(ActiveRecord::Relation)
10
8
  end
@@ -12,17 +10,27 @@ module Charty
12
10
  def initialize(data)
13
11
  @data = check_type(ActiveRecord::Relation, data, :data)
14
12
  @column_names = @data.column_names.freeze
15
- @columns = nil
13
+ self.columns = Index.new(@column_names)
14
+ self.index = RangeIndex.new(0 ... length)
16
15
  end
17
16
 
18
- attr_reader :column_names
17
+ attr_reader :data, :column_names
18
+
19
+ def_delegators :data, :size
20
+
21
+ alias length size
22
+
23
# Number of columns, i.e. the number of entries in column_names.
def column_length
  column_names.size
end
19
26
 
20
27
  def [](row, column)
21
- fetch_records unless @columns
28
+ fetch_records unless @columns_cache
22
29
  if row
23
- @columns[resolve_column_index(column)][row]
30
+ @columns_cache[resolve_column_index(column)][row]
24
31
  else
25
- @columns[resolve_column_index(column)]
32
+ column_data = @columns_cache[resolve_column_index(column)]
33
+ Vector.new(column_data, index: index, name: column)
26
34
  end
27
35
  end
28
36
 
@@ -43,7 +51,7 @@ module Charty
43
51
  end
44
52
 
45
53
  private def fetch_records
46
- @columns = @data.pluck(*column_names).transpose
54
+ @columns_cache = @data.pluck(*column_names).transpose
47
55
  end
48
56
 
49
57
  private def check_type(type, data, name)