charty 0.1.5.dev → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +71 -0
  3. data/.github/workflows/nmatrix.yml +67 -0
  4. data/.github/workflows/pycall.yml +86 -0
  5. data/Dockerfile.dev +9 -1
  6. data/Gemfile +18 -0
  7. data/README.md +176 -9
  8. data/Rakefile +4 -5
  9. data/charty.gemspec +10 -1
  10. data/examples/Gemfile +1 -0
  11. data/examples/active_record.ipynb +1 -1
  12. data/examples/daru.ipynb +1 -1
  13. data/examples/iris_dataset.ipynb +1 -1
  14. data/examples/nmatrix.ipynb +1 -1
  15. data/examples/{numo-narray.ipynb → numo_narray.ipynb} +1 -1
  16. data/examples/palette.rb +71 -0
  17. data/examples/sample.png +0 -0
  18. data/examples/sample_bokeh.ipynb +156 -0
  19. data/examples/sample_google_chart.ipynb +229 -68
  20. data/examples/sample_images/bar_bokeh.html +85 -0
  21. data/examples/sample_images/barh_bokeh.html +85 -0
  22. data/examples/sample_images/box_plot_bokeh.html +85 -0
  23. data/examples/sample_images/curve_bokeh.html +85 -0
  24. data/examples/sample_images/curve_with_function_bokeh.html +85 -0
  25. data/examples/sample_images/hist_gruff.png +0 -0
  26. data/examples/sample_images/scatter_bokeh.html +85 -0
  27. data/examples/sample_pyplot.ipynb +40 -38
  28. data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
  29. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  30. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  31. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  32. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  33. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  34. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  35. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  36. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  37. data/lib/charty.rb +14 -1
  38. data/lib/charty/backend_methods.rb +8 -0
  39. data/lib/charty/backends.rb +80 -0
  40. data/lib/charty/backends/bokeh.rb +32 -26
  41. data/lib/charty/backends/google_charts.rb +267 -0
  42. data/lib/charty/backends/gruff.rb +102 -83
  43. data/lib/charty/backends/plotly.rb +685 -0
  44. data/lib/charty/backends/pyplot.rb +586 -92
  45. data/lib/charty/backends/rubyplot.rb +82 -74
  46. data/lib/charty/backends/unicode_plot.rb +79 -0
  47. data/lib/charty/index.rb +213 -0
  48. data/lib/charty/linspace.rb +1 -1
  49. data/lib/charty/missing_value_support.rb +14 -0
  50. data/lib/charty/plot_methods.rb +184 -0
  51. data/lib/charty/plotter.rb +48 -40
  52. data/lib/charty/plotters.rb +11 -0
  53. data/lib/charty/plotters/abstract_plotter.rb +183 -0
  54. data/lib/charty/plotters/bar_plotter.rb +201 -0
  55. data/lib/charty/plotters/box_plotter.rb +79 -0
  56. data/lib/charty/plotters/categorical_plotter.rb +380 -0
  57. data/lib/charty/plotters/count_plotter.rb +7 -0
  58. data/lib/charty/plotters/estimation_support.rb +84 -0
  59. data/lib/charty/plotters/random_support.rb +25 -0
  60. data/lib/charty/plotters/relational_plotter.rb +518 -0
  61. data/lib/charty/plotters/scatter_plotter.rb +104 -0
  62. data/lib/charty/plotters/vector_plotter.rb +6 -0
  63. data/lib/charty/statistics.rb +114 -0
  64. data/lib/charty/table.rb +80 -3
  65. data/lib/charty/table_adapters.rb +25 -0
  66. data/lib/charty/table_adapters/active_record_adapter.rb +63 -0
  67. data/lib/charty/table_adapters/base_adapter.rb +69 -0
  68. data/lib/charty/table_adapters/daru_adapter.rb +70 -0
  69. data/lib/charty/table_adapters/datasets_adapter.rb +49 -0
  70. data/lib/charty/table_adapters/hash_adapter.rb +224 -0
  71. data/lib/charty/table_adapters/narray_adapter.rb +76 -0
  72. data/lib/charty/table_adapters/nmatrix_adapter.rb +67 -0
  73. data/lib/charty/table_adapters/pandas_adapter.rb +81 -0
  74. data/lib/charty/util.rb +20 -0
  75. data/lib/charty/vector.rb +69 -0
  76. data/lib/charty/vector_adapters.rb +183 -0
  77. data/lib/charty/vector_adapters/array_adapter.rb +109 -0
  78. data/lib/charty/vector_adapters/daru_adapter.rb +171 -0
  79. data/lib/charty/vector_adapters/narray_adapter.rb +187 -0
  80. data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
  81. data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
  82. data/lib/charty/vector_adapters/pandas_adapter.rb +200 -0
  83. data/lib/charty/version.rb +1 -1
  84. metadata +179 -10
  85. data/.travis.yml +0 -11
  86. data/lib/charty/backends/google_chart.rb +0 -167
  87. data/lib/charty/plotter_adapter.rb +0 -17
@@ -0,0 +1,171 @@
1
module Charty
  module VectorAdapters
    # Vector adapter that wraps a Daru::Vector so it can be used through the
    # Charty::Vector interface.
    class DaruVectorAdapter < BaseAdapter
      VectorAdapters.register(:daru_vector, self)

      # Returns a truthy value only when Daru::Vector is defined and +data+
      # is an instance of it.
      def self.supported?(data)
        defined?(Daru::Vector) && data.is_a?(Daru::Vector)
      end

      # Stores +data+ after passing it through check_data.
      # NOTE(review): check_data is not defined in this class; presumably it
      # is inherited from BaseAdapter -- confirm.
      def initialize(data)
        @data = check_data(data)
      end

      def_delegator :data, :size, :length

      # Returns the underlying vector's index wrapped in a DaruIndex.
      def index
        DaruIndex.new(data.index)
      end

      # Replaces the index of the underlying vector.
      #
      # A DaruIndex is unwrapped via #values, any other Index is converted
      # with #to_a, and everything else is assigned as-is.
      def index=(new_index)
        case new_index
        when DaruIndex
          data.index = new_index.values
        when Index
          data.index = new_index.to_a
        else
          data.index = new_index
        end
      end

      def_delegators :data, :name, :name=

      # Compares the wrapped data with the data of another adapter.
      #
      # For the NArray/NMatrix/Numpy/Pandas adapters the comparison is handed
      # over to +other+ so that the other adapter's comparison logic is used.
      def compare_data_equality(other)
        case other
        when DaruVectorAdapter
          data == other.data
        when ArrayAdapter
          data.to_a == other.data
        when NArrayAdapter, NMatrixAdapter, NumpyAdapter, PandasSeriesAdapter
          other.compare_data_equality(self)
        else
          # NOTE(review): this compares a Daru::Vector with an Array; confirm
          # that this is the intended fallback.
          data == other.data.to_a
        end
      end

      # Element access. A Charty::Vector key is treated as a mask and handled
      # by #where; any other key is forwarded to the underlying vector.
      def [](key)
        case key
        when Charty::Vector
          where(key)
        else
          data[key]
        end
      end

      def_delegators :data, :[]=, :to_a

      # Returns an Array with the result of data[i] for each given index.
      def values_at(*indices)
        indices.map {|i| data[i] }
      end

      # Returns a new Charty::Vector holding only the elements selected by
      # +mask+, keeping their index keys and this vector's name.
      def where(mask)
        masked_data, masked_index = where_in_array(mask)
        Charty::Vector.new(Daru::Vector.new(masked_data, index: masked_index), name: name)
      end

      # Returns two Arrays: the elements and the index keys at the positions
      # where +mask+ holds +true+ or +1+.
      def where_in_array(mask)
        mask = check_mask_vector(mask)
        masked_data = []
        masked_index = []
        mask.each_with_index do |f, i|
          case f
          when true, 1
            masked_data << data[i]
            masked_index << data.index.key(i)
          end
        end
        return masked_data, masked_index
      end

      # Returns the first element that is not nil (nil when there is none).
      def first_nonnil
        data.drop_while(&:nil?).first
      end

      # A vector is boolean when it is neither numeric nor categorical and
      # its first non-nil element is +true+ or +false+.
      def boolean?
        case
        when numeric?, categorical?
          false
        else
          case first_nonnil
          when true, false
            true
          else
            false
          end
        end
      end

      def_delegators :data, :numeric?
      def_delegator :data, :category?, :categorical?

      # Returns the non-nil categories for a categorical vector, nil otherwise.
      def categories
        data.categories.compact if categorical?
      end

      # Returns the distinct values as an Array.
      def unique_values
        data.uniq.to_a
      end

      # Splits the data into groups determined by +grouper+ and returns a
      # Hash that maps each group key to a Charty::Vector.
      #
      # +grouper+ may be a Daru::Vector, a Charty::Vector, or anything that
      # Charty::Vector.new accepts; the latter two are normalised to a
      # Daru::Vector and the method recurses.
      def group_by(grouper)
        case grouper
        when Daru::Vector
          if grouper.category?
            # TODO: A categorical Daru::Vector cannot perform group_by well
            grouper = Daru::Vector.new(grouper.to_a)
          end
          groups = grouper.group_by.groups
          groups.map { |g, indices|
            [g.first, Charty::Vector.new(data[*indices])]
          }.to_h
        when Charty::Vector
          case grouper.data
          when Daru::Vector
            return group_by(grouper.data)
          else
            return group_by(Daru::Vector.new(grouper.to_a))
          end
        else
          return group_by(Charty::Vector.new(grouper))
        end
      end

      # Returns a new Charty::Vector without nil and NaN elements.
      # The result is built from a fresh Daru::Vector; neither the index nor
      # the name of this vector is passed along.
      def drop_na
        values = data.reject do |x|
          case
          when x.nil?,
               x.respond_to?(:nan?) && x.nan?
            true
          else
            false
          end
        end
        Charty::Vector.new(Daru::Vector.new(values))
      end

      # Returns a Charty::Vector built from data.eq(val), keeping this
      # vector's index and name.
      def eq(val)
        Charty::Vector.new(data.eq(val).to_a,
                           index: data.index.to_a,
                           name: name)
      end

      # Returns a Charty::Vector of booleans that are true where the element
      # is not a missing value, keeping this vector's index and name.
      def notnull
        notnull_data = data.map {|x| ! missing_value?(x) }
        Charty::Vector.new(notnull_data, index: data.index.to_a, name: name)
      end

      def_delegator :data, :mean

      # Returns the standard deviation of the data.
      #
      # population:: when true the population standard deviation is returned,
      #              otherwise (default) the sample standard deviation.
      #
      # The two branches were previously swapped, so population: true
      # returned the sample estimate and vice versa.
      def stdev(population: false)
        if population
          data.standard_deviation_population
        else
          data.standard_deviation_sample
        end
      end

      # Returns the +q+-th percentile using Daru's linear_percentile.
      def percentile(q)
        data.linear_percentile(q)
      end
    end
  end
end
@@ -0,0 +1,187 @@
1
module Charty
  module VectorAdapters
    # Vector adapter that wraps a Numo::NArray so it can be used through the
    # Charty::Vector interface.
    class NArrayAdapter < BaseAdapter
      VectorAdapters.register(:narray, self)

      extend Forwardable
      include Enumerable

      # Returns a truthy value only when Numo::NArray is defined and +data+
      # is an instance of it.
      def self.supported?(data)
        defined?(Numo::NArray) && data.is_a?(Numo::NArray)
      end

      # Stores +data+ (after check_data) and installs a RangeIndex over
      # 0...length when no index is set yet.
      def initialize(data)
        @data = check_data(data)
        self.index = index || RangeIndex.new(0 ... length)
      end

      # Compares the wrapped data with the data of another adapter.
      # Numpy/Pandas adapters perform the comparison themselves.
      def compare_data_equality(other)
        case other
        when ArrayAdapter, NArrayAdapter
          data == other.data
        when NumpyAdapter, PandasSeriesAdapter
          other.compare_data_equality(self)
        else
          data == other.data.to_a
        end
      end

      include NameSupport
      include IndexSupport

      # TODO: Reconsider the return value type of values_at
      # Returns the elements at the given positions as a Ruby Array.
      def values_at(*indices)
        data[indices].to_a
      end

      # Returns a new Charty::Vector holding only the elements selected by
      # +mask+, keeping the matching index labels and this vector's name.
      def where(mask)
        mask = check_mask_vector(mask)
        case mask.data
        when Numo::Bit
          bits = mask.data
          masked_data = data[bits]
          # Convert the positions to a Ruby Array before mapping so that the
          # index labels are collected in an Array (as the Numpy adapter
          # does) instead of being mapped inside a Numo array.
          masked_index = bits.where.to_a.map {|i| index[i] }
        else
          masked_data, masked_index = where_in_array(mask)
          masked_data = data.class[*masked_data]
        end
        Charty::Vector.new(masked_data, index: masked_index, name: name)
      end

      # A Numo::Bit array is boolean. A Numo::RObject array is boolean when
      # every element is nil, true, or false. Everything else is not.
      def boolean?
        case data
        when Numo::Bit
          true
        when Numo::RObject
          i, n = 0, data.size
          while i < n
            case data[i]
            when nil, true, false
              # do nothing
            else
              return false
            end
            i += 1
          end
          true
        else
          false
        end
      end

      # Every array type except Numo::Bit and Numo::RObject is numeric.
      def numeric?
        case data
        when Numo::Bit,
             Numo::RObject
          false
        else
          true
        end
      end

      # This adapter never reports its data as categorical.
      def categorical?
        false
      end

      # Always nil because the data is never categorical.
      def categories
        nil
      end

      # Returns the distinct elements in order of first appearance.
      def unique_values
        existence = {}
        i, n = 0, data.size
        unique = []
        while i < n
          x = data[i]
          unless existence[x]
            unique << x
            existence[x] = true
          end
          i += 1
        end
        unique
      end

      # Splits the data into groups determined by +grouper+ and returns a
      # Hash that maps each unique grouper value to a Charty::Vector of the
      # elements at the positions where the grouper equals that value.
      def group_by(grouper)
        case grouper
        when Charty::Vector
          # nothing to do
        else
          grouper = Charty::Vector.new(grouper)
        end

        group_keys = grouper.unique_values

        case grouper.data
        when Numo::NArray
          grouper = grouper.data
        else
          grouper = Numo::NArray[*grouper.to_a]
        end

        group_keys.map { |g|
          [g, Charty::Vector.new(data[grouper.eq(g)])]
        }.to_h
      end

      # Returns the data without missing values.
      #
      # Float and complex arrays drop NaN elements; RObject arrays drop nil
      # and NaN elements; any other array type returns +self+ (the adapter).
      def drop_na
        case data
        when Numo::DFloat, Numo::SFloat, Numo::DComplex, Numo::SComplex
          Charty::Vector.new(data[~data.isnan])
        when Numo::RObject
          where_is_nan = data.isnan
          values = []
          i, n = 0, data.size
          while i < n
            x = data[i]
            unless x.nil? || where_is_nan[i] == 1
              values << x
            end
            i += 1
          end
          Charty::Vector.new(Numo::RObject[*values])
        else
          self
        end
      end

      # Returns a Charty::Vector built from data.eq(val), keeping this
      # vector's index and name.
      def eq(val)
        Charty::Vector.new(data.eq(val),
                           index: index,
                           name: name)
      end

      # Returns a Charty::Vector mask that is set where the element is not a
      # missing value, keeping this vector's index and name.
      def notnull
        case data
        when Numo::RObject
          i, n = 0, length
          notnull_data = Numo::Bit.zeros(n)
          while i < n
            notnull_data[i] = ! missing_value?(data[i])
            i += 1
          end
        when ->(x) { x.respond_to?(:isnan) }
          notnull_data = ~data.isnan
        else
          # Array types without isnan cannot hold NaN, so nothing is missing.
          notnull_data = Numo::Bit.ones(length)
        end
        Charty::Vector.new(notnull_data, index: index, name: name)
      end

      # Mean of the data, computed with the nan: true option.
      def mean
        data.mean(nan: true)
      end

      # Returns the standard deviation of the data (computed with nan: true).
      #
      # population:: when true, the sample standard deviation reported by
      #              Numo is rescaled to the population standard deviation.
      #
      # The rescaling previously used the number of NaN elements as +n+ and
      # multiplied by (n - 1) / n. The sample size is the number of elements
      # that are not NaN, and because the variance scales by (n - 1) / n the
      # standard deviation scales by its square root.
      def stdev(population: false)
        s = data.stddev(nan: true)
        return s unless population

        # Numo::NArray does not support population standard deviation
        nan_count = data.respond_to?(:isnan) ? data.isnan.count_true : 0
        n = data.size - nan_count
        # No valid elements: there is nothing to rescale.
        return s if n.zero?

        s * Math.sqrt((n - 1).fdiv(n))
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Charty
  module VectorAdapters
    # Vector adapter that wraps an NMatrix so it can be used through the
    # Charty::Vector interface.
    class NMatrixAdapter < BaseAdapter
      VectorAdapters.register(:nmatrix, self)

      extend Forwardable
      include Enumerable

      # Returns nil when NMatrix is not defined; otherwise tells whether
      # +data+ is an NMatrix.
      def self.supported?(data)
        return unless defined?(NMatrix)

        data.is_a?(NMatrix)
      end

      # Stores +data+ (after check_data) and falls back to a RangeIndex over
      # 0...length when no index is set yet.
      def initialize(data)
        @data = check_data(data)
        current_index = index
        current_index ||= RangeIndex.new(0 ... length)
        self.index = current_index
      end

      # Compares the wrapped data with the data of another adapter.
      # NArray/Numpy/Pandas adapters perform the comparison themselves.
      def compare_data_equality(other)
        case other
        when NMatrixAdapter then data == other.data
        when ArrayAdapter, DaruVectorAdapter then data.to_a == other.data.to_a
        when NArrayAdapter, NumpyAdapter, PandasSeriesAdapter then other.compare_data_equality(self)
        else data == other.data.to_a
        end
      end

      include NameSupport
      include IndexSupport

      alias length size
    end
  end
end
@@ -0,0 +1,168 @@
1
module Charty
  module VectorAdapters
    # Vector adapter that wraps a Numpy::NDArray (via PyCall) so it can be
    # used through the Charty::Vector interface.
    class NumpyAdapter < BaseAdapter
      VectorAdapters.register(:numpy, self)

      # Returns true only when Numpy::NDArray is defined and +data+ is an
      # instance of it.
      def self.supported?(data)
        return false unless defined?(Numpy::NDArray)
        case data
        when Numpy::NDArray
          true
        else
          false
        end
      end

      # Stores +data+ (after check_data) and installs a RangeIndex over
      # 0...length when no index is set yet.
      def initialize(data)
        @data = check_data(data)
        self.index = index || RangeIndex.new(0 ... length)
      end

      attr_reader :data

      def_delegator :data, :size, :length

      # Compares the wrapped data with the data of another adapter.
      #
      # Numpy/Pandas data is compared element-wise directly; data of any
      # other BaseAdapter is converted with #to_a first. Anything that is
      # not an adapter is never equal.
      def compare_data_equality(other)
        case other
        when NumpyAdapter, PandasSeriesAdapter
          Numpy.all(data == other.data)
        when BaseAdapter
          Numpy.all(data == other.data.to_a)
        else
          false
        end
      end

      include NameSupport
      include IndexSupport

      # Returns a new Charty::Vector holding only the elements selected by
      # +mask+, keeping the matching index labels and this vector's name.
      def where(mask)
        mask = check_mask_vector(mask)
        case mask.data
        when Numpy::NDArray,
             ->(x) { defined?(Pandas::Series) && x.is_a?(Pandas::Series) }
          mask_data = Numpy.asarray(mask.data, dtype: :bool)
          masked_data = data[mask_data]
          masked_index = mask_data.nonzero()[0].to_a.map {|i| index[i] }
        else
          masked_data, masked_index = where_in_array(mask)
          masked_data = Numpy.asarray(masked_data, dtype: data.dtype)
        end
        Charty::Vector.new(masked_data, index: masked_index, name: name)
      end

      # Yields each element in positional order. Returns an Enumerator when
      # no block is given.
      def each
        return enum_for(__method__) unless block_given?

        i, n = 0, data.size
        while i < n
          yield data[i]
          i += 1
        end
      end

      # True when the array has no elements.
      def empty?
        data.size == 0
      end

      # An array with a bool dtype is boolean. An array with an object dtype
      # is boolean when every element is nil, true, or false. Everything
      # else is not.
      def boolean?
        builtins = PyCall.builtins
        case
        when builtins.issubclass(data.dtype.type, Numpy.bool_)
          true
        when builtins.issubclass(data.dtype.type, Numpy.object_)
          i, n = 0, data.size
          while i < n
            case data[i]
            when nil, true, false
              # do nothing
            else
              return false
            end
            i += 1
          end
          true
        else
          false
        end
      end

      # True when the dtype is a subclass of Numpy.number or Numpy.bool_.
      def numeric?
        # TODO: Handle object array
        PyCall.builtins.issubclass(data.dtype.type, PyCall.tuple([Numpy.number, Numpy.bool_]))
      end

      # This adapter never reports its data as categorical.
      def categorical?
        false
      end

      # Always nil because the data is never categorical.
      def categories
        nil
      end

      # Returns the result of Numpy.unique as a Ruby Array.
      def unique_values
        Numpy.unique(data).to_a
      end

      # Splits the data into groups determined by +grouper+ and returns a
      # Hash that maps each unique grouper value to a Charty::Vector of the
      # elements at the positions where the grouper equals that value.
      #
      # +grouper+ may be a Numpy::NDArray, a Pandas::Series, a
      # Charty::Vector, or anything Array.try_convert accepts.
      def group_by(grouper)
        case grouper
        when Numpy::NDArray,
             ->(x) { defined?(Pandas::Series) && x.is_a?(Pandas::Series) }
          # Nothing todo
        when Charty::Vector
          case grouper.data
          when Numpy::NDArray
            grouper = grouper.data
          else
            grouper = Numpy.asarray(grouper.to_a)
          end
        else
          grouper = Numpy.asarray(Array.try_convert(grouper))
        end

        group_keys = Numpy.unique(grouper).to_a
        group_keys.map { |g|
          [g, Charty::Vector.new(data[grouper == g])]
        }.to_h
      end

      # Returns a new Charty::Vector without missing values: NaN for numeric
      # arrays, nil for everything else.
      def drop_na
        where_is_na = if numeric?
                        Numpy.isnan(data)
                      else
                        (data == nil)
                      end
        Charty::Vector.new(data[Numpy.logical_not(where_is_na)])
      end

      # Returns a Charty::Vector built from the element-wise comparison
      # data == val, keeping this vector's index and name.
      def eq(val)
        Charty::Vector.new((data == val),
                           index: index,
                           name: name)
      end

      # Returns a Charty::Vector mask that is true where the element is not
      # a missing value, keeping this vector's index and name.
      #
      # The non-object branch previously returned Numpy.isnan(data) as is,
      # i.e. a mask that was true exactly where values ARE missing. It is
      # now negated, matching the object branch and #drop_na.
      def notnull
        case
        when PyCall.builtins.issubclass(data.dtype.type, Numpy.object_)
          i, n = 0, length
          notnull_data = Numpy::NDArray.new(n, dtype: :bool)
          while i < n
            notnull_data[i] = ! missing_value?(data[i])
            i += 1
          end
        else
          # NOTE(review): Numpy.isnan is applied to every non-object dtype
          # here; confirm it is never reached with a dtype that isnan
          # does not accept.
          notnull_data = Numpy.logical_not(Numpy.isnan(data))
        end
        Charty::Vector.new(notnull_data, index: index, name: name)
      end

      # Mean of the data via Numpy.mean.
      def mean
        Numpy.mean(data)
      end

      # Standard deviation via Numpy.std.
      #
      # population:: when true ddof is 0 (population), otherwise ddof is 1
      #              (sample, the default).
      def stdev(population: false)
        Numpy.std(data, ddof: population ? 0 : 1)
      end
    end
  end
end