charty 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -23
  3. data/.github/workflows/nmatrix.yml +67 -0
  4. data/.github/workflows/pycall.yml +86 -0
  5. data/Gemfile +18 -0
  6. data/README.md +123 -4
  7. data/Rakefile +4 -5
  8. data/charty.gemspec +1 -3
  9. data/examples/sample_images/hist_gruff.png +0 -0
  10. data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
  11. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  12. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  13. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  14. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  15. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  16. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  17. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  18. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  19. data/lib/charty.rb +4 -0
  20. data/lib/charty/backends/gruff.rb +13 -2
  21. data/lib/charty/backends/plotly.rb +322 -20
  22. data/lib/charty/backends/pyplot.rb +416 -64
  23. data/lib/charty/index.rb +213 -0
  24. data/lib/charty/linspace.rb +1 -1
  25. data/lib/charty/missing_value_support.rb +14 -0
  26. data/lib/charty/plot_methods.rb +173 -8
  27. data/lib/charty/plotters.rb +7 -0
  28. data/lib/charty/plotters/abstract_plotter.rb +87 -12
  29. data/lib/charty/plotters/bar_plotter.rb +200 -3
  30. data/lib/charty/plotters/box_plotter.rb +75 -7
  31. data/lib/charty/plotters/categorical_plotter.rb +272 -40
  32. data/lib/charty/plotters/count_plotter.rb +7 -0
  33. data/lib/charty/plotters/estimation_support.rb +84 -0
  34. data/lib/charty/plotters/random_support.rb +25 -0
  35. data/lib/charty/plotters/relational_plotter.rb +518 -0
  36. data/lib/charty/plotters/scatter_plotter.rb +115 -0
  37. data/lib/charty/plotters/vector_plotter.rb +6 -0
  38. data/lib/charty/statistics.rb +87 -2
  39. data/lib/charty/table.rb +50 -15
  40. data/lib/charty/table_adapters.rb +2 -0
  41. data/lib/charty/table_adapters/active_record_adapter.rb +17 -9
  42. data/lib/charty/table_adapters/base_adapter.rb +69 -0
  43. data/lib/charty/table_adapters/daru_adapter.rb +37 -3
  44. data/lib/charty/table_adapters/datasets_adapter.rb +6 -2
  45. data/lib/charty/table_adapters/hash_adapter.rb +130 -16
  46. data/lib/charty/table_adapters/narray_adapter.rb +25 -6
  47. data/lib/charty/table_adapters/nmatrix_adapter.rb +15 -5
  48. data/lib/charty/table_adapters/pandas_adapter.rb +81 -0
  49. data/lib/charty/vector.rb +69 -0
  50. data/lib/charty/vector_adapters.rb +183 -0
  51. data/lib/charty/vector_adapters/array_adapter.rb +109 -0
  52. data/lib/charty/vector_adapters/daru_adapter.rb +171 -0
  53. data/lib/charty/vector_adapters/narray_adapter.rb +187 -0
  54. data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
  55. data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
  56. data/lib/charty/vector_adapters/pandas_adapter.rb +200 -0
  57. data/lib/charty/version.rb +1 -1
  58. metadata +33 -45
@@ -0,0 +1,187 @@
1
module Charty
  module VectorAdapters
    # Vector adapter whose backing store is a Numo::NArray.
    #
    # The adapter registers itself under the +:narray+ key.  Helpers such as
    # +check_data+, +check_mask_vector+, +where_in_array+, +missing_value?+,
    # +length+, +index+ and +name+ are not defined in this class; presumably
    # they come from BaseAdapter or the included support modules -- confirm
    # there before relying on their exact behavior.
    class NArrayAdapter < BaseAdapter
      VectorAdapters.register(:narray, self)

      extend Forwardable
      include Enumerable

      # Tells whether +data+ can be handled by this adapter.
      #
      # @param data [Object] candidate backing store
      # @return [Boolean, nil] truthy only when Numo::NArray is loaded and
      #   +data+ is an instance of it
      def self.supported?(data)
        defined?(Numo::NArray) && data.is_a?(Numo::NArray)
      end

      # Stores the checked data and, when no index is set yet, installs a
      # RangeIndex spanning +0...length+.
      #
      # @param data [Numo::NArray] values to wrap (validated by +check_data+)
      def initialize(data)
        @data = check_data(data)
        self.index = index || RangeIndex.new(0 ... length)
      end

      # Compares the wrapped values with those of another adapter.
      #
      # Numpy and pandas adapters are asked to do the comparison themselves;
      # any adapter not listed explicitly is compared through +data.to_a+.
      #
      # @param other [BaseAdapter] adapter to compare against
      # @return [Object] result of the underlying +==+ (or of the delegated
      #   comparison)
      def compare_data_equality(other)
        case other
        when ArrayAdapter, NArrayAdapter
          data == other.data
        when NumpyAdapter, PandasSeriesAdapter
          other.compare_data_equality(self)
        else
          data == other.data.to_a
        end
      end

      include NameSupport
      include IndexSupport

      # TODO: Reconsider the return value type of values_at
      #
      # @param indices [Array<Integer>] positions to fetch
      # @return [Array] the selected elements as a plain Ruby array
      def values_at(*indices)
        data[indices].to_a
      end

      # Builds a new vector from the elements selected by +mask+.
      #
      # A Numo::Bit mask is applied directly through NArray indexing; any
      # other mask is resolved by +where_in_array+ and the selected values are
      # wrapped again in the same NArray class as +data+.
      #
      # @param mask [Object] mask accepted by +check_mask_vector+
      # @return [Charty::Vector] selected values with their index entries and
      #   this vector's name
      def where(mask)
        mask = check_mask_vector(mask)
        case mask.data
        when Numo::Bit
          bits = mask.data
          masked_data = data[bits]
          masked_index = bits.where.map {|i| index[i] }.to_a
        else
          masked_data, masked_index = where_in_array(mask)
          masked_data = data.class[*masked_data]
        end
        Charty::Vector.new(masked_data, index: masked_index, name: name)
      end

      # @return [Boolean] true for Numo::Bit data, or for Numo::RObject data
      #   whose elements are all +nil+, +true+ or +false+; false otherwise
      def boolean?
        case data
        when Numo::Bit
          true
        when Numo::RObject
          data.size.times do |i|
            case data[i]
            when nil, true, false
              # acceptable element, keep scanning
            else
              return false
            end
          end
          true
        else
          false
        end
      end

      # @return [Boolean] false for Numo::Bit and Numo::RObject data, true for
      #   every other NArray type
      def numeric?
        case data
        when Numo::Bit,
             Numo::RObject
          false
        else
          true
        end
      end

      # @return [false] this adapter never reports categorical data
      def categorical?
        false
      end

      # @return [nil] there are no categories for this adapter
      def categories
        nil
      end

      # @return [Array] distinct elements in order of first appearance
      def unique_values
        seen = {}
        unique = []
        data.size.times do |i|
          x = data[i]
          next if seen[x]

          unique << x
          seen[x] = true
        end
        unique
      end

      # Partitions the values by the entries of +grouper+.
      #
      # @param grouper [Charty::Vector, Object] grouping keys; anything that is
      #   not a Charty::Vector is wrapped in one first
      # @return [Hash{Object => Charty::Vector}] one vector per distinct key
      #   (the resulting vectors are built without an explicit index or name)
      def group_by(grouper)
        case grouper
        when Charty::Vector
          # nothing to do
        else
          grouper = Charty::Vector.new(grouper)
        end

        group_keys = grouper.unique_values

        # The element-wise +eq+ below needs an NArray on the left-hand side.
        case grouper.data
        when Numo::NArray
          grouper = grouper.data
        else
          grouper = Numo::NArray[*grouper.to_a]
        end

        group_keys.map { |g|
          [g, Charty::Vector.new(data[grouper.eq(g)])]
        }.to_h
      end

      # Removes missing values.
      #
      # Floating point and complex arrays drop their NaN elements; RObject
      # arrays drop +nil+ and the elements flagged by +isnan+; every other
      # type is returned unchanged.
      #
      # @return [Charty::Vector, NArrayAdapter] a new vector, or +self+ when
      #   the data type cannot hold missing values
      def drop_na
        case data
        when Numo::DFloat, Numo::SFloat, Numo::DComplex, Numo::SComplex
          Charty::Vector.new(data[~data.isnan])
        when Numo::RObject
          where_is_nan = data.isnan
          values = []
          data.size.times do |i|
            x = data[i]
            values << x unless x.nil? || where_is_nan[i] == 1
          end
          Charty::Vector.new(Numo::RObject[*values])
        else
          self
        end
      end

      # Element-wise equality against +val+.
      #
      # @param val [Object] value to compare each element with
      # @return [Charty::Vector] comparison result sharing this vector's index
      #   and name
      def eq(val)
        Charty::Vector.new(data.eq(val),
                           index: index,
                           name: name)
      end

      # Flags the elements that are not missing.
      #
      # @return [Charty::Vector] Numo::Bit mask sharing this vector's index and
      #   name
      def notnull
        case data
        when Numo::RObject
          n = length
          notnull_data = Numo::Bit.zeros(n)
          n.times do |i|
            notnull_data[i] = !missing_value?(data[i])
          end
        when ->(x) { x.respond_to?(:isnan) }
          notnull_data = ~data.isnan
        else
          # Types without +isnan+ cannot hold a missing value.
          notnull_data = Numo::Bit.ones(length)
        end
        Charty::Vector.new(notnull_data, index: index, name: name)
      end

      # @return [Numeric] mean of the values, computed with +nan: true+
      def mean
        data.mean(nan: true)
      end

      # Standard deviation of the values, computed with +nan: true+.
      #
      # Numo::NArray only provides the sample estimator (divisor n - 1).  The
      # population value is obtained by rescaling it:
      #
      #   sigma_population = s_sample * sqrt((n - 1) / n)
      #
      # where n is the number of elements that are NOT NaN.  (Earlier code
      # used the NaN count for n and omitted the square root.)
      #
      # @param population [Boolean] return the population standard deviation
      #   instead of the sample one
      # @return [Numeric]
      def stdev(population: false)
        s = data.stddev(nan: true)
        return s unless population

        # Only some NArray types define +isnan+; the others have no NaN.
        nan_count = data.respond_to?(:isnan) ? data.isnan.count_true : 0
        n = data.size - nan_count
        # With no valid element there is nothing to rescale.
        return s unless n > 0

        s * Math.sqrt((n - 1).fdiv(n))
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Charty
  module VectorAdapters
    # Vector adapter whose backing store is an NMatrix.
    #
    # Registered under the +:nmatrix+ key.  +check_data+, +index+, +size+ and
    # the other helpers used here are defined outside this class (presumably
    # in BaseAdapter or the included support modules -- confirm there).
    class NMatrixAdapter < BaseAdapter
      VectorAdapters.register(:nmatrix, self)

      extend Forwardable
      include Enumerable

      # @param data [Object] candidate backing store
      # @return [Boolean, nil] nil when NMatrix is not loaded, otherwise
      #   whether +data+ is an NMatrix
      def self.supported?(data)
        return unless defined?(NMatrix)

        data.is_a?(NMatrix)
      end

      # Keeps the checked data and falls back to a RangeIndex over
      # +0...length+ when no index is present.
      def initialize(data)
        @data = check_data(data)
        current_index = index
        self.index = current_index || RangeIndex.new(0 ... length)
      end

      # Compares the wrapped values with those of +other+.  NArray, Numpy and
      # pandas adapters perform the comparison on their side.
      #
      # @param other [BaseAdapter] adapter to compare against
      # @return [Object] result of the underlying comparison
      def compare_data_equality(other)
        case other
        when NMatrixAdapter then data == other.data
        when ArrayAdapter, DaruVectorAdapter then data.to_a == other.data.to_a
        when NArrayAdapter, NumpyAdapter, PandasSeriesAdapter then other.compare_data_equality(self)
        else data == other.data.to_a
        end
      end

      include NameSupport
      include IndexSupport

      # +length+ is another name for +size+.
      alias_method :length, :size
    end
  end
end
@@ -0,0 +1,168 @@
1
module Charty
  module VectorAdapters
    # Vector adapter whose backing store is a Numpy::NDArray (via PyCall).
    #
    # Registered under the +:numpy+ key.  +check_data+, +check_mask_vector+,
    # +where_in_array+, +missing_value?+, +index+ and +name+ are defined
    # outside this class (presumably in BaseAdapter or the included support
    # modules -- confirm there).
    class NumpyAdapter < BaseAdapter
      VectorAdapters.register(:numpy, self)

      # @param data [Object] candidate backing store
      # @return [Boolean] true only when Numpy::NDArray is loaded and matches
      #   +data+
      def self.supported?(data)
        return false unless defined?(Numpy::NDArray)
        case data
        when Numpy::NDArray
          true
        else
          false
        end
      end

      # Stores the checked data and, when no index is set yet, installs a
      # RangeIndex spanning +0...length+.
      def initialize(data)
        @data = check_data(data)
        self.index = index || RangeIndex.new(0 ... length)
      end

      attr_reader :data

      # +length+ forwards to the array's +size+.
      def_delegator :data, :size, :length

      # Compares the wrapped values with those of +other+ element-wise and
      # reduces the result with +Numpy.all+.
      #
      # @param other [Object] adapter to compare against
      # @return [Object] the +Numpy.all+ result, or +false+ when +other+ is
      #   not a BaseAdapter
      def compare_data_equality(other)
        case other
        when NumpyAdapter, PandasSeriesAdapter
          Numpy.all(data == other.data)
        when BaseAdapter
          Numpy.all(data == other.data.to_a)
        else
          false
        end
      end

      include NameSupport
      include IndexSupport

      # Builds a new vector from the elements selected by +mask+.
      #
      # NDArray and pandas Series masks are converted to a boolean NDArray and
      # applied through numpy indexing; other masks go through
      # +where_in_array+ and the result is converted back to an NDArray of the
      # same dtype.
      #
      # @param mask [Object] mask accepted by +check_mask_vector+
      # @return [Charty::Vector]
      def where(mask)
        mask = check_mask_vector(mask)
        case mask.data
        when Numpy::NDArray,
             ->(x) { defined?(Pandas::Series) && x.is_a?(Pandas::Series) }
          mask_data = Numpy.asarray(mask.data, dtype: :bool)
          masked_data = data[mask_data]
          masked_index = mask_data.nonzero()[0].to_a.map {|i| index[i] }
        else
          masked_data, masked_index = where_in_array(mask)
          masked_data = Numpy.asarray(masked_data, dtype: data.dtype)
        end
        Charty::Vector.new(masked_data, index: masked_index, name: name)
      end

      # Yields every element in positional order.
      #
      # @return [Enumerator] when no block is given
      def each
        return enum_for(__method__) unless block_given?

        i, n = 0, data.size
        while i < n
          yield data[i]
          i += 1
        end
      end

      # @return [Boolean] whether the array has no elements
      def empty?
        data.size == 0
      end

      # @return [Boolean] true for a bool dtype, or for an object dtype whose
      #   elements are all +nil+, +true+ or +false+; false otherwise
      def boolean?
        builtins = PyCall.builtins
        case
        when builtins.issubclass(data.dtype.type, Numpy.bool_)
          true
        when builtins.issubclass(data.dtype.type, Numpy.object_)
          i, n = 0, data.size
          while i < n
            case data[i]
            when nil, true, false
              # acceptable element, keep scanning
            else
              return false
            end
            i += 1
          end
          true
        else
          false
        end
      end

      # @return [Boolean] whether the dtype is a numpy number or bool type
      def numeric?
        # TODO: Handle object array
        PyCall.builtins.issubclass(data.dtype.type, PyCall.tuple([Numpy.number, Numpy.bool_]))
      end

      # @return [false] this adapter never reports categorical data
      def categorical?
        false
      end

      # @return [nil] there are no categories for this adapter
      def categories
        nil
      end

      # @return [Array] the result of +Numpy.unique+ as a Ruby array
      def unique_values
        Numpy.unique(data).to_a
      end

      # Partitions the values by the entries of +grouper+.
      #
      # @param grouper [Numpy::NDArray, Pandas::Series, Charty::Vector, Object]
      #   grouping keys; other objects are converted with +Array.try_convert+
      # @return [Hash{Object => Charty::Vector}] one vector per distinct key
      def group_by(grouper)
        case grouper
        when Numpy::NDArray,
             ->(x) { defined?(Pandas::Series) && x.is_a?(Pandas::Series) }
          # Nothing todo
        when Charty::Vector
          case grouper.data
          when Numpy::NDArray
            grouper = grouper.data
          else
            grouper = Numpy.asarray(grouper.to_a)
          end
        else
          grouper = Numpy.asarray(Array.try_convert(grouper))
        end

        group_keys = Numpy.unique(grouper).to_a
        group_keys.map { |g|
          [g, Charty::Vector.new(data[grouper == g])]
        }.to_h
      end

      # Removes missing values: NaN for numeric data, +nil+ otherwise.
      #
      # @return [Charty::Vector]
      def drop_na
        where_is_na = if numeric?
                        Numpy.isnan(data)
                      else
                        (data == nil)
                      end
        Charty::Vector.new(data[Numpy.logical_not(where_is_na)])
      end

      # Element-wise equality against +val+.
      #
      # @param val [Object] value to compare each element with
      # @return [Charty::Vector] comparison result sharing this vector's index
      #   and name
      def eq(val)
        Charty::Vector.new((data == val),
                           index: index,
                           name: name)
      end

      # Flags the elements that are NOT missing.
      #
      # Object arrays are checked element by element with +missing_value?+.
      # For every other dtype the NaN mask is negated; returning the NaN mask
      # itself (as earlier code did) would flag exactly the missing elements.
      #
      # @return [Charty::Vector] boolean mask sharing this vector's index and
      #   name
      def notnull
        notnull_data =
          if PyCall.builtins.issubclass(data.dtype.type, Numpy.object_)
            n = length
            flags = Numpy::NDArray.new(n, dtype: :bool)
            n.times do |i|
              flags[i] = !missing_value?(data[i])
            end
            flags
          else
            Numpy.logical_not(Numpy.isnan(data))
          end
        Charty::Vector.new(notnull_data, index: index, name: name)
      end

      # @return [Object] result of +Numpy.mean+ over the data
      def mean
        Numpy.mean(data)
      end

      # @param population [Boolean] use ddof 0 (population) instead of ddof 1
      #   (sample)
      # @return [Object] result of +Numpy.std+ over the data
      def stdev(population: false)
        Numpy.std(data, ddof: population ? 0 : 1)
      end
    end
  end
end
@@ -0,0 +1,200 @@
1
module Charty
  module VectorAdapters
    # Vector adapter whose backing store is a Pandas::Series (via PyCall).
    #
    # Registered under the +:pandas_series+ key.  Index and name are not
    # stored on the adapter; they are read from and written to the Series.
    # +check_data+ and +check_mask_vector+ are defined outside this class
    # (presumably in BaseAdapter -- confirm there).
    class PandasSeriesAdapter < BaseAdapter
      VectorAdapters.register(:pandas_series, self)

      # @param data [Object] candidate backing store
      # @return [Boolean] true only when Pandas::Series is loaded and matches
      #   +data+
      def self.supported?(data)
        return false unless defined?(Pandas::Series)
        case data
        when Pandas::Series
          true
        else
          false
        end
      end

      # @param data [Pandas::Series] series to wrap (validated by +check_data+)
      def initialize(data)
        @data = check_data(data)
      end

      attr_reader :data

      # +length+ forwards to the Series' +size+.
      def_delegator :data, :size, :length

      # @return [PandasIndex] wrapper around the Series' own index
      def index
        PandasIndex.new(data.index)
      end

      # Replaces the Series' index.
      #
      # @param new_index [PandasIndex, Index, Object] a PandasIndex contributes
      #   its +values+, any other Index its +to_a+; anything else is assigned
      #   as-is
      def index=(new_index)
        case new_index
        when PandasIndex
          data.index = new_index.values
        when Index
          data.index = new_index.to_a
        else
          data.index = new_index
        end
      end

      def_delegators :data, :name, :name=

      # Compares the wrapped values with those of +other+.
      #
      # Values of non-pandas adapters are wrapped in a Series that reuses this
      # Series' index so that +equals+ compares them position by position.
      #
      # @param other [Object] adapter to compare against
      # @return [Object] result of +Series#equals+, or +false+ when +other+ is
      #   not a BaseAdapter
      def compare_data_equality(other)
        case other
        when PandasSeriesAdapter
          return data.equals(other.data)
        when NumpyAdapter
          other = other.data
        when NArrayAdapter
          case other.data
          when Numo::Bit
            # Bit arrays convert to 0/1 integers; map them to real booleans.
            other = other.data.to_a
            other.map! {|x| [false, true][x] }
          else
            other = other.data.to_a
          end
        when BaseAdapter
          other = other.data.to_a
        else
          return false
        end

        data.equals(Pandas::Series.new(other, index: data.index))
      end

      # @param key [Charty::Vector, Object] a vector is treated as a mask (see
      #   #where); anything else is passed to the Series' own +[]+
      def [](key)
        case key
        when Charty::Vector
          where(key)
        else
          data[key]
        end
      end

      def_delegators :data, :[]=, :to_a

      # Yields every element in positional order (through +iloc+).
      #
      # @return [Enumerator] when no block is given
      def each
        return enum_for(__method__) unless block_given?

        i, n = 0, data.size
        while i < n
          yield data.iloc[i]
          i += 1
        end
      end

      # @return [Boolean] whether the Series has no elements
      def empty?
        data.size == 0
      end

      # TODO: Reconsider the return value type of values_at
      #
      # @param indices [Array<Integer>] positions handed to +Series#take+
      # @return [Array] the selected elements as a plain Ruby array
      def values_at(*indices)
        data.take(indices).to_a
      end

      # Builds a new vector from the elements selected by +mask+.
      #
      # NDArray and Series masks are converted to a boolean NDArray and applied
      # through Series indexing; other masks go through #where_in_array and the
      # selected values are wrapped in a new Series of the same dtype.
      #
      # @param mask [Object] mask accepted by +check_mask_vector+
      # @return [Charty::Vector]
      def where(mask)
        mask = check_mask_vector(mask)
        case mask.data
        when Numpy::NDArray,
             ->(x) { defined?(Pandas::Series) && x.is_a?(Pandas::Series) }
          mask_data = Numpy.asarray(mask.data, dtype: :bool)
          masked_data = data[mask_data]
          masked_index = mask_data.nonzero()[0].to_a.map {|i| index[i] }
        else
          masked_data, masked_index = where_in_array(mask)
          masked_data = Pandas::Series.new(masked_data, dtype: data.dtype)
        end
        Charty::Vector.new(masked_data, index: masked_index, name: name)
      end

      # Collects the elements whose mask entry is +true+ or +1+.
      #
      # @param mask [Object] mask accepted by +check_mask_vector+
      # @return [Array(Array, Array)] selected values and their index entries
      def where_in_array(mask)
        mask = check_mask_vector(mask)
        masked_data = []
        masked_index = []
        mask.each_with_index do |f, i|
          case f
          when true, 1
            masked_data << data.iloc[i]
            masked_index << index[i]
          end
        end
        return masked_data, masked_index
      end

      # @return [Object] true for a bool dtype; for an object dtype, whether
      #   every element is one of +nil+, +false+, +true+; false otherwise
      def boolean?
        case
        when Pandas.api.types.is_bool_dtype(data.dtype)
          true
        when Pandas.api.types.is_object_dtype(data.dtype)
          data.isin([nil, false, true]).all()
        else
          false
        end
      end

      # @return [Object] result of pandas' +is_numeric_dtype+ for the dtype
      def numeric?
        Pandas.api.types.is_numeric_dtype(data.dtype)
      end

      # @return [Object] result of pandas' +is_categorical_dtype+ for the dtype
      def categorical?
        Pandas.api.types.is_categorical_dtype(data.dtype)
      end

      # @return [Array, nil] the categories, or nil for non-categorical data
      def categories
        data.cat.categories.to_a if categorical?
      end

      # @return [Array] the result of +Series#unique+ as a Ruby array
      def unique_values
        data.unique.to_a
      end

      # Partitions the values by the entries of +grouper+.
      #
      # A Series grouper is handed to +Series#groupby+ unchanged, so pandas
      # aligns it with the data on index labels.  Groupers that carry no
      # pandas index (vectors of other adapters, arrays, ...) are positional:
      # they are wrapped in a Series that reuses +data.index+, otherwise the
      # default RangeIndex of the new Series would be aligned against the
      # data's labels and the keys would end up on the wrong rows or on none.
      #
      # @param grouper [Pandas::Series, Charty::Vector, Object] grouping keys
      # @return [Hash{Object => Charty::Vector}] one vector per distinct key
      def group_by(grouper)
        case grouper
        when Pandas::Series
          group_keys = grouper.unique.to_a
          groups = data.groupby(grouper)
          group_keys.map {|g|
            [g, Charty::Vector.new(groups.get_group(g))]
          }.to_h
        when Charty::Vector
          case grouper.adapter
          when self.class
            group_by(grouper.data)
          else
            grouper = Pandas::Series.new(grouper.to_a, index: data.index)
            group_by(grouper)
          end
        else
          grouper = Pandas::Series.new(Array(grouper), index: data.index)
          group_by(grouper)
        end
      end

      # @return [Charty::Vector] vector built from +Series#dropna+
      def drop_na
        Charty::Vector.new(data.dropna)
      end

      # Element-wise equality against +val+.
      #
      # @param val [Object] value to compare each element with
      # @return [Charty::Vector] comparison result sharing this vector's index
      #   and name
      def eq(val)
        Charty::Vector.new((data == val),
                           index: index,
                           name: name)
      end

      # @return [Charty::Vector] result of +Series#notnull+ sharing this
      #   vector's index and name
      def notnull
        Charty::Vector.new(data.notnull, index: index, name: name)
      end

      # @return [Object] result of +Series#mean+
      def mean
        data.mean()
      end

      # @param population [Boolean] use ddof 0 (population) instead of ddof 1
      #   (sample)
      # @return [Object] result of +Series#std+
      def stdev(population: false)
        data.std(ddof: population ? 0 : 1)
      end

      # @param q [Array<Numeric>] percentiles on a 0..100 scale
      # @return [Object] result of +Series#quantile+ for the rescaled
      #   (0.0..1.0) values
      def percentile(q)
        q = q.map {|x| x / 100.0 }
        data.quantile(q)
      end
    end
  end
end