charty 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -23
  3. data/.github/workflows/nmatrix.yml +67 -0
  4. data/.github/workflows/pycall.yml +86 -0
  5. data/Gemfile +18 -0
  6. data/README.md +123 -4
  7. data/Rakefile +4 -5
  8. data/charty.gemspec +1 -3
  9. data/examples/sample_images/hist_gruff.png +0 -0
  10. data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
  11. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  12. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  13. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  14. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  15. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  16. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  17. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  18. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  19. data/lib/charty.rb +4 -0
  20. data/lib/charty/backends/gruff.rb +13 -2
  21. data/lib/charty/backends/plotly.rb +322 -20
  22. data/lib/charty/backends/pyplot.rb +416 -64
  23. data/lib/charty/index.rb +213 -0
  24. data/lib/charty/linspace.rb +1 -1
  25. data/lib/charty/missing_value_support.rb +14 -0
  26. data/lib/charty/plot_methods.rb +173 -8
  27. data/lib/charty/plotters.rb +7 -0
  28. data/lib/charty/plotters/abstract_plotter.rb +87 -12
  29. data/lib/charty/plotters/bar_plotter.rb +200 -3
  30. data/lib/charty/plotters/box_plotter.rb +75 -7
  31. data/lib/charty/plotters/categorical_plotter.rb +272 -40
  32. data/lib/charty/plotters/count_plotter.rb +7 -0
  33. data/lib/charty/plotters/estimation_support.rb +84 -0
  34. data/lib/charty/plotters/random_support.rb +25 -0
  35. data/lib/charty/plotters/relational_plotter.rb +518 -0
  36. data/lib/charty/plotters/scatter_plotter.rb +115 -0
  37. data/lib/charty/plotters/vector_plotter.rb +6 -0
  38. data/lib/charty/statistics.rb +87 -2
  39. data/lib/charty/table.rb +50 -15
  40. data/lib/charty/table_adapters.rb +2 -0
  41. data/lib/charty/table_adapters/active_record_adapter.rb +17 -9
  42. data/lib/charty/table_adapters/base_adapter.rb +69 -0
  43. data/lib/charty/table_adapters/daru_adapter.rb +37 -3
  44. data/lib/charty/table_adapters/datasets_adapter.rb +6 -2
  45. data/lib/charty/table_adapters/hash_adapter.rb +130 -16
  46. data/lib/charty/table_adapters/narray_adapter.rb +25 -6
  47. data/lib/charty/table_adapters/nmatrix_adapter.rb +15 -5
  48. data/lib/charty/table_adapters/pandas_adapter.rb +81 -0
  49. data/lib/charty/vector.rb +69 -0
  50. data/lib/charty/vector_adapters.rb +183 -0
  51. data/lib/charty/vector_adapters/array_adapter.rb +109 -0
  52. data/lib/charty/vector_adapters/daru_adapter.rb +171 -0
  53. data/lib/charty/vector_adapters/narray_adapter.rb +187 -0
  54. data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
  55. data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
  56. data/lib/charty/vector_adapters/pandas_adapter.rb +200 -0
  57. data/lib/charty/version.rb +1 -1
  58. metadata +33 -45
@@ -0,0 +1,187 @@
1
+ module Charty
2
+ module VectorAdapters
3
+ class NArrayAdapter < BaseAdapter
4
+ VectorAdapters.register(:narray, self)
5
+
6
+ extend Forwardable
7
+ include Enumerable
8
+
9
+ def self.supported?(data)
10
+ defined?(Numo::NArray) && data.is_a?(Numo::NArray)
11
+ end
12
+
13
+ def initialize(data)
14
+ @data = check_data(data)
15
+ self.index = index || RangeIndex.new(0 ... length)
16
+ end
17
+
18
+ def compare_data_equality(other)
19
+ case other
20
+ when ArrayAdapter, NArrayAdapter
21
+ data == other.data
22
+ when NumpyAdapter, PandasSeriesAdapter
23
+ other.compare_data_equality(self)
24
+ else
25
+ data == other.data.to_a
26
+ end
27
+ end
28
+
29
+ include NameSupport
30
+ include IndexSupport
31
+
32
+ # TODO: Reconsider the return value type of values_at
33
+ def values_at(*indices)
34
+ data[indices].to_a
35
+ end
36
+
37
# Selects the elements whose mask entry is set and returns them as a new
# Charty::Vector carrying the matching index labels and this vector's name.
#
# mask - a mask accepted by check_mask_vector.
#
# When the mask is backed by Numo::Bit, the data is sliced directly with
# the bit array.  Otherwise the selection is delegated to where_in_array
# and the selected values are rebuilt as an array of data's own class.
def where(mask)
  mask = check_mask_vector(mask)
  case mask.data
  when Numo::Bit
    bits = mask.data
    masked_data = data[bits]
    # Convert the positions to a plain Array *before* mapping them to index
    # labels.  Mapping on the typed position array would try to store the
    # labels back into an integer NArray, which breaks for non-integer
    # labels.  This mirrors what NumpyAdapter#where does.
    masked_index = bits.where.to_a.map { |i| index[i] }
  else
    masked_data, masked_index = where_in_array(mask)
    masked_data = data.class[*masked_data]
  end
  Charty::Vector.new(masked_data, index: masked_index, name: name)
end
50
+
51
# Tells whether the underlying data holds boolean values.
#
# A Numo::Bit array is always boolean.  A Numo::RObject array is boolean
# only when every element is nil, true, or false.  Any other array type is
# not boolean.
def boolean?
  return true if data.is_a?(Numo::Bit)
  return false unless data.is_a?(Numo::RObject)

  (0...data.size).all? do |pos|
    case data[pos]
    when nil, true, false then true
    else false
    end
  end
end
71
+
72
# Tells whether the underlying data is numeric: every array type except
# Numo::Bit and Numo::RObject is reported as numeric.
def numeric?
  [Numo::Bit, Numo::RObject].none? { |klass| data.is_a?(klass) }
end
81
+
82
+ def categorical?
83
+ false
84
+ end
85
+
86
+ def categories
87
+ nil
88
+ end
89
+
90
# Returns the distinct elements of data as a plain Array, in order of
# first appearance.  Elements are read one by one by position and
# de-duplicated with hash-based equality.
def unique_values
  Array.new(data.size) { |pos| data[pos] }.uniq
end
104
+
105
+ def group_by(grouper)
106
+ case grouper
107
+ when Charty::Vector
108
+ # nothing to do
109
+ else
110
+ grouper = Charty::Vector.new(grouper)
111
+ end
112
+
113
+ group_keys = grouper.unique_values
114
+
115
+ case grouper.data
116
+ when Numo::NArray
117
+ grouper = grouper.data
118
+ else
119
+ grouper = Numo::NArray[*grouper.to_a]
120
+ end
121
+
122
+ group_keys.map { |g|
123
+ [g, Charty::Vector.new(data[grouper.eq(g)])]
124
+ }.to_h
125
+ end
126
+
127
+ def drop_na
128
+ case data
129
+ when Numo::DFloat, Numo::SFloat, Numo::DComplex, Numo::SComplex
130
+ Charty::Vector.new(data[~data.isnan])
131
+ when Numo::RObject
132
+ where_is_nan = data.isnan
133
+ values = []
134
+ i, n = 0, data.size
135
+ while i < n
136
+ x = data[i]
137
+ unless x.nil? || where_is_nan[i] == 1
138
+ values << x
139
+ end
140
+ i += 1
141
+ end
142
+ Charty::Vector.new(Numo::RObject[*values])
143
+ else
144
+ self
145
+ end
146
+ end
147
+
148
+ def eq(val)
149
+ Charty::Vector.new(data.eq(val),
150
+ index: index,
151
+ name: name)
152
+ end
153
+
154
+ def notnull
155
+ case data
156
+ when Numo::RObject
157
+ i, n = 0, length
158
+ notnull_data = Numo::Bit.zeros(n)
159
+ while i < n
160
+ notnull_data[i] = ! missing_value?(data[i])
161
+ i += 1
162
+ end
163
+ when ->(x) { x.respond_to?(:isnan) }
164
+ notnull_data = ~data.isnan
165
+ else
166
+ notnull_data = Numo::Bit.ones(length)
167
+ end
168
+ Charty::Vector.new(notnull_data, index: index, name: name)
169
+ end
170
+
171
+ def mean
172
+ data.mean(nan: true)
173
+ end
174
+
175
# Standard deviation of the data, computed with data.stddev(nan: true).
#
# population - when true, return the population standard deviation
#              (divisor n) instead of the sample one (divisor n - 1).
#
# The population value is derived from the sample value:
#
#   sd_population = sd_sample * sqrt((n - 1) / n)
#
# where n is the number of values that are not NaN.  When fewer than two
# such values exist the rescaling is undefined, so the value returned by
# data.stddev is passed through unchanged.
def stdev(population: false)
  sample_sd = data.stddev(nan: true)
  return sample_sd unless population

  # Count the valid (non-NaN) observations; array types without isnan
  # cannot contain NaN, so every element counts.
  valid = data.respond_to?(:isnan) ? data.isnan.count_false : data.size
  return sample_sd if valid < 2

  sample_sd * Math.sqrt((valid - 1).fdiv(valid))
end
185
+ end
186
+ end
187
+ end
@@ -0,0 +1,37 @@
1
+ module Charty
2
+ module VectorAdapters
3
+ class NMatrixAdapter < BaseAdapter
4
+ VectorAdapters.register(:nmatrix, self)
5
+
6
+ extend Forwardable
7
+ include Enumerable
8
+
9
+ def self.supported?(data)
10
+ defined?(NMatrix) && data.is_a?(NMatrix)
11
+ end
12
+
13
+ def initialize(data)
14
+ @data = check_data(data)
15
+ self.index = index || RangeIndex.new(0 ... length)
16
+ end
17
+
18
+ def compare_data_equality(other)
19
+ case other
20
+ when NMatrixAdapter
21
+ data == other.data
22
+ when ArrayAdapter, DaruVectorAdapter
23
+ data.to_a == other.data.to_a
24
+ when NArrayAdapter, NumpyAdapter, PandasSeriesAdapter
25
+ other.compare_data_equality(self)
26
+ else
27
+ data == other.data.to_a
28
+ end
29
+ end
30
+
31
+ include NameSupport
32
+ include IndexSupport
33
+
34
+ alias length size
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,168 @@
1
+ module Charty
2
+ module VectorAdapters
3
+ class NumpyAdapter < BaseAdapter
4
+ VectorAdapters.register(:numpy, self)
5
+
6
+ def self.supported?(data)
7
+ return false unless defined?(Numpy::NDArray)
8
+ case data
9
+ when Numpy::NDArray
10
+ true
11
+ else
12
+ false
13
+ end
14
+ end
15
+
16
+ def initialize(data)
17
+ @data = check_data(data)
18
+ self.index = index || RangeIndex.new(0 ... length)
19
+ end
20
+
21
+ attr_reader :data
22
+
23
+ def_delegator :data, :size, :length
24
+
25
+ def compare_data_equality(other)
26
+ case other
27
+ when NumpyAdapter, PandasSeriesAdapter
28
+ Numpy.all(data == other.data)
29
+ when BaseAdapter
30
+ Numpy.all(data == other.data.to_a)
31
+ else
32
+ false
33
+ end
34
+ end
35
+
36
+ include NameSupport
37
+ include IndexSupport
38
+
39
+ def where(mask)
40
+ mask = check_mask_vector(mask)
41
+ case mask.data
42
+ when Numpy::NDArray,
43
+ ->(x) { defined?(Pandas::Series) && x.is_a?(Pandas::Series) }
44
+ mask_data = Numpy.asarray(mask.data, dtype: :bool)
45
+ masked_data = data[mask_data]
46
+ masked_index = mask_data.nonzero()[0].to_a.map {|i| index[i] }
47
+ else
48
+ masked_data, masked_index = where_in_array(mask)
49
+ masked_data = Numpy.asarray(masked_data, dtype: data.dtype)
50
+ end
51
+ Charty::Vector.new(masked_data, index: masked_index, name: name)
52
+ end
53
+
54
# Yields every element of data in positional order.  Without a block an
# enumerator over this method is returned instead.
def each
  return to_enum(:each) unless block_given?

  data.size.times { |pos| yield data[pos] }
  nil
end
63
+
64
+ def empty?
65
+ data.size == 0
66
+ end
67
+
68
+ def boolean?
69
+ builtins = PyCall.builtins
70
+ case
71
+ when builtins.issubclass(data.dtype.type, Numpy.bool_)
72
+ true
73
+ when builtins.issubclass(data.dtype.type, Numpy.object_)
74
+ i, n = 0, data.size
75
+ while i < n
76
+ case data[i]
77
+ when nil, true, false
78
+ # do nothing
79
+ else
80
+ return false
81
+ end
82
+ i += 1
83
+ end
84
+ true
85
+ else
86
+ false
87
+ end
88
+ end
89
+
90
+ def numeric?
91
+ # TODO: Handle object array
92
+ PyCall.builtins.issubclass(data.dtype.type, PyCall.tuple([Numpy.number, Numpy.bool_]))
93
+ end
94
+
95
+ def categorical?
96
+ false
97
+ end
98
+
99
+ def categories
100
+ nil
101
+ end
102
+
103
+ def unique_values
104
+ Numpy.unique(data).to_a
105
+ end
106
+
107
+ def group_by(grouper)
108
+ case grouper
109
+ when Numpy::NDArray,
110
+ ->(x) { defined?(Pandas::Series) && x.is_a?(Pandas::Series) }
111
+ # Nothing todo
112
+ when Charty::Vector
113
+ case grouper.data
114
+ when Numpy::NDArray
115
+ grouper = grouper.data
116
+ else
117
+ grouper = Numpy.asarray(grouper.to_a)
118
+ end
119
+ else
120
+ grouper = Numpy.asarray(Array.try_convert(grouper))
121
+ end
122
+
123
+ group_keys = Numpy.unique(grouper).to_a
124
+ group_keys.map { |g|
125
+ [g, Charty::Vector.new(data[grouper == g])]
126
+ }.to_h
127
+ end
128
+
129
+ def drop_na
130
+ where_is_na = if numeric?
131
+ Numpy.isnan(data)
132
+ else
133
+ (data == nil)
134
+ end
135
+ Charty::Vector.new(data[Numpy.logical_not(where_is_na)])
136
+ end
137
+
138
+ def eq(val)
139
+ Charty::Vector.new((data == val),
140
+ index: index,
141
+ name: name)
142
+ end
143
+
144
# Builds a boolean Charty::Vector that is true where the corresponding
# element is NOT missing, keeping this vector's index and name.
#
# For object-dtype arrays each element is tested with missing_value?.
# For every other dtype an element counts as missing when Numpy.isnan
# reports it, so the isnan mask is negated to obtain the "not null" mask.
def notnull
  if PyCall.builtins.issubclass(data.dtype.type, Numpy.object_)
    n = length
    notnull_data = Numpy::NDArray.new(n, dtype: :bool)
    n.times { |i| notnull_data[i] = !missing_value?(data[i]) }
  else
    # isnan marks the missing entries; invert it so that true means present.
    notnull_data = Numpy.logical_not(Numpy.isnan(data))
  end
  Charty::Vector.new(notnull_data, index: index, name: name)
end
158
+
159
+ def mean
160
+ Numpy.mean(data)
161
+ end
162
+
163
+ def stdev(population: false)
164
+ Numpy.std(data, ddof: population ? 0 : 1)
165
+ end
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,200 @@
1
+ module Charty
2
+ module VectorAdapters
3
+ class PandasSeriesAdapter < BaseAdapter
4
+ VectorAdapters.register(:pandas_series, self)
5
+
6
+ def self.supported?(data)
7
+ return false unless defined?(Pandas::Series)
8
+ case data
9
+ when Pandas::Series
10
+ true
11
+ else
12
+ false
13
+ end
14
+ end
15
+
16
+ def initialize(data)
17
+ @data = check_data(data)
18
+ end
19
+
20
+ attr_reader :data
21
+
22
+ def_delegator :data, :size, :length
23
+
24
+ def index
25
+ PandasIndex.new(data.index)
26
+ end
27
+
28
+ def index=(new_index)
29
+ case new_index
30
+ when PandasIndex
31
+ data.index = new_index.values
32
+ when Index
33
+ data.index = new_index.to_a
34
+ else
35
+ data.index = new_index
36
+ end
37
+ end
38
+
39
+ def_delegators :data, :name, :name=
40
+
41
+ def compare_data_equality(other)
42
+ case other
43
+ when PandasSeriesAdapter
44
+ return data.equals(other.data)
45
+ when NumpyAdapter
46
+ other = other.data
47
+ when NArrayAdapter
48
+ case other.data
49
+ when Numo::Bit
50
+ other = other.data.to_a
51
+ other.map! {|x| [false, true][x] }
52
+ else
53
+ other = other.data.to_a
54
+ end
55
+ when BaseAdapter
56
+ other = other.data.to_a
57
+ else
58
+ return false
59
+ end
60
+
61
+ data.equals(Pandas::Series.new(other, index: data.index))
62
+ end
63
+
64
+ def [](key)
65
+ case key
66
+ when Charty::Vector
67
+ where(key)
68
+ else
69
+ data[key]
70
+ end
71
+ end
72
+
73
+ def_delegators :data, :[]=, :to_a
74
+
75
+ def each
76
+ return enum_for(__method__) unless block_given?
77
+
78
+ i, n = 0, data.size
79
+ while i < n
80
+ yield data.iloc[i]
81
+ i += 1
82
+ end
83
+ end
84
+
85
+ def empty?
86
+ data.size == 0
87
+ end
88
+
89
+ # TODO: Reconsider the return value type of values_at
90
+ def values_at(*indices)
91
+ data.take(indices).to_a
92
+ end
93
+
94
+ def where(mask)
95
+ mask = check_mask_vector(mask)
96
+ case mask.data
97
+ when Numpy::NDArray,
98
+ ->(x) { defined?(Pandas::Series) && x.is_a?(Pandas::Series) }
99
+ mask_data = Numpy.asarray(mask.data, dtype: :bool)
100
+ masked_data = data[mask_data]
101
+ masked_index = mask_data.nonzero()[0].to_a.map {|i| index[i] }
102
+ else
103
+ masked_data, masked_index = where_in_array(mask)
104
+ masked_data = Pandas::Series.new(masked_data, dtype: data.dtype)
105
+ end
106
+ Charty::Vector.new(masked_data, index: masked_index, name: name)
107
+ end
108
+
109
# Element-wise selection for masks that are not array-backed.
#
# Walks the checked mask and, for every entry matching true or 1, collects
# the value at that position (via data.iloc) and its index label.
#
# Returns a two-element array: [selected_values, selected_index_labels].
def where_in_array(mask)
  flags = check_mask_vector(mask)
  values = []
  labels = []
  flags.each_with_index do |flag, pos|
    hit = case flag
          when true, 1 then true
          else false
          end
    next unless hit

    values << data.iloc[pos]
    labels << index[pos]
  end
  [values, labels]
end
122
+
123
+ def boolean?
124
+ case
125
+ when Pandas.api.types.is_bool_dtype(data.dtype)
126
+ true
127
+ when Pandas.api.types.is_object_dtype(data.dtype)
128
+ data.isin([nil, false, true]).all()
129
+ else
130
+ false
131
+ end
132
+ end
133
+
134
+ def numeric?
135
+ Pandas.api.types.is_numeric_dtype(data.dtype)
136
+ end
137
+
138
+ def categorical?
139
+ Pandas.api.types.is_categorical_dtype(data.dtype)
140
+ end
141
+
142
+ def categories
143
+ data.cat.categories.to_a if categorical?
144
+ end
145
+
146
+ def unique_values
147
+ data.unique.to_a
148
+ end
149
+
150
+ def group_by(grouper)
151
+ case grouper
152
+ when Pandas::Series
153
+ group_keys = grouper.unique.to_a
154
+ groups = data.groupby(grouper)
155
+ group_keys.map {|g|
156
+ [g, Charty::Vector.new(groups.get_group(g))]
157
+ }.to_h
158
+ when Charty::Vector
159
+ case grouper.adapter
160
+ when self.class
161
+ group_by(grouper.data)
162
+ else
163
+ grouper = Pandas::Series.new(grouper.to_a)
164
+ group_by(grouper)
165
+ end
166
+ else
167
+ grouper = Pandas::Series.new(Array(grouper))
168
+ group_by(grouper)
169
+ end
170
+ end
171
+
172
+ def drop_na
173
+ Charty::Vector.new(data.dropna)
174
+ end
175
+
176
+ def eq(val)
177
+ Charty::Vector.new((data == val),
178
+ index: index,
179
+ name: name)
180
+ end
181
+
182
+ def notnull
183
+ Charty::Vector.new(data.notnull, index: index, name: name)
184
+ end
185
+
186
+ def mean
187
+ data.mean()
188
+ end
189
+
190
+ def stdev(population: false)
191
+ data.std(ddof: population ? 0 : 1)
192
+ end
193
+
194
# Computes percentiles of the series through data.quantile.
#
# q - a percentile in the 0..100 scale, or a collection of them.  Each
#     value is divided by 100.0 before being handed to data.quantile.
#
# A single number is accepted as well as a collection; previously a
# scalar raised NoMethodError because it does not respond to map.
#
# Returns whatever data.quantile returns for the converted argument.
def percentile(q)
  fractions = q.respond_to?(:map) ? q.map { |x| x / 100.0 } : q / 100.0
  data.quantile(fractions)
end
198
+ end
199
+ end
200
+ end