charty 0.2.7 → 0.2.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/charty.gemspec +1 -0
  3. data/examples/bar_plot.rb +19 -0
  4. data/examples/box_plot.rb +17 -0
  5. data/examples/scatter_plot.rb +17 -0
  6. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  7. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  8. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  9. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  10. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  11. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  12. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  13. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  14. data/lib/charty/backends/plotly.rb +53 -22
  15. data/lib/charty/backends/plotly_helpers/notebook_renderer.rb +4 -1
  16. data/lib/charty/backends/pyplot.rb +73 -0
  17. data/lib/charty/backends/unicode_plot.rb +16 -11
  18. data/lib/charty/index.rb +9 -0
  19. data/lib/charty/plot_methods.rb +46 -10
  20. data/lib/charty/plotters/abstract_plotter.rb +41 -9
  21. data/lib/charty/plotters/bar_plotter.rb +39 -0
  22. data/lib/charty/plotters/categorical_plotter.rb +9 -1
  23. data/lib/charty/plotters/distribution_plotter.rb +44 -7
  24. data/lib/charty/plotters/histogram_plotter.rb +97 -35
  25. data/lib/charty/plotters/line_plotter.rb +38 -5
  26. data/lib/charty/plotters/scatter_plotter.rb +4 -2
  27. data/lib/charty/statistics.rb +2 -2
  28. data/lib/charty/table.rb +30 -23
  29. data/lib/charty/table_adapters/arrow_adapter.rb +53 -0
  30. data/lib/charty/table_adapters/base_adapter.rb +88 -0
  31. data/lib/charty/table_adapters/daru_adapter.rb +41 -1
  32. data/lib/charty/table_adapters/hash_adapter.rb +58 -10
  33. data/lib/charty/table_adapters/pandas_adapter.rb +49 -1
  34. data/lib/charty/table_adapters.rb +1 -0
  35. data/lib/charty/vector.rb +30 -2
  36. data/lib/charty/vector_adapters/array_adapter.rb +1 -1
  37. data/lib/charty/vector_adapters/arrow_adapter.rb +156 -0
  38. data/lib/charty/vector_adapters/daru_adapter.rb +3 -6
  39. data/lib/charty/vector_adapters/narray_adapter.rb +10 -1
  40. data/lib/charty/vector_adapters/nmatrix_adapter.rb +1 -5
  41. data/lib/charty/vector_adapters/numpy_adapter.rb +4 -0
  42. data/lib/charty/vector_adapters/pandas_adapter.rb +10 -1
  43. data/lib/charty/vector_adapters/vector_adapter.rb +62 -0
  44. data/lib/charty/vector_adapters.rb +22 -0
  45. data/lib/charty/version.rb +1 -1
  46. metadata +23 -3
@@ -23,3 +23,4 @@ require_relative 'table_adapters/daru_adapter'
23
23
  require_relative 'table_adapters/active_record_adapter'
24
24
  require_relative 'table_adapters/nmatrix_adapter'
25
25
  require_relative 'table_adapters/pandas_adapter'
26
+ require_relative 'table_adapters/arrow_adapter'
data/lib/charty/vector.rb CHANGED
@@ -49,7 +49,33 @@ module Charty
49
49
 
50
50
  alias completecases notnull
51
51
 
52
- def_delegators :adapter, :mean, :stdev
52
+ def_delegators :adapter, :mean, :stdev, :percentile
53
+
54
+ def_delegators :adapter, :scale, :scale_inverse
55
+
56
+ def scale(method)
57
+ case method
58
+ when :linear
59
+ self
60
+ when :log
61
+ adapter.log_scale(method)
62
+ else
63
+ raise ArgumentError,
64
+ "Invalid scaling method: %p" % method
65
+ end
66
+ end
67
+
68
+ def scale_inverse(method)
69
+ case method
70
+ when :linear
71
+ self
72
+ when :log
73
+ adapter.inverse_log_scale(method)
74
+ else
75
+ raise ArgumentError,
76
+ "Invalid scaling method: %p" % method
77
+ end
78
+ end
53
79
 
54
80
  # TODO: write test
55
81
  def categorical_order(order=nil)
@@ -59,7 +85,9 @@ module Charty
59
85
  order = categories
60
86
  else
61
87
  order = unique_values.compact
62
- order.sort! if numeric?
88
+ if numeric?
89
+ order.sort_by! {|x| Util.missing?(x) ? Float::INFINITY : x }
90
+ end
63
91
  end
64
92
  order.compact!
65
93
  end
@@ -30,7 +30,7 @@ module Charty
30
30
  include NameSupport
31
31
  include IndexSupport
32
32
 
33
- def_delegators :data, :values_at
33
+ def_delegators :data, :values_at, :to_a
34
34
 
35
35
  def where(mask)
36
36
  masked_data, masked_index = where_in_array(mask)
@@ -0,0 +1,156 @@
1
+ module Charty
2
+ module VectorAdapters
3
+ class ArrowAdapter < BaseAdapter
4
+ VectorAdapters.register(:arrow, self)
5
+
6
+ include Enumerable
7
+ include NameSupport
8
+ include IndexSupport
9
+
10
+ def self.supported?(data)
11
+ (defined?(Arrow::Array) && data.is_a?(Arrow::Array)) ||
12
+ (defined?(Arrow::ChunkedArray) && data.is_a?(Arrow::ChunkedArray))
13
+ end
14
+
15
+ def initialize(data)
16
+ @data = check_data(data)
17
+ self.index = index || RangeIndex.new(0 ... length)
18
+ end
19
+
20
+ def size
21
+ @data.length
22
+ end
23
+
24
+ def empty?
25
+ @data.length.zero?
26
+ end
27
+
28
+ def where(mask)
29
+ mask = check_mask_vector(mask)
30
+ mask_data = mask.data
31
+ unless mask_data.is_a?(Arrow::BooleanArray)
32
+ mask_data = mask.to_a
33
+ mask_data = mask_data.map(&:nonzero?) if mask_data[0].is_a?(Integer)
34
+ mask_data = Arrow::BooleanArray.new(mask_data)
35
+ end
36
+ masked_data = @data.filter(mask_data)
37
+ masked_index = []
38
+ mask_data.to_a.each_with_index do |boolean, i|
39
+ masked_index << index[i] if boolean
40
+ end
41
+ Vector.new(masked_data, index: masked_index, name: name)
42
+ end
43
+
44
+ def boolean?
45
+ case @data
46
+ when Arrow::BooleanArray
47
+ true
48
+ when Arrow::ChunkedArray
49
+ @data.value_data_type.is_a?(Arrow::BooleanDataType)
50
+ else
51
+ false
52
+ end
53
+ end
54
+
55
+ def numeric?
56
+ case @data
57
+ when Arrow::NumericArray
58
+ true
59
+ when Arrow::ChunkedArray
60
+ @data.value_data_type.is_a?(Arrow::NumericDataType)
61
+ else
62
+ false
63
+ end
64
+ end
65
+
66
+ def categorical?
67
+ case @data
68
+ when Arrow::StringArray, Arrow::DictionaryArray
69
+ true
70
+ when Arrow::ChunkedArray
71
+ case @data.value_data_type
72
+ when Arrow::StringArray, Arrow::DictionaryDataType
73
+ true
74
+ else
75
+ false
76
+ end
77
+ else
78
+ false
79
+ end
80
+ end
81
+
82
+ def categories
83
+ if @data.respond_to?(:dictionary)
84
+ dictionary = @data.dictionary
85
+ else
86
+ dictionary = @data.dictionary_encode.dictionary
87
+ end
88
+ dictionary.to_a
89
+ end
90
+
91
+ def unique_values
92
+ @data.unique.to_a
93
+ end
94
+
95
+ def group_by(grouper)
96
+ grouper = Vector.new(grouper) unless grouper.is_a?(Vector)
97
+ group_keys = grouper.unique_values
98
+ grouper_data = grouper.data
99
+ unless grouper_data.is_a?(Arrow::Array)
100
+ grouper_data = Arrow::Array.new(grouper.to_a)
101
+ end
102
+ equal = Arrow::Function.find("equal")
103
+ group_keys.map { |key|
104
+ if key.nil?
105
+ target_vector = Vector.new([nil] * @data.n_nulls)
106
+ else
107
+ mask = equal.execute([grouper_data, key]).value
108
+ target_vector = Vector.new(@data.filter(mask))
109
+ end
110
+ [key, target_vector]
111
+ }.to_h
112
+ end
113
+
114
+ def drop_na
115
+ if @data.n_nulls.zero?
116
+ Vector.new(@data, index: index, name: name)
117
+ else
118
+ data_without_null =
119
+ Arrow::Function.find("drop_null").execute([@data]).value
120
+ Vector.new(data_without_null)
121
+ end
122
+ end
123
+
124
+ def eq(val)
125
+ mask = Arrow::Function.find("equal").execute([@data, val]).value
126
+ Vector.new(mask, index: index, name: name)
127
+ end
128
+
129
+ def notnull
130
+ if @data.n_nulls.zero?
131
+ mask = Arrow::BooleanArray.new([true] * @data.length)
132
+ else
133
+ mask = Arrow::BooleanArray.new(@data.length,
134
+ @data.null_bitmap,
135
+ nil,
136
+ 0)
137
+ end
138
+ Vector.new(mask, index: index, name: name)
139
+ end
140
+
141
+ def mean
142
+ @data.mean
143
+ end
144
+
145
+ def stdev(population: false)
146
+ options = Arrow::VarianceOptions.new
147
+ if population
148
+ options.ddof = 0
149
+ else
150
+ options.ddof = 1
151
+ end
152
+ Arrow::Function.find("stddev").execute([@data], options).value.value
153
+ end
154
+ end
155
+ end
156
+ end
@@ -34,12 +34,8 @@ module Charty
34
34
  case other
35
35
  when DaruVectorAdapter
36
36
  data == other.data
37
- when ArrayAdapter
38
- data.to_a == other.data
39
- when NArrayAdapter, NMatrixAdapter, NumpyAdapter, PandasSeriesAdapter
40
- other.compare_data_equality(self)
41
37
  else
42
- data == other.data.to_a
38
+ to_a == other.to_a
43
39
  end
44
40
  end
45
41
 
@@ -156,7 +152,8 @@ module Charty
156
152
  end
157
153
 
158
154
  def percentile(q)
159
- data.linear_percentile(q)
155
+ a = data.reject_values(*Daru::MISSING_VALUES).to_a
156
+ Statistics.percentile(a, q)
160
157
  end
161
158
  end
162
159
  end
@@ -22,13 +22,22 @@ module Charty
22
22
  when NumpyAdapter, PandasSeriesAdapter
23
23
  other.compare_data_equality(self)
24
24
  else
25
- data == other.data.to_a
25
+ data == other.to_a
26
26
  end
27
27
  end
28
28
 
29
29
  include NameSupport
30
30
  include IndexSupport
31
31
 
32
+ def to_a
33
+ case data
34
+ when Numo::Bit
35
+ map {|bit| bit == 1 }
36
+ else
37
+ super
38
+ end
39
+ end
40
+
32
41
  def where(mask)
33
42
  mask = check_mask_vector(mask)
34
43
  case mask.data
@@ -19,12 +19,8 @@ module Charty
19
19
  case other
20
20
  when NMatrixAdapter
21
21
  data == other.data
22
- when ArrayAdapter, DaruVectorAdapter
23
- data.to_a == other.data.to_a
24
- when NArrayAdapter, NumpyAdapter, PandasSeriesAdapter
25
- other.compare_data_equality(self)
26
22
  else
27
- data == other.data.to_a
23
+ data.to_a == other.data.to_a
28
24
  end
29
25
  end
30
26
 
@@ -163,6 +163,10 @@ module Charty
163
163
  def stdev(population: false)
164
164
  Numpy.std(data, ddof: population ? 0 : 1)
165
165
  end
166
+
167
+ def percentile(q)
168
+ Numpy.nanpercentile(data, q)
169
+ end
166
170
  end
167
171
  end
168
172
  end
@@ -152,7 +152,8 @@ module Charty
152
152
  group_keys = grouper.unique.to_a
153
153
  groups = data.groupby(grouper)
154
154
  group_keys.map {|g|
155
- [g, Charty::Vector.new(groups.get_group(g))]
155
+ g_vals = groups.get_group(g) rescue []
156
+ [g, Charty::Vector.new(g_vals)]
156
157
  }.to_h
157
158
  when Charty::Vector
158
159
  case grouper.adapter
@@ -194,6 +195,14 @@ module Charty
194
195
  q = q.map {|x| x / 100.0 }
195
196
  data.quantile(q)
196
197
  end
198
+
199
+ def log_scale(method)
200
+ Charty::Vector.new(Numpy.log10(data))
201
+ end
202
+
203
+ def inverse_log_scale(method)
204
+ Charty::Vector.new(Numpy.power(10, data))
205
+ end
197
206
  end
198
207
  end
199
208
  end
@@ -0,0 +1,62 @@
1
+ module Charty
2
+ module VectorAdapters
3
+ class VectorAdapter < BaseAdapter
4
+ VectorAdapters.register(:vector, self)
5
+
6
+ extend Forwardable
7
+ include Enumerable
8
+
9
+ def self.supported?(data)
10
+ data.is_a?(Vector)
11
+ end
12
+
13
+ def initialize(data, index: nil)
14
+ data = check_data(data)
15
+ @data = reduce_nested_vector(data)
16
+ self.index = index || RangeIndex.new(0 ... length)
17
+ end
18
+
19
+ include NameSupport
20
+ include IndexSupport
21
+
22
+ def_delegators :data,
23
+ :boolean?,
24
+ :categorical?,
25
+ :categories,
26
+ :drop_na,
27
+ :each,
28
+ :eq,
29
+ :first_nonnil,
30
+ :group_by,
31
+ :notnull,
32
+ :numeric?,
33
+ :to_a,
34
+ :uniq,
35
+ :unique_values,
36
+ :values_at,
37
+ :where
38
+
39
+ def compare_data_equality(other)
40
+ if other.is_a?(self.class)
41
+ other = reduce_nested_vector(other.data).adapter
42
+ end
43
+ if other.is_a?(self.class)
44
+ @data.adapter.data == other.data
45
+ elsif @data.adapter.respond_to?(:compare_data_equality)
46
+ @data.adapter.compare_data_equality(other)
47
+ elsif other.respond_to?(:compare_data_equality)
48
+ other.compare_data_equality(@data.adapter)
49
+ else
50
+ @data.adapter.to_a == other.to_a
51
+ end
52
+ end
53
+
54
+ private def reduce_nested_vector(vector)
55
+ while vector.adapter.is_a?(self.class)
56
+ vector = vector.adapter.data
57
+ end
58
+ vector
59
+ end
60
+ end
61
+ end
62
+ end
@@ -99,6 +99,26 @@ module Charty
99
99
  def stdev(population: false)
100
100
  Statistics.stdev(data, population: population)
101
101
  end
102
+
103
+ def percentile(q)
104
+ Statistics.percentile(data, q)
105
+ end
106
+
107
+ def log_scale(method)
108
+ Charty::Vector.new(
109
+ self.map {|x| Math.log10(x) },
110
+ index: index,
111
+ name: name
112
+ )
113
+ end
114
+
115
+ def inverse_log_scale(method)
116
+ Charty::Vector.new(
117
+ self.map {|x| 10.0 ** x },
118
+ index: index,
119
+ name: name
120
+ )
121
+ end
102
122
  end
103
123
 
104
124
  module NameSupport
@@ -180,8 +200,10 @@ module Charty
180
200
  end
181
201
 
182
202
  require_relative "vector_adapters/array_adapter"
203
+ require_relative "vector_adapters/arrow_adapter"
183
204
  require_relative "vector_adapters/daru_adapter"
184
205
  require_relative "vector_adapters/narray_adapter"
185
206
  require_relative "vector_adapters/nmatrix_adapter"
186
207
  require_relative "vector_adapters/numpy_adapter"
187
208
  require_relative "vector_adapters/pandas_adapter"
209
+ require_relative "vector_adapters/vector_adapter"
@@ -1,5 +1,5 @@
1
1
  module Charty
2
- VERSION = "0.2.7"
2
+ VERSION = "0.2.11"
3
3
 
4
4
  module Version
5
5
  numbers, TAG = VERSION.split("-")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charty
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - youchan
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2021-06-21 00:00:00.000000000 Z
13
+ date: 2021-09-10 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: red-colors
@@ -208,6 +208,20 @@ dependencies:
208
208
  - - ">="
209
209
  - !ruby/object:Gem::Version
210
210
  version: 0.7.0
211
+ - !ruby/object:Gem::Dependency
212
+ name: csv
213
+ requirement: !ruby/object:Gem::Requirement
214
+ requirements:
215
+ - - ">="
216
+ - !ruby/object:Gem::Version
217
+ version: '0'
218
+ type: :development
219
+ prerelease: false
220
+ version_requirements: !ruby/object:Gem::Requirement
221
+ requirements:
222
+ - - ">="
223
+ - !ruby/object:Gem::Version
224
+ version: '0'
211
225
  description: Visualizing your data in a simple way.
212
226
  email:
213
227
  - youchan01@gmail.com
@@ -232,6 +246,8 @@ files:
232
246
  - charty.gemspec
233
247
  - examples/Gemfile
234
248
  - examples/active_record.ipynb
249
+ - examples/bar_plot.rb
250
+ - examples/box_plot.rb
235
251
  - examples/daru.ipynb
236
252
  - examples/iris_dataset.ipynb
237
253
  - examples/nmatrix.ipynb
@@ -270,6 +286,7 @@ files:
270
286
  - examples/sample_images/subplot_pyplot.png
271
287
  - examples/sample_pyplot.ipynb
272
288
  - examples/sample_rubyplot.ipynb
289
+ - examples/scatter_plot.rb
273
290
  - images/design_concept.png
274
291
  - images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png
275
292
  - images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png
@@ -319,6 +336,7 @@ files:
319
336
  - lib/charty/table.rb
320
337
  - lib/charty/table_adapters.rb
321
338
  - lib/charty/table_adapters/active_record_adapter.rb
339
+ - lib/charty/table_adapters/arrow_adapter.rb
322
340
  - lib/charty/table_adapters/base_adapter.rb
323
341
  - lib/charty/table_adapters/daru_adapter.rb
324
342
  - lib/charty/table_adapters/datasets_adapter.rb
@@ -330,11 +348,13 @@ files:
330
348
  - lib/charty/vector.rb
331
349
  - lib/charty/vector_adapters.rb
332
350
  - lib/charty/vector_adapters/array_adapter.rb
351
+ - lib/charty/vector_adapters/arrow_adapter.rb
333
352
  - lib/charty/vector_adapters/daru_adapter.rb
334
353
  - lib/charty/vector_adapters/narray_adapter.rb
335
354
  - lib/charty/vector_adapters/nmatrix_adapter.rb
336
355
  - lib/charty/vector_adapters/numpy_adapter.rb
337
356
  - lib/charty/vector_adapters/pandas_adapter.rb
357
+ - lib/charty/vector_adapters/vector_adapter.rb
338
358
  - lib/charty/version.rb
339
359
  homepage: https://github.com/red-data-tools/charty
340
360
  licenses:
@@ -355,7 +375,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
355
375
  - !ruby/object:Gem::Version
356
376
  version: '0'
357
377
  requirements: []
358
- rubygems_version: 3.2.3
378
+ rubygems_version: 3.2.23
359
379
  signing_key:
360
380
  specification_version: 4
361
381
  summary: Visualizing your data in a simple way.