charty 0.2.10 → 0.2.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d885d476eadfadd3f76f138707ffaf6166eec006b4528fb19c3acce0b97b1c85
4
- data.tar.gz: 0630f23f19b65177f3b6dc238717a81a019f9dfa40a40c143d71dcc5c5f760f0
3
+ metadata.gz: eac796ba17aeb9a82a312d7099c898a2350abaf34c3fad80ea1afd1483de1797
4
+ data.tar.gz: 8001339bd86377ab3693cc2eeb0a826c04c4ea4e08d58e3a10e99849a273cd58
5
5
  SHA512:
6
- metadata.gz: 535a69b8d794ccbd30b4328f3188c8a8b50c850cfd84285f79b31162296a9d981313e561ea1089a68be3368b7eb1c83ccc3cb2ff511b8f79aef77e544e261bbe
7
- data.tar.gz: 804de2468433db38b5381bcbccab4c28e84c7592bce5c5a98a539ce99c589449759e65ffe7f2e808bd3c5cb5e002ee2219219111561f66f590dbb0ac2c0757bc
6
+ metadata.gz: 3679334bb56834f7fb1067878c60e7601375e05d6adf63e566eb2cd3847a7ce1423bb27d0104f666854c8343840706a8e14dcc12bc8420ae062b8144d3f6ebc2
7
+ data.tar.gz: f4bbec96ae815cd758b67a993355fe5e42a10b91d7ef351e63570ec84fa618e65e4c2df4006f2d3431738c8b82b6a5dcf74d79a9b4ce005f89f18fd2ae88d31d
@@ -20,12 +20,12 @@ module Charty
20
20
  @figure = {
21
21
  type: :bar,
22
22
  bar_pos: bar_pos,
23
- values: values
23
+ values: values,
24
24
  }
25
25
  end
26
26
 
27
- def box_plot(plot_data, positions, **kwargs)
28
- @figure = { type: :box, data: plot_data }
27
+ def box_plot(plot_data, positions, orient:, **kwargs)
28
+ @figure = { type: :box, data: plot_data, orient: orient }
29
29
  end
30
30
 
31
31
  def set_xlabel(label)
@@ -57,7 +57,12 @@ module Charty
57
57
  when :bar
58
58
  ::UnicodePlot.barplot(@layout[:xtick_labels], @figure[:values], xlabel: @layout[:xlabel])
59
59
  when :box
60
- ::UnicodePlot.boxplot(@layout[:xtick_labels], @figure[:data], xlabel: @layout[:xlabel])
60
+ xlabel = if @figure[:orient] == :v
61
+ @layout[:ylabel]
62
+ else
63
+ @layout[:xlabel]
64
+ end
65
+ ::UnicodePlot.boxplot(@layout[:xtick_labels], @figure[:data], xlabel: xlabel)
61
66
  end
62
67
  sio = StringIO.new
63
68
  class << sio
data/lib/charty/index.rb CHANGED
@@ -199,6 +199,15 @@ module Charty
199
199
  end
200
200
  end
201
201
 
202
# Resolves +key+ to its location in this index.
#
# When the wrapped +values+ object is a Pandas::Index the lookup is
# forwarded to its +get_loc+; every other kind of values falls back to the
# superclass implementation.
def loc(key)
  # `===` is kept on purpose: it is exactly the test `case/when` performs.
  return super unless Pandas::Index === values

  values.get_loc(key)
end
210
+
202
211
  def union(other)
203
212
  other = PandasIndex.try_convert(other)
204
213
  # NOTE: Using `sort=False` in pandas.Index#union does not produce pandas.RangeIndex.
@@ -109,9 +109,9 @@ module Charty
109
109
  a = a.sort
110
110
  n = a.size
111
111
  q.map do |x|
112
- x = n * (x / 100.0)
112
+ x = (n-1) * (x / 100.0)
113
113
  i = x.floor
114
- if i == n-1
114
+ if i == x
115
115
  a[i]
116
116
  else
117
117
  t = x - i
@@ -0,0 +1,53 @@
1
module Charty
  module TableAdapters
    # Table adapter that wraps an Arrow::Table, registered under the
    # :arrow key.
    class ArrowAdapter < BaseAdapter
      TableAdapters.register(:arrow, self)

      # The wrapped Arrow::Table.
      attr_reader :data

      # Tells whether +data+ can be handled by this adapter.
      # The +defined?+ guard makes the check safe when the Arrow::Table
      # constant does not exist.
      def self.supported?(data)
        defined?(Arrow::Table) && data.is_a?(Arrow::Table)
      end

      # Stores the table, records its column names, and sets up the
      # column index and a 0-based row index covering every row.
      def initialize(data)
        @data = data
        @column_names = @data.columns.map { |column| column.name }
        self.columns = Index.new(@column_names)
        self.index = RangeIndex.new(0...length)
      end

      # Number of rows in the wrapped table.
      def length
        @data.n_rows
      end

      # Number of columns in the wrapped table.
      def column_length
        @column_names.size
      end

      # Compares the wrapped tables directly when +other+ is also an
      # ArrowAdapter; otherwise defers to the superclass comparison.
      def compare_data_equality(other)
        return super unless other.is_a?(ArrowAdapter)

        data == other.data
      end

      # Element / column access.
      #
      # * With a +row+, returns the single value at (+row+, +column+).
      # * Without a +row+ and with an Array of columns, returns a Table
      #   built from the selected columns.
      # * Without a +row+ and with a single column, returns a Vector that
      #   carries this adapter's index and the column's name.
      def [](row, column)
        return @data[column][row] if row

        if column.is_a?(Array)
          Table.new(@data.select_columns(*column))
        else
          arrow_column = @data[column]
          Vector.new(arrow_column.data.combine,
                     index: index,
                     name: arrow_column.name)
        end
      end
    end
  end
end
@@ -75,6 +75,8 @@ module Charty
75
75
  arrays[i] << record[key]
76
76
  end
77
77
  end
78
+ when Vector
79
+ arrays = data
78
80
  when self.class.method(:array?)
79
81
  unsupported_data_format unless data.all?(&self.class.method(:array?))
80
82
  arrays = data.map(&:to_a).transpose
@@ -121,20 +123,15 @@ module Charty
121
123
  index = union_indexes(*indexes)
122
124
 
123
125
  arrays = arrays.map do |array|
124
- case array
125
- when Charty::Vector
126
- array.data
127
- when Hash
128
- raise NotImplementedError
129
- when self.class.method(:array?)
130
- array
131
- else
132
- Array.try_convert(array)
133
- end
126
+ Vector.try_convert(array)
134
127
  end
135
128
 
136
129
  columns = generate_column_names(arrays.length, columns)
137
130
 
131
+ arrays.zip(columns) do |array, column|
132
+ array.name = column.to_sym if array.name.to_s != column
133
+ end
134
+
138
135
  return arrays, columns, index
139
136
  end
140
137
 
@@ -199,9 +196,7 @@ module Charty
199
196
  else
200
197
  @data[str_key]
201
198
  end
202
- # FIXME: Here column_data need to be dupped to
203
- # prevent to overwrite the name of Pandas::Series
204
- Vector.new(column_data.dup, index: index, name: column)
199
+ Vector.new(column_data, index: index, name: column)
205
200
  end
206
201
  end
207
202
 
@@ -216,15 +211,10 @@ module Charty
216
211
  end
217
212
 
218
213
  orig_values = values
219
- case values
220
- when Charty::Vector
221
- values = values.data
222
- else
223
- values = Array.try_convert(values)
224
- end
214
+ values = Vector.try_convert(values)
225
215
  if values.nil?
226
216
  raise ArgumentError,
227
- "`values` must be convertible to Array"
217
+ "`values` must be convertible to Charty::Vector"
228
218
  end
229
219
 
230
220
  if values.length != self.length
@@ -23,3 +23,4 @@ require_relative 'table_adapters/daru_adapter'
23
23
  require_relative 'table_adapters/active_record_adapter'
24
24
  require_relative 'table_adapters/nmatrix_adapter'
25
25
  require_relative 'table_adapters/pandas_adapter'
26
+ require_relative 'table_adapters/arrow_adapter'
data/lib/charty/vector.rb CHANGED
@@ -49,7 +49,7 @@ module Charty
49
49
 
50
50
  alias completecases notnull
51
51
 
52
- def_delegators :adapter, :mean, :stdev
52
+ def_delegators :adapter, :mean, :stdev, :percentile
53
53
 
54
54
  def_delegators :adapter, :scale, :scale_inverse
55
55
 
@@ -30,7 +30,7 @@ module Charty
30
30
  include NameSupport
31
31
  include IndexSupport
32
32
 
33
- def_delegators :data, :values_at
33
+ def_delegators :data, :values_at, :to_a
34
34
 
35
35
  def where(mask)
36
36
  masked_data, masked_index = where_in_array(mask)
@@ -0,0 +1,156 @@
1
module Charty
  module VectorAdapters
    # Vector adapter for Apache Arrow data (Arrow::Array and
    # Arrow::ChunkedArray), registered under the :arrow key.
    class ArrowAdapter < BaseAdapter
      VectorAdapters.register(:arrow, self)

      include Enumerable
      include NameSupport
      include IndexSupport

      # Tells whether +data+ is an Arrow array or chunked array.
      # The +defined?+ guards keep the check safe when the Arrow constants
      # do not exist.
      def self.supported?(data)
        (defined?(Arrow::Array) && data.is_a?(Arrow::Array)) ||
          (defined?(Arrow::ChunkedArray) && data.is_a?(Arrow::ChunkedArray))
      end

      # Wraps +data+ (validated by +check_data+) and installs a 0-based
      # range index when no index is set yet.
      def initialize(data)
        @data = check_data(data)
        self.index = index || RangeIndex.new(0 ... length)
      end

      # Number of elements in the wrapped data.
      def size
        @data.length
      end

      # True when the wrapped data has no elements.
      def empty?
        @data.length.zero?
      end

      # Returns a new Vector holding only the elements selected by +mask+,
      # together with the matching index entries and this vector's name.
      def where(mask)
        mask = check_mask_vector(mask)
        mask_data = mask.data
        unless mask_data.is_a?(Arrow::BooleanArray)
          # Any other mask representation is converted to a boolean array;
          # integer masks are interpreted through Integer#nonzero?.
          mask_data = mask.to_a
          mask_data = mask_data.map(&:nonzero?) if mask_data[0].is_a?(Integer)
          mask_data = Arrow::BooleanArray.new(mask_data)
        end
        masked_data = @data.filter(mask_data)
        masked_index = []
        mask_data.to_a.each_with_index do |boolean, i|
          masked_index << index[i] if boolean
        end
        Vector.new(masked_data, index: masked_index, name: name)
      end

      # True when the wrapped data holds boolean values.
      def boolean?
        case @data
        when Arrow::BooleanArray
          true
        when Arrow::ChunkedArray
          @data.value_data_type.is_a?(Arrow::BooleanDataType)
        else
          false
        end
      end

      # True when the wrapped data holds numeric values.
      def numeric?
        case @data
        when Arrow::NumericArray
          true
        when Arrow::ChunkedArray
          @data.value_data_type.is_a?(Arrow::NumericDataType)
        else
          false
        end
      end

      # True when the wrapped data holds string or dictionary-encoded
      # values.
      def categorical?
        case @data
        when Arrow::StringArray, Arrow::DictionaryArray
          true
        when Arrow::ChunkedArray
          # value_data_type is a data type object (see boolean?/numeric?),
          # so it must be matched against data type classes. Matching it
          # against Arrow::StringArray could never succeed, which made
          # string chunked arrays look non-categorical.
          case @data.value_data_type
          when Arrow::StringDataType, Arrow::DictionaryDataType
            true
          else
            false
          end
        else
          false
        end
      end

      # Returns the category values as a Ruby array, taken from the data's
      # own dictionary when it has one and from a dictionary encoding of
      # the data otherwise.
      def categories
        if @data.respond_to?(:dictionary)
          dictionary = @data.dictionary
        else
          dictionary = @data.dictionary_encode.dictionary
        end
        dictionary.to_a
      end

      # Distinct values of the wrapped data as a Ruby array.
      def unique_values
        @data.unique.to_a
      end

      # Splits this vector by the values of +grouper+ and returns a Hash
      # mapping each distinct grouper value to a Vector of the elements
      # whose grouper entry equals that value.
      def group_by(grouper)
        grouper = Vector.new(grouper) unless grouper.is_a?(Vector)
        group_keys = grouper.unique_values
        grouper_data = grouper.data
        unless grouper_data.is_a?(Arrow::Array)
          grouper_data = Arrow::Array.new(grouper.to_a)
        end
        equal = Arrow::Function.find("equal")
        group_keys.map { |key|
          if key.nil?
            # NOTE(review): the nil group is sized by the null count of this
            # vector's data, not of the grouper -- confirm this is intended.
            target_vector = Vector.new([nil] * @data.n_nulls)
          else
            mask = equal.execute([grouper_data, key]).value
            target_vector = Vector.new(@data.filter(mask))
          end
          [key, target_vector]
        }.to_h
      end

      # Returns a Vector without null entries.
      #
      # When there are no nulls the data is re-wrapped with the current
      # index and name. Otherwise the result is built from the filtered
      # data alone, so it does not carry over this vector's index or name.
      def drop_na
        if @data.n_nulls.zero?
          Vector.new(@data, index: index, name: name)
        else
          data_without_null =
            Arrow::Function.find("drop_null").execute([@data]).value
          Vector.new(data_without_null)
        end
      end

      # Element-wise equality against +val+, returned as a mask Vector
      # with this vector's index and name.
      def eq(val)
        mask = Arrow::Function.find("equal").execute([@data, val]).value
        Vector.new(mask, index: index, name: name)
      end

      # Returns a mask Vector flagging the non-null entries.
      def notnull
        if @data.n_nulls.zero?
          mask = Arrow::BooleanArray.new([true] * @data.length)
        else
          # Presumably reuses the validity bitmap as the boolean values of
          # the mask -- TODO confirm against Arrow::BooleanArray.new.
          mask = Arrow::BooleanArray.new(@data.length,
                                         @data.null_bitmap,
                                         nil,
                                         0)
        end
        Vector.new(mask, index: index, name: name)
      end

      # Mean of the wrapped data, as computed by the data object itself.
      def mean
        @data.mean
      end

      # Standard deviation computed by Arrow's "stddev" function, using
      # ddof 0 when +population+ is true and ddof 1 otherwise.
      def stdev(population: false)
        options = Arrow::VarianceOptions.new
        if population
          options.ddof = 0
        else
          options.ddof = 1
        end
        Arrow::Function.find("stddev").execute([@data], options).value.value
      end
    end
  end
end
@@ -34,12 +34,8 @@ module Charty
34
34
  case other
35
35
  when DaruVectorAdapter
36
36
  data == other.data
37
- when ArrayAdapter
38
- data.to_a == other.data
39
- when NArrayAdapter, NMatrixAdapter, NumpyAdapter, PandasSeriesAdapter
40
- other.compare_data_equality(self)
41
37
  else
42
- data == other.data.to_a
38
+ to_a == other.to_a
43
39
  end
44
40
  end
45
41
 
@@ -156,7 +152,8 @@ module Charty
156
152
  end
157
153
 
158
154
  def percentile(q)
159
- data.linear_percentile(q)
155
+ a = data.reject_values(*Daru::MISSING_VALUES).to_a
156
+ Statistics.percentile(a, q)
160
157
  end
161
158
  end
162
159
  end
@@ -22,13 +22,22 @@ module Charty
22
22
  when NumpyAdapter, PandasSeriesAdapter
23
23
  other.compare_data_equality(self)
24
24
  else
25
- data == other.data.to_a
25
+ data == other.to_a
26
26
  end
27
27
  end
28
28
 
29
29
  include NameSupport
30
30
  include IndexSupport
31
31
 
32
# Converts the vector to a Ruby array.
#
# For Numo::Bit data each element is compared with 1, so the array holds
# true/false values; every other data type uses the inherited conversion.
def to_a
  return super unless data.is_a?(Numo::Bit)

  map { |flag| flag == 1 }
end
40
+
32
41
  def where(mask)
33
42
  mask = check_mask_vector(mask)
34
43
  case mask.data
@@ -19,12 +19,8 @@ module Charty
19
19
  case other
20
20
  when NMatrixAdapter
21
21
  data == other.data
22
- when ArrayAdapter, DaruVectorAdapter
23
- data.to_a == other.data.to_a
24
- when NArrayAdapter, NumpyAdapter, PandasSeriesAdapter
25
- other.compare_data_equality(self)
26
22
  else
27
- data == other.data.to_a
23
+ data.to_a == other.data.to_a
28
24
  end
29
25
  end
30
26
 
@@ -163,6 +163,10 @@ module Charty
163
163
  def stdev(population: false)
164
164
  Numpy.std(data, ddof: population ? 0 : 1)
165
165
  end
166
+
167
# Percentile(s) of the wrapped data for +q+, computed by
# Numpy.nanpercentile.
# NOTE(review): presumably NaN entries are ignored, as the numpy function
# name suggests -- confirm against the numpy documentation.
def percentile(q)
  Numpy.nanpercentile(data, q)
end
166
170
  end
167
171
  end
168
172
  end
@@ -0,0 +1,62 @@
1
module Charty
  module VectorAdapters
    # Adapter for the case where the data handed to a Vector is itself a
    # Vector. Nested wrappers are collapsed so that the stored data is a
    # Vector whose own adapter is not a VectorAdapter, and most queries are
    # forwarded to that Vector.
    class VectorAdapter < BaseAdapter
      VectorAdapters.register(:vector, self)

      extend Forwardable
      include Enumerable
      include NameSupport
      include IndexSupport

      # Methods answered by the wrapped Vector.
      def_delegators :data, *%i[
        boolean?
        categorical?
        categories
        drop_na
        each
        eq
        first_nonnil
        group_by
        notnull
        numeric?
        to_a
        uniq
        unique_values
        values_at
        where
      ]

      # Only Vector instances are handled by this adapter.
      def self.supported?(data)
        data.is_a?(Vector)
      end

      # Validates +data+, unwraps nested vector adapters, and uses +index+
      # or, when none is given, a 0-based range index.
      def initialize(data, index: nil)
        @data = reduce_nested_vector(check_data(data))
        self.index = index || RangeIndex.new(0...length)
      end

      # Compares the underlying data with that of +other+.
      #
      # A VectorAdapter argument is first unwrapped to its innermost
      # adapter. The comparison then tries, in order: direct data equality
      # when +other+ is still a VectorAdapter, the inner adapter's own
      # +compare_data_equality+, +other+'s +compare_data_equality+, and
      # finally a plain +to_a+ comparison.
      def compare_data_equality(other)
        other = reduce_nested_vector(other.data).adapter if other.is_a?(self.class)
        inner = @data.adapter

        return inner.data == other.data if other.is_a?(self.class)
        return inner.compare_data_equality(other) if inner.respond_to?(:compare_data_equality)
        return other.compare_data_equality(inner) if other.respond_to?(:compare_data_equality)

        inner.to_a == other.to_a
      end

      # Follows adapter -> data links until reaching a vector whose adapter
      # is not a VectorAdapter, and returns that vector.
      private def reduce_nested_vector(vector)
        vector = vector.adapter.data while vector.adapter.is_a?(self.class)
        vector
      end
    end
  end
end
@@ -100,6 +100,10 @@ module Charty
100
100
  Statistics.stdev(data, population: population)
101
101
  end
102
102
 
103
# Percentile(s) of the wrapped data for +q+, computed by
# Statistics.percentile.
# NOTE(review): +q+ is presumably a collection of percentages on a 0..100
# scale -- confirm against Statistics.percentile.
def percentile(q)
  Statistics.percentile(data, q)
end
106
+
103
107
  def log_scale(method)
104
108
  Charty::Vector.new(
105
109
  self.map {|x| Math.log10(x) },
@@ -196,8 +200,10 @@ module Charty
196
200
  end
197
201
 
198
202
  require_relative "vector_adapters/array_adapter"
203
+ require_relative "vector_adapters/arrow_adapter"
199
204
  require_relative "vector_adapters/daru_adapter"
200
205
  require_relative "vector_adapters/narray_adapter"
201
206
  require_relative "vector_adapters/nmatrix_adapter"
202
207
  require_relative "vector_adapters/numpy_adapter"
203
208
  require_relative "vector_adapters/pandas_adapter"
209
+ require_relative "vector_adapters/vector_adapter"
@@ -1,5 +1,5 @@
1
1
  module Charty
2
- VERSION = "0.2.10"
2
+ VERSION = "0.2.11"
3
3
 
4
4
  module Version
5
5
  numbers, TAG = VERSION.split("-")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charty
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.10
4
+ version: 0.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - youchan
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2021-08-18 00:00:00.000000000 Z
13
+ date: 2021-09-10 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: red-colors
@@ -336,6 +336,7 @@ files:
336
336
  - lib/charty/table.rb
337
337
  - lib/charty/table_adapters.rb
338
338
  - lib/charty/table_adapters/active_record_adapter.rb
339
+ - lib/charty/table_adapters/arrow_adapter.rb
339
340
  - lib/charty/table_adapters/base_adapter.rb
340
341
  - lib/charty/table_adapters/daru_adapter.rb
341
342
  - lib/charty/table_adapters/datasets_adapter.rb
@@ -347,11 +348,13 @@ files:
347
348
  - lib/charty/vector.rb
348
349
  - lib/charty/vector_adapters.rb
349
350
  - lib/charty/vector_adapters/array_adapter.rb
351
+ - lib/charty/vector_adapters/arrow_adapter.rb
350
352
  - lib/charty/vector_adapters/daru_adapter.rb
351
353
  - lib/charty/vector_adapters/narray_adapter.rb
352
354
  - lib/charty/vector_adapters/nmatrix_adapter.rb
353
355
  - lib/charty/vector_adapters/numpy_adapter.rb
354
356
  - lib/charty/vector_adapters/pandas_adapter.rb
357
+ - lib/charty/vector_adapters/vector_adapter.rb
355
358
  - lib/charty/version.rb
356
359
  homepage: https://github.com/red-data-tools/charty
357
360
  licenses: