red_amber 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -159,12 +159,19 @@ module RedAmber
159
159
  @vectors || @vectors = init_instance_vars(:vectors)
160
160
  end
161
161
 
162
- # Returns row indices (0...size) in an Array.
162
+ # Returns row indices (start...(size+start)) in an Array.
163
163
  #
164
+ # @param start [Object]
165
+ # Object which has a #succ method.
164
166
  # @return [Array]
165
- # An Array of all indices of rows.
166
- def indices
167
- (0...size).to_a
167
+ # An Array of indices of the row.
168
+ # @example
169
+ # (when self.size == 5)
170
+ # - indices #=> [0, 1, 2, 3, 4]
171
+ # - indices(1) #=> [1, 2, 3, 4, 5]
172
+ # - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
173
+ def indices(start = 0)
174
+ (start..).take(size)
168
175
  end
169
176
  alias_method :indexes, :indices
170
177
 
@@ -225,6 +232,18 @@ module RedAmber
225
232
  g
226
233
  end
227
234
 
235
+ def method_missing(name, *args, &block)
236
+ return v(name) if args.empty?
237
+
238
+ super
239
+ end
240
+
241
+ def respond_to_missing?(name, include_private)
242
+ return true if key?(name)
243
+
244
+ super
245
+ end
246
+
228
247
  private
229
248
 
230
249
  # initialize @variable, @keys, @vectors and return one of them
@@ -154,7 +154,7 @@ module RedAmber
154
154
 
155
155
  def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
156
156
  original = self
157
- indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
157
+ indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
158
158
  df = slice(indices).assign do
159
159
  assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
160
160
  vectors.each_with_object(assigner) do |v, a|
@@ -173,12 +173,12 @@ module RedAmber
173
173
  end
174
174
 
175
175
  df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
176
- df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
176
+ df = size > head + tail ? df[0, 0, 0..head, -tail..-1] : df[0, 0, 0..-1]
177
177
  df = df.assign do
178
178
  vectors.each_with_object({}) do |v, assigner|
179
179
  vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
180
180
  .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
181
- assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
181
+ assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
182
182
  end
183
183
  end
184
184
 
@@ -5,20 +5,20 @@ module RedAmber
5
5
  module DataFrameReshaping
6
6
  # Transpose a wide DataFrame.
7
7
  #
8
- # @param key [Symbol, FalseClass] key of the index column
8
+ # @param key [Symbol] key of the index column
9
9
  # to transpose into keys.
10
- # If it is false, keys[0] is used.
11
- # @param new_key [Symbol, FalseClass] key name of transposed index column.
12
- # If it is false, :name is used. If it already exists, :name1.succ is used.
10
+ # If it is not specified, keys[0] is used.
11
+ # @param new_key [Symbol] key name of transposed index column.
12
+ # If it is not specified, :N is used. If it already exists, :N1 or :N1.succ is used.
13
13
  # @return [DataFrame] transposed DataFrame
14
- def transpose(key: keys.first, new_key: :name)
15
- raise DataFrameArgumentError, "Not include: #{key}" unless keys.include?(key)
14
+ def transpose(key: keys.first, name: :N)
15
+ raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
16
16
 
17
17
  # Find unused name
18
18
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
19
- new_key = (:name1..).find { |k| !new_keys.include?(k) } if new_keys.include?(new_key)
19
+ name = (:N1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
20
20
 
21
- hash = { new_key => (keys - [key]) }
21
+ hash = { name => (keys - [key]) }
22
22
  i = keys.index(key)
23
23
  each_row do |h|
24
24
  k = h.values[i]
@@ -33,7 +33,7 @@ module RedAmber
33
33
  # @param name [Symbol, String] key of the column which comes **from key names**.
34
34
  # @param value [Symbol, String] key of the column which comes **from values**.
35
35
  # @return [DataFrame] long DataFrame.
36
- def to_long(*keep_keys, name: :name, value: :value)
36
+ def to_long(*keep_keys, name: :N, value: :V)
37
37
  not_included = keep_keys - keys
38
38
  raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
39
39
 
@@ -63,7 +63,7 @@ module RedAmber
63
63
  # @param name [Symbol, String] key of the column which will be expanded **to key names**.
64
64
  # @param value [Symbol, String] key of the column which will be expanded **to values**.
65
65
  # @return [DataFrame] wide DataFrame.
66
- def to_wide(name: :name, value: :value)
66
+ def to_wide(name: :N, value: :V)
67
67
  name = name.to_sym
68
68
  raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
69
69
 
@@ -3,8 +3,8 @@
3
3
  module RedAmber
4
4
  # mix-in for the class DataFrame
5
5
  module DataFrameSelectable
6
- # select variables: [symbol] or [string]
7
- # select observations: [array of index], [range]
6
+ # select columns: [symbol] or [string]
7
+ # select rows: [array of index], [range]
8
8
  def [](*args)
9
9
  args.flatten!
10
10
  raise DataFrameArgumentError, 'Empty dataframe' if empty?
@@ -22,17 +22,17 @@ module RedAmber
22
22
  raise DataFrameArgumentError, "Invalid argument: #{args}"
23
23
  end
24
24
 
25
- # slice and select some observations to create sub DataFrame
25
+ # slice and select rows to create sub DataFrame
26
26
  def slice(*args, &block)
27
27
  slicer = args
28
28
  if block
29
29
  raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
30
30
 
31
- slicer = instance_eval(&block)
31
+ slicer = [instance_eval(&block)]
32
32
  end
33
- slicer = [slicer].flatten
33
+ slicer.flatten!
34
34
 
35
- raise DataFrameArgumentError, 'Empty dataframe' if empty?
35
+ raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
36
36
  return remove_all_values if slicer.empty? || slicer[0].nil?
37
37
 
38
38
  vector = parse_to_vector(slicer)
@@ -46,15 +46,59 @@ module RedAmber
46
46
  raise DataFrameArgumentError, "Invalid argument #{slicer}"
47
47
  end
48
48
 
49
- # remove selected observations to create sub DataFrame
49
+ def slice_by(key, keep_key: false, &block)
50
+ raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
51
+ raise DataFrameArgumentError, 'No block given' unless block
52
+ raise DataFrameArgumentError, "#{key} is no a key of self" unless key?(key)
53
+ return self if key.nil?
54
+
55
+ slicer = instance_eval(&block)
56
+ return DataFrame.new unless slicer
57
+
58
+ if slicer.is_a?(Range)
59
+ from = slicer.begin
60
+ from =
61
+ if from.is_a?(String)
62
+ self[key].index(from)
63
+ elsif from.nil?
64
+ 0
65
+ elsif from < 0
66
+ size + from
67
+ else
68
+ from
69
+ end
70
+ to = slicer.end
71
+ to =
72
+ if to.is_a?(String)
73
+ self[key].index(to)
74
+ elsif to.nil?
75
+ size - 1
76
+ elsif to < 0
77
+ size + to
78
+ else
79
+ to
80
+ end
81
+ slicer = (from..to).to_a
82
+ else
83
+ slicer = slicer.map { |x| x.is_a?(String) ? self[key].index(x) : x }
84
+ end
85
+
86
+ if keep_key
87
+ take(slicer)
88
+ else
89
+ take(slicer).drop(key)
90
+ end
91
+ end
92
+
93
+ # remove selected rows to create remainder DataFrame
50
94
  def remove(*args, &block)
51
95
  remover = args
52
96
  if block
53
97
  raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
54
98
 
55
- remover = instance_eval(&block)
99
+ remover = [instance_eval(&block)]
56
100
  end
57
- remover = [remover].flatten
101
+ remover.flatten!
58
102
 
59
103
  raise DataFrameArgumentError, 'Empty dataframe' if empty?
60
104
  return self if remover.empty? || remover[0].nil?
@@ -9,12 +9,16 @@ module RedAmber
9
9
  if block
10
10
  raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
11
11
 
12
- picker = instance_eval(&block)
12
+ picker = [instance_eval(&block)]
13
13
  end
14
- picker = [picker].flatten
14
+ picker.flatten!
15
15
  return DataFrame.new if picker.empty? || picker == [nil]
16
16
 
17
- picker = keys_by_booleans(picker) if booleans?(picker)
17
+ key_vector = Vector.new(keys)
18
+ picker_vector = parse_to_vector(picker)
19
+
20
+ picker = key_vector.filter(*picker_vector).to_a if picker_vector.boolean?
21
+ picker = key_vector.take(*picker_vector).to_a if picker_vector.numeric?
18
22
 
19
23
  # DataFrame#[] creates a Vector when a single key is specified.
20
24
  # DataFrame#pick creates a DataFrame with single key.
@@ -29,12 +33,22 @@ module RedAmber
29
33
  if block
30
34
  raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
31
35
 
32
- dropper = instance_eval(&block)
36
+ dropper = [instance_eval(&block)]
33
37
  end
34
- dropper = [dropper].flatten
35
- dropper = keys_by_booleans(dropper) if booleans?(dropper)
38
+ dropper.flatten!
39
+
40
+ key_vector = Vector.new(keys)
41
+ dropper_vector = parse_to_vector(dropper)
42
+
43
+ picker =
44
+ if dropper_vector.boolean?
45
+ key_vector.filter(*dropper_vector.primitive_invert).each.map(&:to_sym)
46
+ elsif dropper_vector.numeric?
47
+ keys - key_vector.take(*dropper_vector).each.map(&:to_sym)
48
+ else
49
+ keys - dropper
50
+ end
36
51
 
37
- picker = keys - dropper
38
52
  return DataFrame.new if picker.empty?
39
53
 
40
54
  # DataFrame#[] creates a Vector when a single key is specified.
@@ -91,10 +105,20 @@ module RedAmber
91
105
 
92
106
  def assign_update(*assigner, &block)
93
107
  if block
94
- raise DataFrameArgumentError, 'Must not specify both arguments and a block' unless assigner.empty?
95
-
96
- assigner = [instance_eval(&block)]
108
+ assigner_from_block = instance_eval(&block)
109
+ assigner =
110
+ if assigner.empty?
111
+ # block only
112
+ [assigner_from_block]
113
+ # If Ruby >= 3.0, one line pattern match can be used
114
+ # assigner_from_block in [Array, *]
115
+ elsif multiple_assigner?(assigner_from_block)
116
+ assigner.zip(assigner_from_block)
117
+ else
118
+ assigner.zip([assigner_from_block])
119
+ end
97
120
  end
121
+
98
122
  case assigner
99
123
  in [] | [nil] | [{}] | [[]]
100
124
  return self
@@ -113,6 +137,8 @@ module RedAmber
113
137
  updater = {}
114
138
  appender = {}
115
139
  key_array_pairs.each do |key, array|
140
+ raise DataFrameArgumentError, "Empty column data: #{key} => nil" if array.nil?
141
+
116
142
  if keys.include? key
117
143
  updater[key] = array
118
144
  else
@@ -153,7 +179,7 @@ module RedAmber
153
179
  data = updater[key]
154
180
  next unless data
155
181
 
156
- raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
182
+ raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
157
183
 
158
184
  a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
159
185
  fields[i] = Arrow::Field.new(key, a.value_data_type)
@@ -179,8 +205,13 @@ module RedAmber
179
205
  end
180
206
  end
181
207
 
182
- def keys_by_booleans(booleans)
183
- keys.select.with_index { |_, i| booleans[i] }
208
+ def multiple_assigner?(assigner)
209
+ case assigner
210
+ in [Vector, *] | [Array, *] | [Arrow::Array, *]
211
+ true
212
+ else
213
+ false
214
+ end
184
215
  end
185
216
  end
186
217
  end
@@ -122,7 +122,7 @@ module RedAmber
122
122
  return enum_for(:each) unless block_given?
123
123
 
124
124
  size.times do |i|
125
- yield self[i]
125
+ yield data[i]
126
126
  end
127
127
  end
128
128
 
@@ -34,13 +34,6 @@ module RedAmber
34
34
  end
35
35
  alias_method :std, :sd
36
36
 
37
- # option(s) required
38
- # - index
39
-
40
- # Returns other than value
41
- # - mode
42
- # - tdigest
43
-
44
37
  # Return quantile
45
38
  # 0.5 quantile (median) is returned by default.
46
39
  # Or return quantile for specified probability (prob).
@@ -88,8 +81,9 @@ module RedAmber
88
81
 
89
82
  # [Unary element-wise]: vector.func => vector
90
83
  unary_element_wise =
91
- %i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward fill_null_forward floor is_finite
92
- is_inf is_nan is_null is_valid round round_to_multiple sign sin tan trunc unique]
84
+ %i[abs acos asin array_sort_indices atan bit_wise_not ceil cos fill_null_backward \
85
+ fill_null_forward floor is_finite is_inf is_nan is_null is_valid ln log10 log1p log2 \
86
+ round round_to_multiple sign sin tan trunc unique]
93
87
  unary_element_wise.each do |function|
94
88
  define_method(function) do |**options|
95
89
  datum = exec_func_unary(function, options)
@@ -129,16 +123,9 @@ module RedAmber
129
123
  end
130
124
  alias_method :not, :invert
131
125
 
132
- # NaN support needed
133
- # - acos asin ln log10 log1p log2
134
-
135
- # Functions with numerical range check
136
- # - abs_checked acos_checked asin_checked cos_checked ln_checked
137
- # log10_checked log1p_checked log2_checked sin_checked tan_checked
138
-
139
126
  # [Binary element-wise]: vector.func(other) => vector
140
127
  binary_element_wise =
141
- %i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
128
+ %i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
142
129
  binary_element_wise.each do |function|
143
130
  define_method(function) do |other, **options|
144
131
  datum = exec_func_binary(function, other, options)
@@ -162,13 +149,6 @@ module RedAmber
162
149
  end
163
150
  end
164
151
 
165
- # NaN support needed
166
- # - logb
167
-
168
- # Functions with numerical range check
169
- # - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
170
- # shift_left_checked shift_right_checked
171
-
172
152
  # [Binary element-wise with operator]: vector.func(other) => vector
173
153
  binary_element_wise_op = {
174
154
  add: '+',
@@ -216,6 +196,23 @@ module RedAmber
216
196
  [Vector.new(Array(other) * size), self]
217
197
  end
218
198
 
199
+ # < Not implemented yet > ---
200
+
201
+ # option(s) required
202
+ # - index
203
+
204
+ # Returns other than value
205
+ # - mode
206
+ # - tdigest
207
+
208
+ # Functions with numerical range check (unary)
209
+ # - abs_checked acos_checked asin_checked cos_checked ln_checked
210
+ # log10_checked log1p_checked log2_checked sin_checked tan_checked
211
+
212
+ # Functions with numerical range check (binary)
213
+ # - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
214
+ # shift_left_checked shift_right_checked
215
+
219
216
  # (array functions)
220
217
  # dictionary_encode,
221
218
  # partition_nth_indices,
@@ -82,16 +82,17 @@ module RedAmber
82
82
 
83
83
  # @param values [Array, Arrow::Array, Vector]
84
84
  def is_in(*values)
85
- values.flatten!
85
+ self_data = chunked? ? data.pack : data
86
+
86
87
  array =
87
- case values[0]
88
- when Vector
89
- values[0].data
90
- when Arrow::Array
91
- values[0]
88
+ case values
89
+ in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
90
+ values[0].to_a
91
+ else
92
+ Array(values).flatten
92
93
  end
93
- array ||= data.class.new(values)
94
- Vector.new(data.is_in(array))
94
+
95
+ Vector.new(self_data.is_in(array))
95
96
  end
96
97
 
97
98
  # Arrow's support required
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-08-15 00:00:00.000000000 Z
11
+ date: 2022-09-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow