red_amber 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -159,12 +159,19 @@ module RedAmber
159
159
  @vectors || @vectors = init_instance_vars(:vectors)
160
160
  end
161
161
 
162
- # Returns row indices (0...size) in an Array.
162
+ # Returns row indices (start...(size+start)) in an Array.
163
163
  #
164
+ # @param start [Object]
165
+ # Object which has a #succ method.
164
166
  # @return [Array]
165
- # An Array of all indices of rows.
166
- def indices
167
- (0...size).to_a
167
+ # An Array of indices of the rows.
168
+ # @example
169
+ # (when self.size == 5)
170
+ # - indices #=> [0, 1, 2, 3, 4]
171
+ # - indices(1) #=> [1, 2, 3, 4, 5]
172
+ # - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
173
+ def indices(start = 0)
174
+ (start..).take(size)
168
175
  end
169
176
  alias_method :indexes, :indices
170
177
 
@@ -225,6 +232,18 @@ module RedAmber
225
232
  g
226
233
  end
227
234
 
235
+ def method_missing(name, *args, &block)
236
+ return v(name) if args.empty?
237
+
238
+ super
239
+ end
240
+
241
+ def respond_to_missing?(name, include_private)
242
+ return true if key?(name)
243
+
244
+ super
245
+ end
246
+
228
247
  private
229
248
 
230
249
  # initialize @variable, @keys, @vectors and return one of them
@@ -154,7 +154,7 @@ module RedAmber
154
154
 
155
155
  def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
156
156
  original = self
157
- indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
157
+ indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
158
158
  df = slice(indices).assign do
159
159
  assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
160
160
  vectors.each_with_object(assigner) do |v, a|
@@ -173,12 +173,12 @@ module RedAmber
173
173
  end
174
174
 
175
175
  df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
176
- df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
176
+ df = size > head + tail ? df[0, 0, 0..head, -tail..-1] : df[0, 0, 0..-1]
177
177
  df = df.assign do
178
178
  vectors.each_with_object({}) do |v, assigner|
179
179
  vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
180
180
  .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
181
- assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
181
+ assigner[v.key] = original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
182
182
  end
183
183
  end
184
184
 
@@ -5,20 +5,20 @@ module RedAmber
5
5
  module DataFrameReshaping
6
6
  # Transpose a wide DataFrame.
7
7
  #
8
- # @param key [Symbol, FalseClass] key of the index column
8
+ # @param key [Symbol] key of the index column
9
9
  # to transpose into keys.
10
- # If it is false, keys[0] is used.
11
- # @param new_key [Symbol, FalseClass] key name of transposed index column.
12
- # If it is false, :name is used. If it already exists, :name1.succ is used.
10
+ # If it is not specified, keys[0] is used.
11
+ # @param new_key [Symbol] key name of transposed index column.
12
+ # If it is not specified, :N is used. If it already exists, :N1 or :N1.succ is used.
13
13
  # @return [DataFrame] transposed DataFrame
14
- def transpose(key: keys.first, new_key: :name)
15
- raise DataFrameArgumentError, "Not include: #{key}" unless keys.include?(key)
14
+ def transpose(key: keys.first, name: :N)
15
+ raise DataFrameArgumentError, "Self does not include: #{key}" unless keys.include?(key)
16
16
 
17
17
  # Find unused name
18
18
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
19
- new_key = (:name1..).find { |k| !new_keys.include?(k) } if new_keys.include?(new_key)
19
+ name = (:N1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)
20
20
 
21
- hash = { new_key => (keys - [key]) }
21
+ hash = { name => (keys - [key]) }
22
22
  i = keys.index(key)
23
23
  each_row do |h|
24
24
  k = h.values[i]
@@ -33,7 +33,7 @@ module RedAmber
33
33
  # @param name [Symbol, String] key of the column which comes **from key names**.
34
34
  # @param value [Symbol, String] key of the column which comes **from values**.
35
35
  # @return [DataFrame] long DataFrame.
36
- def to_long(*keep_keys, name: :name, value: :value)
36
+ def to_long(*keep_keys, name: :N, value: :V)
37
37
  not_included = keep_keys - keys
38
38
  raise DataFrameArgumentError, "Not have keys #{not_included}" unless not_included.empty?
39
39
 
@@ -63,7 +63,7 @@ module RedAmber
63
63
  # @param name [Symbol, String] key of the column which will be expanded **to key names**.
64
64
  # @param value [Symbol, String] key of the column which will be expanded **to values**.
65
65
  # @return [DataFrame] wide DataFrame.
66
- def to_wide(name: :name, value: :value)
66
+ def to_wide(name: :N, value: :V)
67
67
  name = name.to_sym
68
68
  raise DataFrameArgumentError, "Invalid key: #{name}" unless keys.include?(name)
69
69
 
@@ -3,8 +3,8 @@
3
3
  module RedAmber
4
4
  # mix-in for the class DataFrame
5
5
  module DataFrameSelectable
6
- # select variables: [symbol] or [string]
7
- # select observations: [array of index], [range]
6
+ # select columns: [symbol] or [string]
7
+ # select rows: [array of index], [range]
8
8
  def [](*args)
9
9
  args.flatten!
10
10
  raise DataFrameArgumentError, 'Empty dataframe' if empty?
@@ -22,17 +22,17 @@ module RedAmber
22
22
  raise DataFrameArgumentError, "Invalid argument: #{args}"
23
23
  end
24
24
 
25
- # slice and select some observations to create sub DataFrame
25
+ # slice and select rows to create sub DataFrame
26
26
  def slice(*args, &block)
27
27
  slicer = args
28
28
  if block
29
29
  raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
30
30
 
31
- slicer = instance_eval(&block)
31
+ slicer = [instance_eval(&block)]
32
32
  end
33
- slicer = [slicer].flatten
33
+ slicer.flatten!
34
34
 
35
- raise DataFrameArgumentError, 'Empty dataframe' if empty?
35
+ raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
36
36
  return remove_all_values if slicer.empty? || slicer[0].nil?
37
37
 
38
38
  vector = parse_to_vector(slicer)
@@ -46,15 +46,59 @@ module RedAmber
46
46
  raise DataFrameArgumentError, "Invalid argument #{slicer}"
47
47
  end
48
48
 
49
- # remove selected observations to create sub DataFrame
49
+ def slice_by(key, keep_key: false, &block)
50
+ raise DataFrameArgumentError, 'Self is an empty dataframe' if empty?
51
+ raise DataFrameArgumentError, 'No block given' unless block
52
+ raise DataFrameArgumentError, "#{key} is no a key of self" unless key?(key)
53
+ return self if key.nil?
54
+
55
+ slicer = instance_eval(&block)
56
+ return DataFrame.new unless slicer
57
+
58
+ if slicer.is_a?(Range)
59
+ from = slicer.begin
60
+ from =
61
+ if from.is_a?(String)
62
+ self[key].index(from)
63
+ elsif from.nil?
64
+ 0
65
+ elsif from < 0
66
+ size + from
67
+ else
68
+ from
69
+ end
70
+ to = slicer.end
71
+ to =
72
+ if to.is_a?(String)
73
+ self[key].index(to)
74
+ elsif to.nil?
75
+ size - 1
76
+ elsif to < 0
77
+ size + to
78
+ else
79
+ to
80
+ end
81
+ slicer = (from..to).to_a
82
+ else
83
+ slicer = slicer.map { |x| x.is_a?(String) ? self[key].index(x) : x }
84
+ end
85
+
86
+ if keep_key
87
+ take(slicer)
88
+ else
89
+ take(slicer).drop(key)
90
+ end
91
+ end
92
+
93
+ # remove selected rows to create remainder DataFrame
50
94
  def remove(*args, &block)
51
95
  remover = args
52
96
  if block
53
97
  raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
54
98
 
55
- remover = instance_eval(&block)
99
+ remover = [instance_eval(&block)]
56
100
  end
57
- remover = [remover].flatten
101
+ remover.flatten!
58
102
 
59
103
  raise DataFrameArgumentError, 'Empty dataframe' if empty?
60
104
  return self if remover.empty? || remover[0].nil?
@@ -9,12 +9,16 @@ module RedAmber
9
9
  if block
10
10
  raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
11
11
 
12
- picker = instance_eval(&block)
12
+ picker = [instance_eval(&block)]
13
13
  end
14
- picker = [picker].flatten
14
+ picker.flatten!
15
15
  return DataFrame.new if picker.empty? || picker == [nil]
16
16
 
17
- picker = keys_by_booleans(picker) if booleans?(picker)
17
+ key_vector = Vector.new(keys)
18
+ picker_vector = parse_to_vector(picker)
19
+
20
+ picker = key_vector.filter(*picker_vector).to_a if picker_vector.boolean?
21
+ picker = key_vector.take(*picker_vector).to_a if picker_vector.numeric?
18
22
 
19
23
  # DataFrame#[] creates a Vector when a single key is specified.
20
24
  # DataFrame#pick creates a DataFrame with single key.
@@ -29,12 +33,22 @@ module RedAmber
29
33
  if block
30
34
  raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
31
35
 
32
- dropper = instance_eval(&block)
36
+ dropper = [instance_eval(&block)]
33
37
  end
34
- dropper = [dropper].flatten
35
- dropper = keys_by_booleans(dropper) if booleans?(dropper)
38
+ dropper.flatten!
39
+
40
+ key_vector = Vector.new(keys)
41
+ dropper_vector = parse_to_vector(dropper)
42
+
43
+ picker =
44
+ if dropper_vector.boolean?
45
+ key_vector.filter(*dropper_vector.primitive_invert).each.map(&:to_sym)
46
+ elsif dropper_vector.numeric?
47
+ keys - key_vector.take(*dropper_vector).each.map(&:to_sym)
48
+ else
49
+ keys - dropper
50
+ end
36
51
 
37
- picker = keys - dropper
38
52
  return DataFrame.new if picker.empty?
39
53
 
40
54
  # DataFrame#[] creates a Vector when a single key is specified.
@@ -91,10 +105,20 @@ module RedAmber
91
105
 
92
106
  def assign_update(*assigner, &block)
93
107
  if block
94
- raise DataFrameArgumentError, 'Must not specify both arguments and a block' unless assigner.empty?
95
-
96
- assigner = [instance_eval(&block)]
108
+ assigner_from_block = instance_eval(&block)
109
+ assigner =
110
+ if assigner.empty?
111
+ # block only
112
+ [assigner_from_block]
113
+ # If Ruby >= 3.0, one line pattern match can be used
114
+ # assigner_from_block in [Array, *]
115
+ elsif multiple_assigner?(assigner_from_block)
116
+ assigner.zip(assigner_from_block)
117
+ else
118
+ assigner.zip([assigner_from_block])
119
+ end
97
120
  end
121
+
98
122
  case assigner
99
123
  in [] | [nil] | [{}] | [[]]
100
124
  return self
@@ -113,6 +137,8 @@ module RedAmber
113
137
  updater = {}
114
138
  appender = {}
115
139
  key_array_pairs.each do |key, array|
140
+ raise DataFrameArgumentError, "Empty column data: #{key} => nil" if array.nil?
141
+
116
142
  if keys.include? key
117
143
  updater[key] = array
118
144
  else
@@ -153,7 +179,7 @@ module RedAmber
153
179
  data = updater[key]
154
180
  next unless data
155
181
 
156
- raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.size != size
182
+ raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})" if data.nil? || data.size != size
157
183
 
158
184
  a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
159
185
  fields[i] = Arrow::Field.new(key, a.value_data_type)
@@ -179,8 +205,13 @@ module RedAmber
179
205
  end
180
206
  end
181
207
 
182
- def keys_by_booleans(booleans)
183
- keys.select.with_index { |_, i| booleans[i] }
208
+ def multiple_assigner?(assigner)
209
+ case assigner
210
+ in [Vector, *] | [Array, *] | [Arrow::Array, *]
211
+ true
212
+ else
213
+ false
214
+ end
184
215
  end
185
216
  end
186
217
  end
@@ -122,7 +122,7 @@ module RedAmber
122
122
  return enum_for(:each) unless block_given?
123
123
 
124
124
  size.times do |i|
125
- yield self[i]
125
+ yield data[i]
126
126
  end
127
127
  end
128
128
 
@@ -34,13 +34,6 @@ module RedAmber
34
34
  end
35
35
  alias_method :std, :sd
36
36
 
37
- # option(s) required
38
- # - index
39
-
40
- # Returns other than value
41
- # - mode
42
- # - tdigest
43
-
44
37
  # Return quantile
45
38
  # 0.5 quantile (median) is returned by default.
46
39
  # Or return quantile for specified probability (prob).
@@ -88,8 +81,9 @@ module RedAmber
88
81
 
89
82
  # [Unary element-wise]: vector.func => vector
90
83
  unary_element_wise =
91
- %i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward fill_null_forward floor is_finite
92
- is_inf is_nan is_null is_valid round round_to_multiple sign sin tan trunc unique]
84
+ %i[abs acos asin array_sort_indices atan bit_wise_not ceil cos fill_null_backward \
85
+ fill_null_forward floor is_finite is_inf is_nan is_null is_valid ln log10 log1p log2 \
86
+ round round_to_multiple sign sin tan trunc unique]
93
87
  unary_element_wise.each do |function|
94
88
  define_method(function) do |**options|
95
89
  datum = exec_func_unary(function, options)
@@ -129,16 +123,9 @@ module RedAmber
129
123
  end
130
124
  alias_method :not, :invert
131
125
 
132
- # NaN support needed
133
- # - acos asin ln log10 log1p log2
134
-
135
- # Functions with numerical range check
136
- # - abs_checked acos_checked asin_checked cos_checked ln_checked
137
- # log10_checked log1p_checked log2_checked sin_checked tan_checked
138
-
139
126
  # [Binary element-wise]: vector.func(other) => vector
140
127
  binary_element_wise =
141
- %i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
128
+ %i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
142
129
  binary_element_wise.each do |function|
143
130
  define_method(function) do |other, **options|
144
131
  datum = exec_func_binary(function, other, options)
@@ -162,13 +149,6 @@ module RedAmber
162
149
  end
163
150
  end
164
151
 
165
- # NaN support needed
166
- # - logb
167
-
168
- # Functions with numerical range check
169
- # - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
170
- # shift_left_checked shift_right_checked
171
-
172
152
  # [Binary element-wise with operator]: vector.func(other) => vector
173
153
  binary_element_wise_op = {
174
154
  add: '+',
@@ -216,6 +196,23 @@ module RedAmber
216
196
  [Vector.new(Array(other) * size), self]
217
197
  end
218
198
 
199
+ # < Not implemented yet > ---
200
+
201
+ # option(s) required
202
+ # - index
203
+
204
+ # Returns other than value
205
+ # - mode
206
+ # - tdigest
207
+
208
+ # Functions with numerical range check (unary)
209
+ # - abs_checked acos_checked asin_checked cos_checked ln_checked
210
+ # log10_checked log1p_checked log2_checked sin_checked tan_checked
211
+
212
+ # Functions with numerical range check (binary)
213
+ # - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
214
+ # shift_left_checked shift_right_checked
215
+
219
216
  # (array functions)
220
217
  # dictionary_encode,
221
218
  # partition_nth_indices,
@@ -82,16 +82,17 @@ module RedAmber
82
82
 
83
83
  # @param values [Array, Arrow::Array, Vector]
84
84
  def is_in(*values)
85
- values.flatten!
85
+ self_data = chunked? ? data.pack : data
86
+
86
87
  array =
87
- case values[0]
88
- when Vector
89
- values[0].data
90
- when Arrow::Array
91
- values[0]
88
+ case values
89
+ in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
90
+ values[0].to_a
91
+ else
92
+ Array(values).flatten
92
93
  end
93
- array ||= data.class.new(values)
94
- Vector.new(data.is_in(array))
94
+
95
+ Vector.new(self_data.is_in(array))
95
96
  end
96
97
 
97
98
  # Arrow's support required
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-08-15 00:00:00.000000000 Z
11
+ date: 2022-09-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow