red_amber 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +133 -51
- data/.yardopts +2 -0
- data/CHANGELOG.md +203 -1
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +61 -45
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +63 -0
- data/doc/DataFrame.md +35 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +295 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +537 -68
- data/lib/red_amber/data_frame_combinable.rb +776 -123
- data/lib/red_amber/data_frame_displayable.rb +248 -18
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +81 -10
- data/lib/red_amber/data_frame_reshaping.rb +216 -21
- data/lib/red_amber/data_frame_selectable.rb +781 -120
- data/lib/red_amber/data_frame_variable_operation.rb +561 -85
- data/lib/red_amber/group.rb +195 -21
- data/lib/red_amber/helper.rb +114 -32
- data/lib/red_amber/refinements.rb +206 -0
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +435 -58
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +321 -69
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +397 -24
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +15 -1
- data/red_amber.gemspec +4 -3
- metadata +19 -11
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -294
@@ -1,73 +1,373 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module DataFrameVariableOperation
|
6
|
-
#
|
6
|
+
# Array is refined
|
7
|
+
using RefineArray
|
8
|
+
|
9
|
+
# Select variables (columns) to create a new DataFrame.
|
10
|
+
#
|
11
|
+
# @note if a single key is specified, DataFrame#pick generates a DataFrame.
|
12
|
+
# On the other hand, DataFrame#[] generates a Vector.
|
13
|
+
#
|
14
|
+
# @overload pick(keys)
|
15
|
+
# Pick up variables by Symbol(s) or String(s).
|
16
|
+
#
|
17
|
+
# @param keys [Symbol, String, <Symbol, String>]
|
18
|
+
# key name(s) of variables to pick.
|
19
|
+
# @return [DataFrame]
|
20
|
+
# picked DataFrame.
|
21
|
+
# @example Pick up by a key
|
22
|
+
# languages
|
23
|
+
#
|
24
|
+
# # =>
|
25
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x00000000000cfd8c>
|
26
|
+
# Language Creator Released
|
27
|
+
# <string> <string> <uint16>
|
28
|
+
# 0 Ruby Yukihiro Matsumoto 1995
|
29
|
+
# 1 Python Guido van Rossum 1991
|
30
|
+
# 2 R Ross Ihaka and Robert Gentleman 1993
|
31
|
+
# 3 Rust Graydon Hoare 2001
|
32
|
+
#
|
33
|
+
# languages.pick(:Language)
|
34
|
+
#
|
35
|
+
# # =>
|
36
|
+
# #<RedAmber::DataFrame : 4 x 1 Vector, 0x0000000000113d20>
|
37
|
+
# Language
|
38
|
+
# <string>
|
39
|
+
# 0 Ruby
|
40
|
+
# 1 Python
|
41
|
+
# 2 R
|
42
|
+
# 3 Rust
|
43
|
+
#
|
44
|
+
# languages[:Language]
|
45
|
+
#
|
46
|
+
# # =>
|
47
|
+
# #<RedAmber::Vector(:string, size=4):0x000000000010359c>
|
48
|
+
# ["Ruby", "Python", "R", "Rust"]
|
49
|
+
#
|
50
|
+
# @overload pick(booleans)
|
51
|
+
# Pick up variables by booleans.
|
52
|
+
#
|
53
|
+
# @param booleans [<Booleans, nil>, Vector]
|
54
|
+
# boolean array or vector to pick up variables at true.
|
55
|
+
# @return [DataFrame]
|
56
|
+
# picked DataFrame.
|
57
|
+
# @example Pick up by booleans
|
58
|
+
# languages.pick(true, true, false)
|
59
|
+
#
|
60
|
+
# # =>
|
61
|
+
# #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000066a1c>
|
62
|
+
# Language Creator
|
63
|
+
# <string> <string>
|
64
|
+
# 0 Ruby Yukihiro Matsumoto
|
65
|
+
# 1 Python Guido van Rossum
|
66
|
+
# 2 R Ross Ihaka and Robert Gentleman
|
67
|
+
# 3 Rust Graydon Hoare
|
68
|
+
#
|
69
|
+
# is_string = languages.vectors.map(&:string?) # [true, true, false]
|
70
|
+
# languages.pick(is_string)
|
71
|
+
# # =>
|
72
|
+
# (same as above)
|
73
|
+
#
|
74
|
+
# @overload pick(indices)
|
75
|
+
# Pick up variables by column indices.
|
76
|
+
#
|
77
|
+
# @param indices [Integer, Float, Range<Integer>, Vector, Arrow::Array]
|
78
|
+
# numeric array to pick up variables by column index.
|
79
|
+
# @return [DataFrame]
|
80
|
+
# picked DataFrame.
|
81
|
+
# @example Pick up by indices
|
82
|
+
# languages.pick(0, 2, 1)
|
83
|
+
#
|
84
|
+
# # =>
|
85
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000011cfb0>
|
86
|
+
# Language Released Creator
|
87
|
+
# <string> <uint16> <string>
|
88
|
+
# 0 Ruby 1995 Yukihiro Matsumoto
|
89
|
+
# 1 Python 1991 Guido van Rossum
|
90
|
+
# 2 R 1993 Ross Ihaka and Robert Gentleman
|
91
|
+
# 3 Rust 2001 Graydon Hoare
|
92
|
+
#
|
93
|
+
# @overload pick
|
94
|
+
# Pick up variables by the yielded value from the block.
|
95
|
+
# @note Arguments and a block cannot be used simultaneously.
|
96
|
+
#
|
97
|
+
# @yield [self]
|
98
|
+
# the block is called within the context of self.
|
99
|
+
# (Block is called by instance_eval(&block). )
|
100
|
+
# @yieldreturn [keys, booleans, indices]
|
101
|
+
# returns keys, booleans or indices just same as arguments.
|
102
|
+
# @return [DataFrame]
|
103
|
+
# picked DataFrame.
|
104
|
+
# @example Pick up by a block.
|
105
|
+
# # same as languages.pick { |df| df.vectors.map(&:string?) }
|
106
|
+
# languages.pick { languages.vectors.map(&:string?) }
|
107
|
+
#
|
108
|
+
# # =>
|
109
|
+
# #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000154104>
|
110
|
+
# Language Creator
|
111
|
+
# <string> <string>
|
112
|
+
# 0 Ruby Yukihiro Matsumoto
|
113
|
+
# 1 Python Guido van Rossum
|
114
|
+
# 2 R Ross Ihaka and Robert Gentleman
|
115
|
+
# 3 Rust Graydon Hoare
|
116
|
+
#
|
7
117
|
def pick(*args, &block)
|
8
|
-
picker = args
|
9
118
|
if block
|
10
|
-
|
119
|
+
unless args.empty?
|
120
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and block.'
|
121
|
+
end
|
11
122
|
|
12
|
-
|
123
|
+
args = [instance_eval(&block)]
|
13
124
|
end
|
14
|
-
picker.flatten!
|
15
|
-
return DataFrame.new if picker.empty? || picker == [nil]
|
16
|
-
|
17
|
-
key_vector = Vector.new(keys)
|
18
|
-
vec = parse_to_vector(picker, vsize: n_keys)
|
19
|
-
|
20
|
-
ary =
|
21
|
-
if vec.boolean?
|
22
|
-
key_vector.filter(*vec).to_a
|
23
|
-
elsif vec.numeric?
|
24
|
-
key_vector.take(*vec).to_a
|
25
|
-
elsif vec.string? || vec.dictionary?
|
26
|
-
vec.to_a
|
27
|
-
else
|
28
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
29
|
-
end
|
30
125
|
|
31
|
-
|
32
|
-
|
33
|
-
|
126
|
+
case args
|
127
|
+
in [] | [nil]
|
128
|
+
return DataFrame.new
|
129
|
+
in [*] if args.symbol?
|
130
|
+
return DataFrame.create(@table.select_columns(*args))
|
131
|
+
in [*] if args.boolean?
|
132
|
+
picker = keys.select_by_booleans(args)
|
133
|
+
return DataFrame.create(@table.select_columns(*picker))
|
134
|
+
in [(Vector | Arrow::Array | Arrow::ChunkedArray) => a]
|
135
|
+
picker = a.to_a
|
136
|
+
else
|
137
|
+
picker = parse_args(args, n_keys)
|
138
|
+
end
|
139
|
+
|
140
|
+
return DataFrame.new if picker.compact.empty?
|
141
|
+
|
142
|
+
if picker.boolean?
|
143
|
+
picker = keys.select_by_booleans(picker)
|
144
|
+
return DataFrame.create(@table.select_columns(*picker))
|
145
|
+
end
|
146
|
+
picker.compact!
|
147
|
+
raise DataFrameArgumentError, "some keys are duplicated: #{args}" if picker.uniq!
|
148
|
+
|
149
|
+
DataFrame.create(@table.select_columns(*picker))
|
34
150
|
end
|
35
151
|
|
36
|
-
#
|
152
|
+
# Drop off some variables (columns) to create a remainer DataFrame.
|
153
|
+
#
|
154
|
+
# @note DataFrame#drop creates a DataFrame even if it is a single column
|
155
|
+
# (not a Vector).
|
156
|
+
#
|
157
|
+
# @overload drop(keys)
|
158
|
+
# Drop off variables by Symbol(s) or String(s).
|
159
|
+
#
|
160
|
+
# @param keys [Symbol, String, <Symbol, String>]
|
161
|
+
# key name(s) of variables to drop.
|
162
|
+
# @return [DataFrame]
|
163
|
+
# remainer DataFrame.
|
164
|
+
# @example Drop off by a key
|
165
|
+
# languages
|
166
|
+
#
|
167
|
+
# # =>
|
168
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x00000000000cfd8c>
|
169
|
+
# Language Creator Released
|
170
|
+
# <string> <string> <uint16>
|
171
|
+
# 0 Ruby Yukihiro Matsumoto 1995
|
172
|
+
# 1 Python Guido van Rossum 1991
|
173
|
+
# 2 R Ross Ihaka and Robert Gentleman 1993
|
174
|
+
# 3 Rust Graydon Hoare 2001
|
175
|
+
#
|
176
|
+
# languages.drop(:Language)
|
177
|
+
#
|
178
|
+
# # =>
|
179
|
+
# #<RedAmber::DataFrame : 4 x 2 Vectors, 0x000000000005805c>
|
180
|
+
# Creator Released
|
181
|
+
# <string> <uint16>
|
182
|
+
# 0 Yukihiro Matsumoto 1995
|
183
|
+
# 1 Guido van Rossum 1991
|
184
|
+
# 2 Ross Ihaka and Robert Gentleman 1993
|
185
|
+
# 3 Graydon Hoare 2001
|
186
|
+
#
|
187
|
+
# @overload drop(booleans)
|
188
|
+
# Drop off variables by booleans.
|
189
|
+
#
|
190
|
+
# @param booleans [<Booleans, nil>, Vector]
|
191
|
+
# boolean array or vector of variables to drop at true.
|
192
|
+
# @return [DataFrame]
|
193
|
+
# remainer DataFrame.
|
194
|
+
# @example Drop off by booleans
|
195
|
+
# is_numeric = languages.vectors.map(&:numeric?) # [nil, nil, true]
|
196
|
+
# languages.drop(is_numeric)
|
197
|
+
#
|
198
|
+
# # =>
|
199
|
+
# #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000066a1c>
|
200
|
+
# Language Creator
|
201
|
+
# <string> <string>
|
202
|
+
# 0 Ruby Yukihiro Matsumoto
|
203
|
+
# 1 Python Guido van Rossum
|
204
|
+
# 2 R Ross Ihaka and Robert Gentleman
|
205
|
+
# 3 Rust Graydon Hoare
|
206
|
+
#
|
207
|
+
# @overload drop(indices)
|
208
|
+
# Drop off variables by column indices.
|
209
|
+
#
|
210
|
+
# @param indices [Integer, Float, Range<Integer>, Vector, Arrow::Array]
|
211
|
+
# numeric array of variables to drop by column index.
|
212
|
+
# @return [DataFrame]
|
213
|
+
# remainer DataFrame.
|
214
|
+
# @example Drop off by indices
|
215
|
+
# languages.drop(2)
|
216
|
+
#
|
217
|
+
# # =>
|
218
|
+
# #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000066a1c>
|
219
|
+
# Language Creator
|
220
|
+
# <string> <string>
|
221
|
+
# 0 Ruby Yukihiro Matsumoto
|
222
|
+
# 1 Python Guido van Rossum
|
223
|
+
# 2 R Ross Ihaka and Robert Gentleman
|
224
|
+
# 3 Rust Graydon Hoare
|
225
|
+
#
|
226
|
+
# @overload drop
|
227
|
+
# Drop off variables by the yielded value from the block.
|
228
|
+
# @note Arguments and a block cannot be used simultaneously.
|
229
|
+
#
|
230
|
+
# @yield [self] the block is called within the context of self.
|
231
|
+
# (Block is called by instance_eval(&block). )
|
232
|
+
# @yieldreturn [keys, booleans, indices]
|
233
|
+
# returns keys, booleans or indices just same as arguments.
|
234
|
+
# @return [DataFrame]
|
235
|
+
# remainer DataFrame.
|
236
|
+
# @example Drop off by a block.
|
237
|
+
# # same as languages.drop { |df| df.vectors.map(&:numeric?) }
|
238
|
+
# languages.drop { vectors.map(&:numeric?) }
|
239
|
+
#
|
240
|
+
# # =>
|
241
|
+
# #<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000154104>
|
242
|
+
# Language Creator
|
243
|
+
# <string> <string>
|
244
|
+
# 0 Ruby Yukihiro Matsumoto
|
245
|
+
# 1 Python Guido van Rossum
|
246
|
+
# 2 R Ross Ihaka and Robert Gentleman
|
247
|
+
# 3 Rust Graydon Hoare
|
248
|
+
#
|
37
249
|
def drop(*args, &block)
|
38
|
-
dropper = args
|
39
250
|
if block
|
40
|
-
|
251
|
+
unless args.empty?
|
252
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and block.'
|
253
|
+
end
|
41
254
|
|
42
|
-
|
255
|
+
args = [instance_eval(&block)]
|
43
256
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
keys - key_vector.take(*vec).each.map(&:to_sym) # Array
|
54
|
-
elsif vec.string? || vec.dictionary?
|
55
|
-
keys - vec.to_a.map { _1&.to_sym } # Array
|
257
|
+
return self if args.empty? || empty?
|
258
|
+
|
259
|
+
picker =
|
260
|
+
if args.symbol?
|
261
|
+
keys - args
|
262
|
+
elsif args.boolean?
|
263
|
+
keys.reject_by_booleans(args)
|
264
|
+
elsif args.integer?
|
265
|
+
keys.reject_by_indices(args)
|
56
266
|
else
|
57
|
-
|
267
|
+
dropper = parse_args(args, n_keys)
|
268
|
+
if dropper.boolean?
|
269
|
+
keys.reject_by_booleans(dropper)
|
270
|
+
elsif dropper.symbol?
|
271
|
+
keys - dropper
|
272
|
+
else
|
273
|
+
dropper.compact!
|
274
|
+
unless dropper.integer?
|
275
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
276
|
+
end
|
277
|
+
|
278
|
+
keys.reject_by_indices(dropper)
|
279
|
+
end
|
58
280
|
end
|
59
281
|
|
60
|
-
return DataFrame.new if
|
282
|
+
return DataFrame.new if picker.empty?
|
61
283
|
|
62
|
-
|
63
|
-
# DataFrame#drop creates a DataFrame with single key.
|
64
|
-
DataFrame.new(@table[ary])
|
284
|
+
DataFrame.create(@table.select_columns(*picker))
|
65
285
|
end
|
66
286
|
|
67
|
-
# rename
|
287
|
+
# Rename keys (variable/column names) to create an updated DataFrame.
|
288
|
+
#
|
289
|
+
# @overload rename(key_pairs)
|
290
|
+
# Rename by key pairs as a Hash.
|
291
|
+
#
|
292
|
+
# @param key_pairs [Hash{existing_key => new_key}]
|
293
|
+
# key pair(s) of existing name and new name.
|
294
|
+
# @return [DataFrame]
|
295
|
+
# renamed DataFrame.
|
296
|
+
# @example Rename by a Hash
|
297
|
+
# comecome
|
298
|
+
#
|
299
|
+
# # =>
|
300
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000037b4>
|
301
|
+
# name age
|
302
|
+
# <string> <uint8>
|
303
|
+
# 0 Yasuko 68
|
304
|
+
# 1 Rui 49
|
305
|
+
# 2 Hinata 28
|
306
|
+
#
|
307
|
+
# comecome.rename(:age => :age_in_1993)
|
308
|
+
#
|
309
|
+
# # =>
|
310
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000037c8>
|
311
|
+
# name age_in_1993
|
312
|
+
# <string> <uint8>
|
313
|
+
# 0 Yasuko 68
|
314
|
+
# 1 Rui 49
|
315
|
+
# 2 Hinata 28
|
316
|
+
#
|
317
|
+
# @overload rename(key_pairs)
|
318
|
+
# Rename by key pairs as an Array of Array.
|
319
|
+
#
|
320
|
+
# @param key_pairs [<Array[existing_key, new_key]>]
|
321
|
+
# key pair(s) of existing name and new name.
|
322
|
+
# @return [DataFrame]
|
323
|
+
# renamed DataFrame.
|
324
|
+
# @example Rename by an Array
|
325
|
+
# renamer = [[:name, :heroine], [:age, :age_in_1993]]
|
326
|
+
# comecome.rename(renamer)
|
327
|
+
#
|
328
|
+
# # =>
|
329
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000037dc>
|
330
|
+
# heroine age_in_1993
|
331
|
+
# <string> <uint8>
|
332
|
+
# 0 Yasuko 68
|
333
|
+
# 1 Rui 49
|
334
|
+
# 2 Hinata 28
|
335
|
+
#
|
336
|
+
# @overload rename
|
337
|
+
# Rename by key pairs yielding from block.
|
338
|
+
#
|
339
|
+
# @yield [self] the block is called within the context of self.
|
340
|
+
# (Block is called by instance_eval(&block). )
|
341
|
+
# @yieldreturn [<[existing_key, new_key]>, Hash]
|
342
|
+
# returns an Array or a Hash just same as arguments.
|
343
|
+
# @return [DataFrame]
|
344
|
+
# renamed DataFrame.
|
345
|
+
# @example Rename by block.
|
346
|
+
# df
|
347
|
+
#
|
348
|
+
# # =>
|
349
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000c29c>
|
350
|
+
# X Y Z
|
351
|
+
# <uint8> <uint8> <uint8>
|
352
|
+
# 0 1 3 5
|
353
|
+
# 1 2 4 6
|
354
|
+
#
|
355
|
+
# df.rename { keys.zip(keys.map(&:downcase)) }
|
356
|
+
# # or
|
357
|
+
# df.rename { [keys, keys.map(&:downcase)].transpose }
|
358
|
+
#
|
359
|
+
# # =>
|
360
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000c364>
|
361
|
+
# x y z
|
362
|
+
# <uint8> <uint8> <uint8>
|
363
|
+
# 0 1 3 5
|
364
|
+
# 1 2 4 6
|
365
|
+
#
|
68
366
|
def rename(*renamer, &block)
|
69
367
|
if block
|
70
|
-
|
368
|
+
unless renamer.empty?
|
369
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and a block'
|
370
|
+
end
|
71
371
|
|
72
372
|
renamer = [instance_eval(&block)]
|
73
373
|
end
|
@@ -88,37 +388,211 @@ module RedAmber
|
|
88
388
|
rename_by_hash(key_pairs)
|
89
389
|
end
|
90
390
|
|
91
|
-
#
|
391
|
+
# Assign new or updated variables (columns) and create an updated DataFrame.
|
392
|
+
# - Array-like variables with new keys will append new columns from right.
|
393
|
+
# - Array-like variables with existing keys will update corresponding vectors.
|
394
|
+
# - Symbol key and String key are considered as the same key.
|
395
|
+
# - If assigner is empty or nil, returns self.
|
396
|
+
#
|
397
|
+
# @overload assign(key_value_pairs)
|
398
|
+
# accepts pairs of key and values by an Array or a Hash.
|
399
|
+
#
|
400
|
+
# @param key_value_pairs [Array<key, array_like>, Hash{key => array_like}]
|
401
|
+
# `key` must be a Symbol or a String.
|
402
|
+
# `array_like` is column data to be assigned.
|
403
|
+
# It must be one of `Vector` or `Arrow::Array` or `Array`.
|
404
|
+
# @return [DataFrame]
|
405
|
+
# assigned DataFrame.
|
406
|
+
# @example Assign a new column
|
407
|
+
# comecome
|
408
|
+
#
|
409
|
+
# # =>
|
410
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000280dc>
|
411
|
+
# name age
|
412
|
+
# <string> <uint8>
|
413
|
+
# 0 Yasuko 68
|
414
|
+
# 1 Rui 49
|
415
|
+
# 2 Hinata 28
|
416
|
+
#
|
417
|
+
# brothers = ['Santa', nil, 'Momotaro']
|
418
|
+
# comecome.assign(brother: brothers)
|
419
|
+
# # or
|
420
|
+
# comecome.assign({ brother: brothers })
|
421
|
+
# # or
|
422
|
+
# comecome.assign(:brother, brothers)
|
423
|
+
# # or
|
424
|
+
# comecome.assign([:brother, brothers])
|
425
|
+
#
|
426
|
+
# # =>
|
427
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000004077c>
|
428
|
+
# name age brother
|
429
|
+
# <string> <uint8> <string>
|
430
|
+
# 0 Yasuko 68 Santa
|
431
|
+
# 1 Rui 49 (nil)
|
432
|
+
# 2 Hinata 28 Momotaro
|
433
|
+
#
|
434
|
+
# @example Assign new data for an existing column
|
435
|
+
# comecome.assign(age: comecome[:age] + 29)
|
436
|
+
#
|
437
|
+
# # =>
|
438
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000065860>
|
439
|
+
# name age
|
440
|
+
# <string> <uint8>
|
441
|
+
# 0 Yasuko 97
|
442
|
+
# 1 Rui 78
|
443
|
+
# 2 Hinata 57
|
444
|
+
#
|
445
|
+
# @overload assign
|
446
|
+
# accepts block yielding pairs of key and values.
|
447
|
+
#
|
448
|
+
# @yield [self]
|
449
|
+
# the block is called within the context of self.
|
450
|
+
# (Block is called by instance_eval(&block). )
|
451
|
+
# @yieldreturn [Array<key, array_like>, Hash(key => array_like)]
|
452
|
+
# `key` must be a Symbol or a String.
|
453
|
+
# `array_like` is column data to be assigned.
|
454
|
+
# It must be one of `Vector` or `Arrow::Array` or `Array`.
|
455
|
+
# @return [DataFrame]
|
456
|
+
# assigned DataFrame.
|
457
|
+
# @example Assign new data for an existing column by block
|
458
|
+
# comecome.assign { { age: age + 29 } }
|
459
|
+
# # or
|
460
|
+
# comecome.assign { [:age, age + 29] }
|
461
|
+
# # or
|
462
|
+
# comecome.assign { [[:age, age + 29]] }
|
463
|
+
#
|
464
|
+
# # =>
|
465
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000007d640>
|
466
|
+
# name age
|
467
|
+
# <string> <uint8>
|
468
|
+
# 0 Yasuko 97
|
469
|
+
# 1 Rui 78
|
470
|
+
# 2 Hinata 57
|
471
|
+
#
|
472
|
+
# @overload assign(keys)
|
473
|
+
# accepts keys from argument and pairs of key and values from block.
|
474
|
+
#
|
475
|
+
# @param keys [Symbol, String] keys of columns to create or update.
|
476
|
+
# @yield [self]
|
477
|
+
# the block is called within the context of self.
|
478
|
+
# (Block is called by instance_eval(&block).)
|
479
|
+
# @yieldreturn [Array<array_like>]
|
480
|
+
# column data to be assigned.
|
481
|
+
# `array_like` must be one of `Vector` or `Arrow::Array` or `Array`.
|
482
|
+
# @return [DataFrame]
|
483
|
+
# assigned DataFrame.
|
484
|
+
# @example Assign new data for an existing column by block
|
485
|
+
# comecome.assign(:age) { age + 29 }
|
486
|
+
#
|
487
|
+
# # =>
|
488
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000007af94>
|
489
|
+
# name age
|
490
|
+
# <string> <uint8>
|
491
|
+
# 0 Yasuko 97
|
492
|
+
# 1 Rui 78
|
493
|
+
# 2 Hinata 57
|
494
|
+
#
|
495
|
+
# @example Assign multiple data
|
496
|
+
# comecome.assign(:age_in_1993, :brother) do
|
497
|
+
# [
|
498
|
+
# age + 29,
|
499
|
+
# ['Santa', nil, 'Momotaro'],
|
500
|
+
# ]
|
501
|
+
# end
|
502
|
+
#
|
503
|
+
# # =>
|
504
|
+
# #<RedAmber::DataFrame : 3 x 4 Vectors, 0x00000000000b363c>
|
505
|
+
# name age age_in_1993 brother
|
506
|
+
# <string> <uint8> <uint8> <string>
|
507
|
+
# 0 Yasuko 68 97 Santa
|
508
|
+
# 1 Rui 49 78 (nil)
|
509
|
+
# 2 Hinata 28 57 Momotaro
|
510
|
+
#
|
92
511
|
def assign(*assigner, &block)
|
93
|
-
|
94
|
-
return self if appender.is_a?(DataFrame)
|
95
|
-
|
96
|
-
append_to_fields_and_arrays(appender, fields, arrays, append_to_left: false) unless appender.empty?
|
97
|
-
|
98
|
-
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
512
|
+
assign_update(*assigner, append_to_left: false, &block)
|
99
513
|
end
|
100
514
|
|
515
|
+
# Assign new or updated variables (columns) and create an updated DataFrame.
|
516
|
+
# - Array-like variables with new keys will append new columns from left.
|
517
|
+
# - Array-like variables with existing keys will update corresponding vectors.
|
518
|
+
# - Symbol key and String key are considered as the same key.
|
519
|
+
# - If assigner is empty or nil, returns self.
|
520
|
+
#
|
521
|
+
# @overload assign_left(key_value_pairs)
|
522
|
+
# accepts pairs of key and values by an Array or a Hash.
|
523
|
+
#
|
524
|
+
# @param key_value_pairs [Array<key, array_like>, Hash{key => array_like}]
|
525
|
+
# `key` must be a Symbol or a String.
|
526
|
+
# `array_like` is column data to be assigned.
|
527
|
+
# It must be one of `Vector` or `Arrow::Array` or `Array`.
|
528
|
+
# @return [DataFrame]
|
529
|
+
# assigned DataFrame.
|
530
|
+
# @example Assign a new column from left
|
531
|
+
# df
|
532
|
+
#
|
533
|
+
# # =>
|
534
|
+
# #<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000c10c>
|
535
|
+
# index float string
|
536
|
+
# <uint8> <double> <string>
|
537
|
+
# 0 0 0.0 A
|
538
|
+
# 1 1 1.1 B
|
539
|
+
# 2 2 2.2 C
|
540
|
+
# 3 3 NaN D
|
541
|
+
# 4 (nil) (nil) (nil)
|
542
|
+
#
|
543
|
+
# df.assign_left(new_index: df.indices(1))
|
544
|
+
#
|
545
|
+
# # =>
|
546
|
+
# #<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000001787c>
|
547
|
+
# new_index index float string
|
548
|
+
# <uint8> <uint8> <double> <string>
|
549
|
+
# 0 1 0 0.0 A
|
550
|
+
# 1 2 1 1.1 B
|
551
|
+
# 2 3 2 2.2 C
|
552
|
+
# 3 4 3 NaN D
|
553
|
+
# 4 5 (nil) (nil) (nil)
|
554
|
+
#
|
555
|
+
# @overload assign_left
|
556
|
+
# accepts block yielding pairs of key and values.
|
557
|
+
#
|
558
|
+
# @yield [self]
|
559
|
+
# the block is called within the context of self.
|
560
|
+
# (Block is called by instance_eval(&block). )
|
561
|
+
# @yieldreturn [Array<key, array_like>, Hash(key => array_like)]
|
562
|
+
# `key` must be a Symbol or a String.
|
563
|
+
# `array_like` is column data to be assigned.
|
564
|
+
# It must be one of `Vector` or `Arrow::Array` or `Array`.
|
565
|
+
# @return [DataFrame]
|
566
|
+
# assigned DataFrame.
|
567
|
+
#
|
568
|
+
# @overload assign_left(keys)
|
569
|
+
# accepts keys from argument and pairs of key and values from block.
|
570
|
+
#
|
571
|
+
# @param keys [Symbol, String]
|
572
|
+
# keys of columns to create or update.
|
573
|
+
# @yield [self]
|
574
|
+
# the block is called within the context of self.
|
575
|
+
# (Block is called by instance_eval(&block).)
|
576
|
+
# @yieldreturn [Array<array_like>]
|
577
|
+
# column data to be assigned.
|
578
|
+
# `array_like` must be one of `Vector` or `Arrow::Array` or `Array`.
|
579
|
+
# @return [DataFrame]
|
580
|
+
# assigned DataFrame.
|
581
|
+
#
|
101
582
|
def assign_left(*assigner, &block)
|
102
|
-
|
103
|
-
return self if appender.is_a?(DataFrame)
|
104
|
-
|
105
|
-
append_to_fields_and_arrays(appender, fields, arrays, append_to_left: true) unless appender.empty?
|
106
|
-
|
107
|
-
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
583
|
+
assign_update(*assigner, append_to_left: true, &block)
|
108
584
|
end
|
109
585
|
|
110
586
|
private
|
111
587
|
|
112
|
-
def assign_update(*assigner, &block)
|
588
|
+
def assign_update(*assigner, append_to_left: false, &block)
|
113
589
|
if block
|
114
590
|
assigner_from_block = instance_eval(&block)
|
115
591
|
assigner =
|
116
|
-
|
117
|
-
|
592
|
+
case assigner_from_block
|
593
|
+
in _ if assigner.empty? # block only
|
118
594
|
[assigner_from_block]
|
119
|
-
|
120
|
-
# assigner_from_block in [Array, *]
|
121
|
-
elsif multiple_assigner?(assigner_from_block)
|
595
|
+
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
122
596
|
assigner.zip(assigner_from_block)
|
123
597
|
else
|
124
598
|
assigner.zip([assigner_from_block])
|
@@ -128,10 +602,10 @@ module RedAmber
|
|
128
602
|
case assigner
|
129
603
|
in [] | [nil] | [{}] | [[]]
|
130
604
|
return self
|
131
|
-
in [Hash => key_array_pairs]
|
132
|
-
# noop
|
133
605
|
in [(Symbol | String) => key, (Vector | Array | Arrow::Array) => array]
|
134
606
|
key_array_pairs = { key => array }
|
607
|
+
in [Hash => key_array_pairs]
|
608
|
+
# noop
|
135
609
|
in [Array => array_in_array]
|
136
610
|
key_array_pairs = try_convert_to_hash(array_in_array)
|
137
611
|
in [Array, *] => array_in_array1
|
@@ -151,20 +625,27 @@ module RedAmber
|
|
151
625
|
appender[key] = array
|
152
626
|
end
|
153
627
|
end
|
154
|
-
|
628
|
+
fields, arrays = *update_fields_and_arrays(updater)
|
629
|
+
return self if appender.is_a?(DataFrame)
|
630
|
+
|
631
|
+
unless appender.empty?
|
632
|
+
append_to_fields_and_arrays(appender, fields, arrays, append_to_left)
|
633
|
+
end
|
634
|
+
|
635
|
+
DataFrame.create(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
155
636
|
end
|
156
637
|
|
157
638
|
def try_convert_to_hash(array)
|
158
639
|
array.to_h
|
159
640
|
rescue TypeError
|
160
641
|
[array].to_h
|
161
|
-
rescue TypeError # rubocop:disable Lint/DuplicateRescueException
|
162
|
-
raise DataFrameArgumentError, "Invalid argument in Array #{array}"
|
163
642
|
end
|
164
643
|
|
165
644
|
def rename_by_hash(key_pairs)
|
166
645
|
not_existing_keys = key_pairs.keys - keys
|
167
|
-
|
646
|
+
unless not_existing_keys.empty?
|
647
|
+
raise DataFrameArgumentError, "Not existing: #{not_existing_keys}"
|
648
|
+
end
|
168
649
|
|
169
650
|
fields =
|
170
651
|
keys.map do |key|
|
@@ -175,7 +656,7 @@ module RedAmber
|
|
175
656
|
@table.schema[key]
|
176
657
|
end
|
177
658
|
end
|
178
|
-
DataFrame.
|
659
|
+
DataFrame.create(Arrow::Table.new(Arrow::Schema.new(fields), @table.columns))
|
179
660
|
end
|
180
661
|
|
181
662
|
def update_fields_and_arrays(updater)
|
@@ -185,7 +666,9 @@ module RedAmber
|
|
185
666
|
data = updater[key]
|
186
667
|
next unless data
|
187
668
|
|
188
|
-
|
669
|
+
if data.size != size
|
670
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})"
|
671
|
+
end
|
189
672
|
|
190
673
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
191
674
|
fields[i] = Arrow::Field.new(key, a.value_data_type)
|
@@ -194,10 +677,12 @@ module RedAmber
|
|
194
677
|
[fields, arrays]
|
195
678
|
end
|
196
679
|
|
197
|
-
def append_to_fields_and_arrays(appender, fields, arrays, append_to_left
|
680
|
+
def append_to_fields_and_arrays(appender, fields, arrays, append_to_left)
|
198
681
|
enum = append_to_left ? appender.reverse_each : appender.each
|
199
682
|
enum.each do |key, data|
|
200
|
-
|
683
|
+
if data.size != size
|
684
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})"
|
685
|
+
end
|
201
686
|
|
202
687
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
203
688
|
|
@@ -210,14 +695,5 @@ module RedAmber
|
|
210
695
|
end
|
211
696
|
end
|
212
697
|
end
|
213
|
-
|
214
|
-
def multiple_assigner?(assigner)
|
215
|
-
case assigner
|
216
|
-
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
217
|
-
true
|
218
|
-
else
|
219
|
-
false
|
220
|
-
end
|
221
|
-
end
|
222
698
|
end
|
223
699
|
end
|