red_amber 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +39 -20
- data/.yardopts +2 -0
- data/CHANGELOG.md +113 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +25 -26
- data/benchmark/basic.yml +2 -2
- data/benchmark/combine.yml +2 -2
- data/benchmark/dataframe.yml +2 -2
- data/benchmark/group.yml +2 -2
- data/benchmark/reshape.yml +2 -2
- data/benchmark/vector.yml +3 -0
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +429 -75
- data/lib/red_amber/data_frame_combinable.rb +516 -66
- data/lib/red_amber/data_frame_displayable.rb +244 -14
- data/lib/red_amber/data_frame_indexable.rb +121 -18
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +622 -66
- data/lib/red_amber/data_frame_variable_operation.rb +446 -34
- data/lib/red_amber/group.rb +187 -22
- data/lib/red_amber/helper.rb +70 -10
- data/lib/red_amber/refinements.rb +12 -5
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +385 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +217 -12
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
data/lib/red_amber/data_frame.rb
CHANGED
@@ -4,7 +4,7 @@ module RedAmber
|
|
4
4
|
# Class to represent a data frame.
|
5
5
|
# Variable @table holds an Arrow::Table object.
|
6
6
|
class DataFrame
|
7
|
-
#
|
7
|
+
# Mix-in
|
8
8
|
include DataFrameCombinable
|
9
9
|
include DataFrameDisplayable
|
10
10
|
include DataFrameIndexable
|
@@ -17,26 +17,62 @@ module RedAmber
|
|
17
17
|
using RefineArrowTable
|
18
18
|
using RefineHash
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
class << self
|
21
|
+
# Quicker DataFrame constructor from a `Arrow::Table`.
|
22
|
+
#
|
23
|
+
# @param table [Arrow::Table]
|
24
|
+
# A table to have in the DataFrame.
|
25
|
+
# @return [DataFrame]
|
26
|
+
# Initialized DataFrame.
|
27
|
+
#
|
28
|
+
# @note This method will allocate table directly and may be used in the method.
|
29
|
+
# @note `table` must have unique keys.
|
30
|
+
#
|
31
|
+
def create(table)
|
32
|
+
instance = allocate
|
33
|
+
instance.instance_variable_set(:@table, table)
|
34
|
+
instance
|
35
|
+
end
|
31
36
|
end
|
32
37
|
|
33
38
|
# Creates a new DataFrame.
|
34
39
|
#
|
40
|
+
# @overload initialize(hash)
|
41
|
+
# Initialize a DataFrame by a Hash.
|
42
|
+
#
|
43
|
+
# @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
|
44
|
+
# a Hash of `key` with array-like for column values.
|
45
|
+
# `key`s are Symbol or String.
|
46
|
+
# @example Initialize by a Hash
|
47
|
+
# hash = { x: [1, 2, 3], y: %w[A B C] }
|
48
|
+
# DataFrame.new(hash)
|
49
|
+
# @example Initialize by a Hash like arguments.
|
50
|
+
# DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
51
|
+
# @example Initialize from #to_arrow_array responsibles.
|
52
|
+
# # #to_arrow_array responsible `array-like` is also available.
|
53
|
+
# require 'arrow-numo-narray'
|
54
|
+
# DataFrame.new(numo: Numo::DFloat.new(3).rand)
|
55
|
+
#
|
35
56
|
# @overload initialize(table)
|
36
|
-
# Initialize DataFrame by an `Arrow::Table
|
57
|
+
# Initialize a DataFrame by an `Arrow::Table`.
|
37
58
|
#
|
38
59
|
# @param table [Arrow::Table]
|
39
|
-
#
|
60
|
+
# a table to have in the DataFrame.
|
61
|
+
# @example Initialize by a Table
|
62
|
+
# table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
63
|
+
# DataFrame.new(table)
|
64
|
+
#
|
65
|
+
# @overload initialize(schema, row_oriented_array)
|
66
|
+
# Initialize a DataFrame by schema and row_oriented_array.
|
67
|
+
#
|
68
|
+
# @param schema [Hash<key => type>]
|
69
|
+
# a schema of key and data type.
|
70
|
+
# @param row_oriented_array [Array]
|
71
|
+
# an Array of rows.
|
72
|
+
# @example Initialize by a schema and a row_oriented_array.
|
73
|
+
# schema = { x: :uint8, y: :string }
|
74
|
+
# row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
|
75
|
+
# DataFrame.new(schema, row_oriented_array)
|
40
76
|
#
|
41
77
|
# @overload initialize(arrowable)
|
42
78
|
# Initialize DataFrame by a `#to_arrow` responsible object.
|
@@ -47,6 +83,11 @@ module RedAmber
|
|
47
83
|
#
|
48
84
|
# @note `RedAmber::DataFrame` itself is readable by this.
|
49
85
|
# @note Hash is refined to respond to `#to_arrow` in this class.
|
86
|
+
# @example Initialize by Red Dataset object.
|
87
|
+
# require 'datasets-arrow'
|
88
|
+
# dataset = Datasets::Penguins.new
|
89
|
+
# penguins = DataFrame.new(dataset)
|
90
|
+
# @since 0.2.2
|
50
91
|
#
|
51
92
|
# @overload initialize(rover_like)
|
52
93
|
# Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
|
@@ -60,20 +101,18 @@ module RedAmber
|
|
60
101
|
# @overload initialize()
|
61
102
|
# Create empty DataFrame
|
62
103
|
#
|
63
|
-
# @example
|
104
|
+
# @example
|
105
|
+
# DataFrame.new
|
64
106
|
#
|
65
107
|
# @overload initialize(empty)
|
66
108
|
# Create empty DataFrame
|
67
109
|
#
|
68
110
|
# @param empty [nil, [], {}]
|
69
111
|
#
|
70
|
-
# @example
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
# @param args [values]
|
75
|
-
# Accepts any argments which is valid for `Arrow::Table.new(args)`. See
|
76
|
-
# {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
|
112
|
+
# @example Return empty DataFrame.
|
113
|
+
# DataFrame.new([])
|
114
|
+
# DataFrame.new({})
|
115
|
+
# DataFrame.new(nil)
|
77
116
|
#
|
78
117
|
def initialize(*args)
|
79
118
|
case args
|
@@ -109,15 +148,16 @@ module RedAmber
|
|
109
148
|
|
110
149
|
# Returns the table having within.
|
111
150
|
#
|
112
|
-
# @return [Arrow::Table]
|
151
|
+
# @return [Arrow::Table]
|
152
|
+
# the table within.
|
113
153
|
#
|
114
154
|
attr_reader :table
|
115
|
-
|
116
155
|
alias_method :to_arrow, :table
|
117
156
|
|
118
|
-
# Returns the number of rows.
|
157
|
+
# Returns the number of records (rows).
|
119
158
|
#
|
120
|
-
# @return [Integer]
|
159
|
+
# @return [Integer]
|
160
|
+
# number of records (rows).
|
121
161
|
#
|
122
162
|
def size
|
123
163
|
@table.n_rows
|
@@ -126,9 +166,10 @@ module RedAmber
|
|
126
166
|
alias_method :n_obs, :size
|
127
167
|
alias_method :n_rows, :size
|
128
168
|
|
129
|
-
# Returns the number of columns.
|
169
|
+
# Returns the number of variables (columns).
|
130
170
|
#
|
131
|
-
# @return [Integer]
|
171
|
+
# @return [Integer]
|
172
|
+
# number of variables (columns).
|
132
173
|
#
|
133
174
|
def n_keys
|
134
175
|
@table.n_columns
|
@@ -140,7 +181,7 @@ module RedAmber
|
|
140
181
|
# Returns the numbers of rows and columns.
|
141
182
|
#
|
142
183
|
# @return [Array]
|
143
|
-
#
|
184
|
+
# number of rows and number of columns in an array.
|
144
185
|
# Same as [size, n_keys].
|
145
186
|
#
|
146
187
|
def shape
|
@@ -160,7 +201,7 @@ module RedAmber
|
|
160
201
|
# Returns an Array of keys.
|
161
202
|
#
|
162
203
|
# @return [Array]
|
163
|
-
#
|
204
|
+
# keys in an Array.
|
164
205
|
#
|
165
206
|
def keys
|
166
207
|
@keys || @keys = init_instance_vars(:keys)
|
@@ -170,9 +211,10 @@ module RedAmber
|
|
170
211
|
|
171
212
|
# Returns true if self has a specified key in the argument.
|
172
213
|
#
|
173
|
-
# @param key [Symbol, String]
|
214
|
+
# @param key [Symbol, String]
|
215
|
+
# key to test.
|
174
216
|
# @return [Boolean]
|
175
|
-
#
|
217
|
+
# returns true if self has key in Symbol.
|
176
218
|
#
|
177
219
|
def key?(key)
|
178
220
|
keys.include?(key.to_sym)
|
@@ -181,9 +223,10 @@ module RedAmber
|
|
181
223
|
|
182
224
|
# Returns index of specified key in the Array keys.
|
183
225
|
#
|
184
|
-
# @param key [Symbol, String]
|
226
|
+
# @param key [Symbol, String]
|
227
|
+
# key to know.
|
185
228
|
# @return [Integer]
|
186
|
-
#
|
229
|
+
# index of key in the Array keys.
|
187
230
|
#
|
188
231
|
def key_index(key)
|
189
232
|
keys.find_index(key.to_sym)
|
@@ -194,7 +237,7 @@ module RedAmber
|
|
194
237
|
# Returns abbreviated type names in an Array.
|
195
238
|
#
|
196
239
|
# @return [Array]
|
197
|
-
#
|
240
|
+
# abbreviated Red Arrow data type names.
|
198
241
|
#
|
199
242
|
def types
|
200
243
|
@types || @types = @table.columns.map do |column|
|
@@ -205,7 +248,7 @@ module RedAmber
|
|
205
248
|
# Returns an Array of Classes of data type.
|
206
249
|
#
|
207
250
|
# @return [Array]
|
208
|
-
#
|
251
|
+
# an Array of Red Arrow data type Classes.
|
209
252
|
#
|
210
253
|
def type_classes
|
211
254
|
@data_types || @data_types = @table.columns.map { |column| column.data_type.class }
|
@@ -214,34 +257,16 @@ module RedAmber
|
|
214
257
|
# Returns Vectors in an Array.
|
215
258
|
#
|
216
259
|
# @return [Array]
|
217
|
-
#
|
260
|
+
# an Array of Vector.
|
218
261
|
#
|
219
262
|
def vectors
|
220
263
|
@vectors || @vectors = init_instance_vars(:vectors)
|
221
264
|
end
|
222
265
|
|
223
|
-
# Returns row indices (start...(size+start)) in a Vector.
|
224
|
-
#
|
225
|
-
# @param start [Object]
|
226
|
-
# Object which have `#succ` method.
|
227
|
-
#
|
228
|
-
# @return [Array]
|
229
|
-
# A Vector of row indices.
|
230
|
-
#
|
231
|
-
# @example
|
232
|
-
# (when self.size == 5)
|
233
|
-
# - indices #=> Vector[0, 1, 2, 3, 4]
|
234
|
-
# - indices(1) #=> Vector[1, 2, 3, 4, 5]
|
235
|
-
# - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
|
236
|
-
#
|
237
|
-
def indices(start = 0)
|
238
|
-
Vector.new((start..).take(size))
|
239
|
-
end
|
240
|
-
alias_method :indexes, :indices
|
241
|
-
|
242
266
|
# Returns column-oriented data in a Hash.
|
243
267
|
#
|
244
|
-
# @return [Hash]
|
268
|
+
# @return [Hash]
|
269
|
+
# a Hash of 'key => column_in_an_array'.
|
245
270
|
#
|
246
271
|
def to_h
|
247
272
|
variables.transform_values(&:to_a)
|
@@ -249,7 +274,8 @@ module RedAmber
|
|
249
274
|
|
250
275
|
# Returns a row-oriented array without header.
|
251
276
|
#
|
252
|
-
# @return [Array]
|
277
|
+
# @return [Array]
|
278
|
+
# row-oriented data without header.
|
253
279
|
#
|
254
280
|
# @note If you need column-oriented array, use `.to_h.to_a`.
|
255
281
|
#
|
@@ -260,7 +286,8 @@ module RedAmber
|
|
260
286
|
|
261
287
|
# Returns column name and data type in a Hash.
|
262
288
|
#
|
263
|
-
# @return [Hash]
|
289
|
+
# @return [Hash]
|
290
|
+
# column name and data type.
|
264
291
|
#
|
265
292
|
# @example
|
266
293
|
# RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
|
@@ -273,7 +300,7 @@ module RedAmber
|
|
273
300
|
# Compare DataFrames.
|
274
301
|
#
|
275
302
|
# @return [true, false]
|
276
|
-
#
|
303
|
+
# true if other is a DataFrame and table is same.
|
277
304
|
# Otherwise return false.
|
278
305
|
#
|
279
306
|
def ==(other)
|
@@ -282,7 +309,8 @@ module RedAmber
|
|
282
309
|
|
283
310
|
# Check if it is an empty DataFrame.
|
284
311
|
#
|
285
|
-
# @return [true, false
|
312
|
+
# @return [true, false]
|
313
|
+
# true if it has no columns.
|
286
314
|
#
|
287
315
|
def empty?
|
288
316
|
variables.empty?
|
@@ -293,14 +321,18 @@ module RedAmber
|
|
293
321
|
# @overload each_row
|
294
322
|
# Returns Enumerator when no block given.
|
295
323
|
#
|
296
|
-
# @return [Enumerator]
|
324
|
+
# @return [Enumerator]
|
325
|
+
# enumerator of each row.
|
297
326
|
#
|
298
327
|
# @overload each_row(&block)
|
299
328
|
# Yields with key and row pairs.
|
300
329
|
#
|
301
|
-
# @
|
302
|
-
#
|
303
|
-
# @yieldreturn [Integer]
|
330
|
+
# @yieldparam key_row_pairs [Hash]
|
331
|
+
# key and row pairs.
|
332
|
+
# @yieldreturn [Integer]
|
333
|
+
# size of the DataFrame.
|
334
|
+
# @return [Integer]
|
335
|
+
# returns size.
|
304
336
|
#
|
305
337
|
def each_row
|
306
338
|
return enum_for(:each_row) unless block_given?
|
@@ -316,25 +348,346 @@ module RedAmber
|
|
316
348
|
|
317
349
|
# Returns self in a `Rover::DataFrame`.
|
318
350
|
#
|
319
|
-
# @return [Rover::DataFrame]
|
351
|
+
# @return [Rover::DataFrame]
|
352
|
+
# a `Rover::DataFrame`.
|
320
353
|
#
|
321
354
|
def to_rover
|
322
355
|
require 'rover'
|
323
356
|
Rover::DataFrame.new(to_h)
|
324
357
|
end
|
325
358
|
|
359
|
+
# Create a Group object. Or create a Group and summarize it.
|
360
|
+
#
|
361
|
+
# @overload group(*group_keys)
|
362
|
+
# Create a Group object.
|
363
|
+
#
|
364
|
+
# @param group_keys [Array<Symbol, String>]
|
365
|
+
# keys for grouping.
|
366
|
+
# @return [Group]
|
367
|
+
# Group object.
|
368
|
+
# @example Create a Group
|
369
|
+
# penguins.group(:species)
|
370
|
+
#
|
371
|
+
# # =>
|
372
|
+
# #<RedAmber::Group : 0x000000000000c3c8>
|
373
|
+
# species group_count
|
374
|
+
# <string> <uint8>
|
375
|
+
# 0 Adelie 152
|
376
|
+
# 1 Chinstrap 68
|
377
|
+
# 2 Gentoo 124
|
378
|
+
#
|
379
|
+
# @overload group(*group_keys)
|
380
|
+
# Create a Group and summarize it by aggregation functions from the block.
|
381
|
+
#
|
382
|
+
# @yieldparam group [Group]
|
383
|
+
# passes Group object.
|
384
|
+
# @yieldreturn [DataFrame, Array<DataFrame>]
|
385
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
386
|
+
# @return [DataFrame]
|
387
|
+
# summarized DataFrame.
|
388
|
+
# @example Create a group and summarize it.
|
389
|
+
# penguins.group(:species) { mean(:bill_length_mm) }
|
390
|
+
#
|
391
|
+
# # =>
|
392
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
|
393
|
+
# species mean(bill_length_mm)
|
394
|
+
# <string> <double>
|
395
|
+
# 0 Adelie 38.79
|
396
|
+
# 1 Chinstrap 48.83
|
397
|
+
# 2 Gentoo 47.5
|
398
|
+
#
|
326
399
|
def group(*group_keys, &block)
|
327
400
|
g = Group.new(self, group_keys)
|
328
401
|
g = g.summarize(&block) if block
|
329
402
|
g
|
330
403
|
end
|
331
404
|
|
405
|
+
# Create SubFrames by value grouping.
|
406
|
+
#
|
407
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
408
|
+
# @param keys [Symbol, String, Array<Symbol, String>]
|
409
|
+
# grouping keys.
|
410
|
+
# @return [SubFrames]
|
411
|
+
# a created SubFrames grouped by column values on `keys`.
|
412
|
+
# @example
|
413
|
+
# df.sub_by_value(keys: :y)
|
414
|
+
#
|
415
|
+
# # =>
|
416
|
+
# #<RedAmber::SubFrames : 0x000000000000fc08>
|
417
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
418
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
419
|
+
# ---
|
420
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
|
421
|
+
# x y z
|
422
|
+
# <uint8> <string> <boolean>
|
423
|
+
# 0 1 A false
|
424
|
+
# 1 2 A true
|
425
|
+
# ---
|
426
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
|
427
|
+
# x y z
|
428
|
+
# <uint8> <string> <boolean>
|
429
|
+
# 0 3 B false
|
430
|
+
# 1 4 B (nil)
|
431
|
+
# 2 5 B true
|
432
|
+
# ---
|
433
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
|
434
|
+
# x y z
|
435
|
+
# <uint8> <string> <boolean>
|
436
|
+
# 0 6 C false
|
437
|
+
#
|
438
|
+
# @since 0.4.0
|
439
|
+
#
|
440
|
+
def sub_by_value(keys: nil)
|
441
|
+
SubFrames.new(self, group(keys).filters)
|
442
|
+
end
|
443
|
+
alias_method :subframes_by_value, :sub_by_value
|
444
|
+
|
445
|
+
# Create SubFrames by Windowing with `from`, `size` and `step`.
|
446
|
+
#
|
447
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
448
|
+
# @param from [Integer]
|
449
|
+
# start position of window.
|
450
|
+
# @param size [Integer]
|
451
|
+
# window size.
|
452
|
+
# @param step [Integer]
|
453
|
+
# moving step of window.
|
454
|
+
# @return [SubFrames]
|
455
|
+
# a created SubFrames.
|
456
|
+
# @example
|
457
|
+
# df.sub_by_window(size: 4, step: 2)
|
458
|
+
#
|
459
|
+
# # =>
|
460
|
+
# #<RedAmber::SubFrames : 0x000000000000fc58>
|
461
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
462
|
+
# 2 SubFrames: [4, 4] in sizes.
|
463
|
+
# ---
|
464
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
|
465
|
+
# x y z
|
466
|
+
# <uint8> <string> <boolean>
|
467
|
+
# 0 1 A false
|
468
|
+
# 1 2 A true
|
469
|
+
# 2 3 B false
|
470
|
+
# 3 4 B (nil)
|
471
|
+
# ---
|
472
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
|
473
|
+
# x y z
|
474
|
+
# <uint8> <string> <boolean>
|
475
|
+
# 0 3 B false
|
476
|
+
# 1 4 B (nil)
|
477
|
+
# 2 5 B true
|
478
|
+
# 3 6 C false
|
479
|
+
#
|
480
|
+
# @since 0.4.0
|
481
|
+
#
|
482
|
+
def sub_by_window(from: 0, size: nil, step: 1)
|
483
|
+
SubFrames.new(self) do
|
484
|
+
from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
|
485
|
+
[*i...(i + size)]
|
486
|
+
end
|
487
|
+
end
|
488
|
+
end
|
489
|
+
alias_method :subframes_by_window, :sub_by_window
|
490
|
+
|
491
|
+
# Create SubFrames by Grouping/Windowing by position from an enumerator method.
|
492
|
+
#
|
493
|
+
# This method will process the indices of self by enumerator.
|
494
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
495
|
+
# @param enumerator_method [Symbol]
|
496
|
+
# Enumerator name.
|
497
|
+
# @param args [<Object>]
|
498
|
+
# arguments for the enumerator method.
|
499
|
+
# @return [SubFrames]
|
500
|
+
# a created SubFrames.
|
501
|
+
# @example Create a SubFrames object sliced by 3 rows.
|
502
|
+
# df.sub_by_enum(:each_slice, 3)
|
503
|
+
#
|
504
|
+
# # =>
|
505
|
+
# #<RedAmber::SubFrames : 0x000000000000fd20>
|
506
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
507
|
+
# 2 SubFrames: [3, 3] in sizes.
|
508
|
+
# ---
|
509
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
|
510
|
+
# x y z
|
511
|
+
# <uint8> <string> <boolean>
|
512
|
+
# 0 1 A false
|
513
|
+
# 1 2 A true
|
514
|
+
# 2 3 B false
|
515
|
+
# ---
|
516
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
|
517
|
+
# x y z
|
518
|
+
# <uint8> <string> <boolean>
|
519
|
+
# 0 4 B (nil)
|
520
|
+
# 1 5 B true
|
521
|
+
# 2 6 C false
|
522
|
+
#
|
523
|
+
# @example Create a SubFrames object for each consecutive 4 rows.
|
524
|
+
# df.sub_by_enum(:each_cons, 4)
|
525
|
+
#
|
526
|
+
# # =>
|
527
|
+
# #<RedAmber::SubFrames : 0x000000000000fd98>
|
528
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
529
|
+
# 3 SubFrames: [4, 4, 4] in sizes.
|
530
|
+
# ---
|
531
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
|
532
|
+
# x y z
|
533
|
+
# <uint8> <string> <boolean>
|
534
|
+
# 0 1 A false
|
535
|
+
# 1 2 A true
|
536
|
+
# 2 3 B false
|
537
|
+
# 3 4 B (nil)
|
538
|
+
# ---
|
539
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
|
540
|
+
# x y z
|
541
|
+
# <uint8> <string> <boolean>
|
542
|
+
# 0 2 A true
|
543
|
+
# 1 3 B false
|
544
|
+
# 2 4 B (nil)
|
545
|
+
# 3 5 B true
|
546
|
+
# ---
|
547
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
|
548
|
+
# x y z
|
549
|
+
# <uint8> <string> <boolean>
|
550
|
+
# 0 3 B false
|
551
|
+
# 1 4 B (nil)
|
552
|
+
# 2 5 B true
|
553
|
+
# 3 6 C false
|
554
|
+
#
|
555
|
+
# @since 0.4.0
|
556
|
+
#
|
557
|
+
def sub_by_enum(enumerator_method, *args)
|
558
|
+
SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
|
559
|
+
end
|
560
|
+
alias_method :subframes_by_enum, :sub_by_enum
|
561
|
+
|
562
|
+
# Create SubFrames by windowing with a kernel (i.e. masked window) and step.
|
563
|
+
#
|
564
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
565
|
+
# @param kernel [Array<true, false>, Vector]
|
566
|
+
# boolean array-like to pick records in the window.
|
567
|
+
# Kernel is a boolean Array and it behaves like a masked window.
|
568
|
+
# @param step [Integer]
|
569
|
+
# moving step of window.
|
570
|
+
# @return [SubFrames]
|
571
|
+
# a created SubFrames.
|
572
|
+
# @example
|
573
|
+
# kernel = [true, false, false, true]
|
574
|
+
# df.sub_by_kernel(kernel, step: 2)
|
575
|
+
#
|
576
|
+
# # =>
|
577
|
+
# #<RedAmber::SubFrames : 0x000000000000fde8>
|
578
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
579
|
+
# 2 SubFrames: [2, 2] in sizes.
|
580
|
+
# ---
|
581
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
|
582
|
+
# x y z
|
583
|
+
# <uint8> <string> <boolean>
|
584
|
+
# 0 1 A false
|
585
|
+
# 1 4 B (nil)
|
586
|
+
# ---
|
587
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
|
588
|
+
# x y z
|
589
|
+
# <uint8> <string> <boolean>
|
590
|
+
# 0 3 B false
|
591
|
+
# 1 6 C false
|
592
|
+
#
|
593
|
+
# @since 0.4.0
|
594
|
+
#
|
595
|
+
def sub_by_kernel(kernel, step: 1)
|
596
|
+
limit_size = size - kernel.size
|
597
|
+
kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
|
598
|
+
SubFrames.new(self) do
|
599
|
+
0.step(by: step, to: limit_size).map do |i|
|
600
|
+
kernel_vector.shift(i)
|
601
|
+
end
|
602
|
+
end
|
603
|
+
end
|
604
|
+
alias_method :subframes_by_kernel, :sub_by_kernel
|
605
|
+
|
606
|
+
# Generic builder of sub-dataframes from self.
|
607
|
+
#
|
608
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
609
|
+
# @overload build_subframes(subset_specifier)
|
610
|
+
# Create a new SubFrames object.
|
611
|
+
#
|
612
|
+
# @param subset_specifier [Array<Vector>, Array<array-like>]
|
613
|
+
# an Array of numeric indices or boolean filters
|
614
|
+
# to create subsets of DataFrame.
|
615
|
+
# @return [SubFrames]
|
616
|
+
# new SubFrames.
|
617
|
+
# @example
|
618
|
+
# df.build_subframes([[0, 2, 4], [1, 3, 5]])
|
619
|
+
#
|
620
|
+
# # =>
|
621
|
+
# #<RedAmber::SubFrames : 0x000000000000fe9c>
|
622
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
623
|
+
# 2 SubFrames: [3, 3] in sizes.
|
624
|
+
# ---
|
625
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
|
626
|
+
# x y z
|
627
|
+
# <uint8> <string> <boolean>
|
628
|
+
# 0 1 A false
|
629
|
+
# 1 3 B false
|
630
|
+
# 2 5 B true
|
631
|
+
# ---
|
632
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
|
633
|
+
# x y z
|
634
|
+
# <uint8> <string> <boolean>
|
635
|
+
# 0 2 A true
|
636
|
+
# 1 4 B (nil)
|
637
|
+
# 2 6 C false
|
638
|
+
#
|
639
|
+
# @overload build_subframes
|
640
|
+
# Create a new SubFrames object by block.
|
641
|
+
#
|
642
|
+
# @yield [self]
|
643
|
+
# the block is called within the context of self.
|
644
|
+
# (Block is called by instance_eval(&block). )
|
645
|
+
# @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
|
646
|
+
# an Array of index or boolean array-likes to create subsets of DataFrame.
|
647
|
+
# All array-likes are responsible to #numeric? or #boolean?.
|
648
|
+
# @example
|
649
|
+
# dataframe.build_subframes do
|
650
|
+
# even = indices.map(&:even?)
|
651
|
+
# [even, !even]
|
652
|
+
# end
|
653
|
+
#
|
654
|
+
# # =>
|
655
|
+
# #<RedAmber::SubFrames : 0x000000000000fe60>
|
656
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
657
|
+
# 2 SubFrames: [3, 3] in sizes.
|
658
|
+
# ---
|
659
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
|
660
|
+
# x y z
|
661
|
+
# <uint8> <string> <boolean>
|
662
|
+
# 0 1 A false
|
663
|
+
# 1 3 B false
|
664
|
+
# 2 5 B true
|
665
|
+
# ---
|
666
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
|
667
|
+
# x y z
|
668
|
+
# <uint8> <string> <boolean>
|
669
|
+
# 0 2 A true
|
670
|
+
# 1 4 B (nil)
|
671
|
+
# 2 6 C false
|
672
|
+
#
|
673
|
+
# @since 0.4.0
|
674
|
+
#
|
675
|
+
def build_subframes(subset_specifier = nil, &block)
|
676
|
+
if block
|
677
|
+
SubFrames.new(self, instance_eval(&block))
|
678
|
+
else
|
679
|
+
SubFrames.new(self, subset_specifier)
|
680
|
+
end
|
681
|
+
end
|
682
|
+
|
683
|
+
# Catch variable (column) key as method name.
|
332
684
|
def method_missing(name, *args, &block)
|
333
685
|
return v(name) if args.empty? && key?(name)
|
334
686
|
|
335
687
|
super
|
336
688
|
end
|
337
689
|
|
690
|
+
# Catch variable (column) key as method name.
|
338
691
|
def respond_to_missing?(name, include_private)
|
339
692
|
return true if key?(name)
|
340
693
|
|
@@ -346,15 +699,16 @@ module RedAmber
|
|
346
699
|
# initialize @variables, @keys, @vectors and return one of them
|
347
700
|
def init_instance_vars(var)
|
348
701
|
ary =
|
349
|
-
@table
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
702
|
+
@table
|
703
|
+
.columns
|
704
|
+
.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
|
705
|
+
v = Vector.create(column.data)
|
706
|
+
k = column.name.to_sym
|
707
|
+
v.key = k
|
708
|
+
variables[k] = v
|
709
|
+
keys << k
|
710
|
+
vectors << v
|
711
|
+
end
|
358
712
|
|
359
713
|
@variables, @keys, @vectors = ary
|
360
714
|
ary[%i[variables keys vectors].index(var)]
|