red_amber 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +39 -20
- data/.yardopts +2 -0
- data/CHANGELOG.md +113 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +25 -26
- data/benchmark/basic.yml +2 -2
- data/benchmark/combine.yml +2 -2
- data/benchmark/dataframe.yml +2 -2
- data/benchmark/group.yml +2 -2
- data/benchmark/reshape.yml +2 -2
- data/benchmark/vector.yml +3 -0
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +429 -75
- data/lib/red_amber/data_frame_combinable.rb +516 -66
- data/lib/red_amber/data_frame_displayable.rb +244 -14
- data/lib/red_amber/data_frame_indexable.rb +121 -18
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +622 -66
- data/lib/red_amber/data_frame_variable_operation.rb +446 -34
- data/lib/red_amber/group.rb +187 -22
- data/lib/red_amber/helper.rb +70 -10
- data/lib/red_amber/refinements.rb +12 -5
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +385 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +217 -12
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
data/lib/red_amber/data_frame.rb
CHANGED
@@ -4,7 +4,7 @@ module RedAmber
|
|
4
4
|
# Class to represent a data frame.
|
5
5
|
# Variable @table holds an Arrow::Table object.
|
6
6
|
class DataFrame
|
7
|
-
#
|
7
|
+
# Mix-in
|
8
8
|
include DataFrameCombinable
|
9
9
|
include DataFrameDisplayable
|
10
10
|
include DataFrameIndexable
|
@@ -17,26 +17,62 @@ module RedAmber
|
|
17
17
|
using RefineArrowTable
|
18
18
|
using RefineHash
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
class << self
|
21
|
+
# Quicker DataFrame constructor from a `Arrow::Table`.
|
22
|
+
#
|
23
|
+
# @param table [Arrow::Table]
|
24
|
+
# A table to have in the DataFrame.
|
25
|
+
# @return [DataFrame]
|
26
|
+
# Initialized DataFrame.
|
27
|
+
#
|
28
|
+
# @note This method will allocate table directly and may be used in the method.
|
29
|
+
# @note `table` must have unique keys.
|
30
|
+
#
|
31
|
+
def create(table)
|
32
|
+
instance = allocate
|
33
|
+
instance.instance_variable_set(:@table, table)
|
34
|
+
instance
|
35
|
+
end
|
31
36
|
end
|
32
37
|
|
33
38
|
# Creates a new DataFrame.
|
34
39
|
#
|
40
|
+
# @overload initialize(hash)
|
41
|
+
# Initialize a DataFrame by a Hash.
|
42
|
+
#
|
43
|
+
# @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
|
44
|
+
# a Hash of `key` with array-like for column values.
|
45
|
+
# `key`s are Symbol or String.
|
46
|
+
# @example Initialize by a Hash
|
47
|
+
# hash = { x: [1, 2, 3], y: %w[A B C] }
|
48
|
+
# DataFrame.new(hash)
|
49
|
+
# @example Initialize by a Hash like arguments.
|
50
|
+
# DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
51
|
+
# @example Initialize from objects that respond to `#to_arrow_array`.
|
52
|
+
# # An `array-like` object that responds to #to_arrow_array is also accepted.
|
53
|
+
# require 'arrow-numo-narray'
|
54
|
+
# DataFrame.new(numo: Numo::DFloat.new(3).rand)
|
55
|
+
#
|
35
56
|
# @overload initialize(table)
|
36
|
-
# Initialize DataFrame by an `Arrow::Table
|
57
|
+
# Initialize a DataFrame by an `Arrow::Table`.
|
37
58
|
#
|
38
59
|
# @param table [Arrow::Table]
|
39
|
-
#
|
60
|
+
# a table to have in the DataFrame.
|
61
|
+
# @example Initialize by a Table
|
62
|
+
# table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
63
|
+
# DataFrame.new(table)
|
64
|
+
#
|
65
|
+
# @overload initialize(schema, row_oriented_array)
|
66
|
+
# Initialize a DataFrame by schema and row_oriented_array.
|
67
|
+
#
|
68
|
+
# @param schema [Hash<key => type>]
|
69
|
+
# a schema of key and data type.
|
70
|
+
# @param row_oriented_array [Array]
|
71
|
+
# an Array of rows.
|
72
|
+
# @example Initialize by a schema and a row_oriented_array.
|
73
|
+
# schema = { x: :uint8, y: :string }
|
74
|
+
# row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
|
75
|
+
# DataFrame.new(schema, row_oriented_array)
|
40
76
|
#
|
41
77
|
# @overload initialize(arrowable)
|
42
78
|
# Initialize DataFrame by a `#to_arrow` responsible object.
|
@@ -47,6 +83,11 @@ module RedAmber
|
|
47
83
|
#
|
48
84
|
# @note `RedAmber::DataFrame` itself is readable by this.
|
49
85
|
# @note Hash is refined to respond to `#to_arrow` in this class.
|
86
|
+
# @example Initialize by Red Dataset object.
|
87
|
+
# require 'datasets-arrow'
|
88
|
+
# dataset = Datasets::Penguins.new
|
89
|
+
# penguins = DataFrame.new(dataset)
|
90
|
+
# @since 0.2.2
|
50
91
|
#
|
51
92
|
# @overload initialize(rover_like)
|
52
93
|
# Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
|
@@ -60,20 +101,18 @@ module RedAmber
|
|
60
101
|
# @overload initialize()
|
61
102
|
# Create empty DataFrame
|
62
103
|
#
|
63
|
-
# @example
|
104
|
+
# @example
|
105
|
+
# DataFrame.new
|
64
106
|
#
|
65
107
|
# @overload initialize(empty)
|
66
108
|
# Create empty DataFrame
|
67
109
|
#
|
68
110
|
# @param empty [nil, [], {}]
|
69
111
|
#
|
70
|
-
# @example
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
# @param args [values]
|
75
|
-
# Accepts any argments which is valid for `Arrow::Table.new(args)`. See
|
76
|
-
# {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
|
112
|
+
# @example Return empty DataFrame.
|
113
|
+
# DataFrame.new([])
|
114
|
+
# DataFrame.new({})
|
115
|
+
# DataFrame.new(nil)
|
77
116
|
#
|
78
117
|
def initialize(*args)
|
79
118
|
case args
|
@@ -109,15 +148,16 @@ module RedAmber
|
|
109
148
|
|
110
149
|
# Returns the table having within.
|
111
150
|
#
|
112
|
-
# @return [Arrow::Table]
|
151
|
+
# @return [Arrow::Table]
|
152
|
+
# the table within.
|
113
153
|
#
|
114
154
|
attr_reader :table
|
115
|
-
|
116
155
|
alias_method :to_arrow, :table
|
117
156
|
|
118
|
-
# Returns the number of rows.
|
157
|
+
# Returns the number of records (rows).
|
119
158
|
#
|
120
|
-
# @return [Integer]
|
159
|
+
# @return [Integer]
|
160
|
+
# number of records (rows).
|
121
161
|
#
|
122
162
|
def size
|
123
163
|
@table.n_rows
|
@@ -126,9 +166,10 @@ module RedAmber
|
|
126
166
|
alias_method :n_obs, :size
|
127
167
|
alias_method :n_rows, :size
|
128
168
|
|
129
|
-
# Returns the number of columns.
|
169
|
+
# Returns the number of variables (columns).
|
130
170
|
#
|
131
|
-
# @return [Integer]
|
171
|
+
# @return [Integer]
|
172
|
+
# number of variables (columns).
|
132
173
|
#
|
133
174
|
def n_keys
|
134
175
|
@table.n_columns
|
@@ -140,7 +181,7 @@ module RedAmber
|
|
140
181
|
# Returns the numbers of rows and columns.
|
141
182
|
#
|
142
183
|
# @return [Array]
|
143
|
-
#
|
184
|
+
# number of rows and number of columns in an array.
|
144
185
|
# Same as [size, n_keys].
|
145
186
|
#
|
146
187
|
def shape
|
@@ -160,7 +201,7 @@ module RedAmber
|
|
160
201
|
# Returns an Array of keys.
|
161
202
|
#
|
162
203
|
# @return [Array]
|
163
|
-
#
|
204
|
+
# keys in an Array.
|
164
205
|
#
|
165
206
|
def keys
|
166
207
|
@keys || @keys = init_instance_vars(:keys)
|
@@ -170,9 +211,10 @@ module RedAmber
|
|
170
211
|
|
171
212
|
# Returns true if self has a specified key in the argument.
|
172
213
|
#
|
173
|
-
# @param key [Symbol, String]
|
214
|
+
# @param key [Symbol, String]
|
215
|
+
# key to test.
|
174
216
|
# @return [Boolean]
|
175
|
-
#
|
217
|
+
# returns true if self has key in Symbol.
|
176
218
|
#
|
177
219
|
def key?(key)
|
178
220
|
keys.include?(key.to_sym)
|
@@ -181,9 +223,10 @@ module RedAmber
|
|
181
223
|
|
182
224
|
# Returns index of specified key in the Array keys.
|
183
225
|
#
|
184
|
-
# @param key [Symbol, String]
|
226
|
+
# @param key [Symbol, String]
|
227
|
+
# key to know.
|
185
228
|
# @return [Integer]
|
186
|
-
#
|
229
|
+
# index of key in the Array keys.
|
187
230
|
#
|
188
231
|
def key_index(key)
|
189
232
|
keys.find_index(key.to_sym)
|
@@ -194,7 +237,7 @@ module RedAmber
|
|
194
237
|
# Returns abbreviated type names in an Array.
|
195
238
|
#
|
196
239
|
# @return [Array]
|
197
|
-
#
|
240
|
+
# abbreviated Red Arrow data type names.
|
198
241
|
#
|
199
242
|
def types
|
200
243
|
@types || @types = @table.columns.map do |column|
|
@@ -205,7 +248,7 @@ module RedAmber
|
|
205
248
|
# Returns an Array of Classes of data type.
|
206
249
|
#
|
207
250
|
# @return [Array]
|
208
|
-
#
|
251
|
+
# an Array of Red Arrow data type Classes.
|
209
252
|
#
|
210
253
|
def type_classes
|
211
254
|
@data_types || @data_types = @table.columns.map { |column| column.data_type.class }
|
@@ -214,34 +257,16 @@ module RedAmber
|
|
214
257
|
# Returns Vectors in an Array.
|
215
258
|
#
|
216
259
|
# @return [Array]
|
217
|
-
#
|
260
|
+
# an Array of Vector.
|
218
261
|
#
|
219
262
|
def vectors
|
220
263
|
@vectors || @vectors = init_instance_vars(:vectors)
|
221
264
|
end
|
222
265
|
|
223
|
-
# Returns row indices (start...(size+start)) in a Vector.
|
224
|
-
#
|
225
|
-
# @param start [Object]
|
226
|
-
# Object which have `#succ` method.
|
227
|
-
#
|
228
|
-
# @return [Array]
|
229
|
-
# A Vector of row indices.
|
230
|
-
#
|
231
|
-
# @example
|
232
|
-
# (when self.size == 5)
|
233
|
-
# - indices #=> Vector[0, 1, 2, 3, 4]
|
234
|
-
# - indices(1) #=> Vector[1, 2, 3, 4, 5]
|
235
|
-
# - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
|
236
|
-
#
|
237
|
-
def indices(start = 0)
|
238
|
-
Vector.new((start..).take(size))
|
239
|
-
end
|
240
|
-
alias_method :indexes, :indices
|
241
|
-
|
242
266
|
# Returns column-oriented data in a Hash.
|
243
267
|
#
|
244
|
-
# @return [Hash]
|
268
|
+
# @return [Hash]
|
269
|
+
# a Hash of 'key => column_in_an_array'.
|
245
270
|
#
|
246
271
|
def to_h
|
247
272
|
variables.transform_values(&:to_a)
|
@@ -249,7 +274,8 @@ module RedAmber
|
|
249
274
|
|
250
275
|
# Returns a row-oriented array without header.
|
251
276
|
#
|
252
|
-
# @return [Array]
|
277
|
+
# @return [Array]
|
278
|
+
# row-oriented data without header.
|
253
279
|
#
|
254
280
|
# @note If you need column-oriented array, use `.to_h.to_a`.
|
255
281
|
#
|
@@ -260,7 +286,8 @@ module RedAmber
|
|
260
286
|
|
261
287
|
# Returns column name and data type in a Hash.
|
262
288
|
#
|
263
|
-
# @return [Hash]
|
289
|
+
# @return [Hash]
|
290
|
+
# column name and data type.
|
264
291
|
#
|
265
292
|
# @example
|
266
293
|
# RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
|
@@ -273,7 +300,7 @@ module RedAmber
|
|
273
300
|
# Compare DataFrames.
|
274
301
|
#
|
275
302
|
# @return [true, false]
|
276
|
-
#
|
303
|
+
# true if other is a DataFrame and table is same.
|
277
304
|
# Otherwise return false.
|
278
305
|
#
|
279
306
|
def ==(other)
|
@@ -282,7 +309,8 @@ module RedAmber
|
|
282
309
|
|
283
310
|
# Check if it is an empty DataFrame.
|
284
311
|
#
|
285
|
-
# @return [true, false
|
312
|
+
# @return [true, false]
|
313
|
+
# true if it has no columns.
|
286
314
|
#
|
287
315
|
def empty?
|
288
316
|
variables.empty?
|
@@ -293,14 +321,18 @@ module RedAmber
|
|
293
321
|
# @overload each_row
|
294
322
|
# Returns Enumerator when no block given.
|
295
323
|
#
|
296
|
-
# @return [Enumerator]
|
324
|
+
# @return [Enumerator]
|
325
|
+
# enumerator of each row.
|
297
326
|
#
|
298
327
|
# @overload each_row(&block)
|
299
328
|
# Yields with key and row pairs.
|
300
329
|
#
|
301
|
-
# @
|
302
|
-
#
|
303
|
-
# @yieldreturn [Integer]
|
330
|
+
# @yieldparam key_row_pairs [Hash]
|
331
|
+
# key and row pairs.
|
332
|
+
# @yieldreturn [Integer]
|
333
|
+
# size of the DataFrame.
|
334
|
+
# @return [Integer]
|
335
|
+
# returns size.
|
304
336
|
#
|
305
337
|
def each_row
|
306
338
|
return enum_for(:each_row) unless block_given?
|
@@ -316,25 +348,346 @@ module RedAmber
|
|
316
348
|
|
317
349
|
# Returns self in a `Rover::DataFrame`.
|
318
350
|
#
|
319
|
-
# @return [Rover::DataFrame]
|
351
|
+
# @return [Rover::DataFrame]
|
352
|
+
# a `Rover::DataFrame`.
|
320
353
|
#
|
321
354
|
def to_rover
|
322
355
|
require 'rover'
|
323
356
|
Rover::DataFrame.new(to_h)
|
324
357
|
end
|
325
358
|
|
359
|
+
# Create a Group object. Or create a Group and summarize it.
|
360
|
+
#
|
361
|
+
# @overload group(*group_keys)
|
362
|
+
# Create a Group object.
|
363
|
+
#
|
364
|
+
# @param group_keys [Array<Symbol, String>]
|
365
|
+
# keys for grouping.
|
366
|
+
# @return [Group]
|
367
|
+
# Group object.
|
368
|
+
# @example Create a Group
|
369
|
+
# penguins.group(:species)
|
370
|
+
#
|
371
|
+
# # =>
|
372
|
+
# #<RedAmber::Group : 0x000000000000c3c8>
|
373
|
+
# species group_count
|
374
|
+
# <string> <uint8>
|
375
|
+
# 0 Adelie 152
|
376
|
+
# 1 Chinstrap 68
|
377
|
+
# 2 Gentoo 124
|
378
|
+
#
|
379
|
+
# @overload group(*group_keys, &block)
|
380
|
+
# Create a Group and summarize it by aggregation functions from the block.
|
381
|
+
#
|
382
|
+
# @yieldparam group [Group]
|
383
|
+
# passes Group object.
|
384
|
+
# @yieldreturn [DataFrame, Array<DataFrame>]
|
385
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
386
|
+
# @return [DataFrame]
|
387
|
+
# summarized DataFrame.
|
388
|
+
# @example Create a group and summarize it.
|
389
|
+
# penguins.group(:species) { mean(:bill_length_mm) }
|
390
|
+
#
|
391
|
+
# # =>
|
392
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
|
393
|
+
# species mean(bill_length_mm)
|
394
|
+
# <string> <double>
|
395
|
+
# 0 Adelie 38.79
|
396
|
+
# 1 Chinstrap 48.83
|
397
|
+
# 2 Gentoo 47.5
|
398
|
+
#
|
326
399
|
def group(*group_keys, &block)
|
327
400
|
g = Group.new(self, group_keys)
|
328
401
|
g = g.summarize(&block) if block
|
329
402
|
g
|
330
403
|
end
|
331
404
|
|
405
|
+
# Create SubFrames by value grouping.
|
406
|
+
#
|
407
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
408
|
+
# @param keys [Symbol, String, Array<Symbol, String>]
|
409
|
+
# grouping keys.
|
410
|
+
# @return [SubFrames]
|
411
|
+
# a created SubFrames grouped by column values on `keys`.
|
412
|
+
# @example
|
413
|
+
# df.sub_by_value(keys: :y)
|
414
|
+
#
|
415
|
+
# # =>
|
416
|
+
# #<RedAmber::SubFrames : 0x000000000000fc08>
|
417
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
418
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
419
|
+
# ---
|
420
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
|
421
|
+
# x y z
|
422
|
+
# <uint8> <string> <boolean>
|
423
|
+
# 0 1 A false
|
424
|
+
# 1 2 A true
|
425
|
+
# ---
|
426
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
|
427
|
+
# x y z
|
428
|
+
# <uint8> <string> <boolean>
|
429
|
+
# 0 3 B false
|
430
|
+
# 1 4 B (nil)
|
431
|
+
# 2 5 B true
|
432
|
+
# ---
|
433
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
|
434
|
+
# x y z
|
435
|
+
# <uint8> <string> <boolean>
|
436
|
+
# 0 6 C false
|
437
|
+
#
|
438
|
+
# @since 0.4.0
|
439
|
+
#
|
440
|
+
def sub_by_value(keys: nil)
|
441
|
+
SubFrames.new(self, group(keys).filters)
|
442
|
+
end
|
443
|
+
alias_method :subframes_by_value, :sub_by_value
|
444
|
+
|
445
|
+
# Create SubFrames by Windowing with `from`, `size` and `step`.
|
446
|
+
#
|
447
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
448
|
+
# @param from [Integer]
|
449
|
+
# start position of window.
|
450
|
+
# @param size [Integer]
|
451
|
+
# window size.
|
452
|
+
# @param step [Integer]
|
453
|
+
# moving step of window.
|
454
|
+
# @return [SubFrames]
|
455
|
+
# a created SubFrames.
|
456
|
+
# @example
|
457
|
+
# df.sub_by_window(size: 4, step: 2)
|
458
|
+
#
|
459
|
+
# # =>
|
460
|
+
# #<RedAmber::SubFrames : 0x000000000000fc58>
|
461
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
462
|
+
# 2 SubFrames: [4, 4] in sizes.
|
463
|
+
# ---
|
464
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
|
465
|
+
# x y z
|
466
|
+
# <uint8> <string> <boolean>
|
467
|
+
# 0 1 A false
|
468
|
+
# 1 2 A true
|
469
|
+
# 2 3 B false
|
470
|
+
# 3 4 B (nil)
|
471
|
+
# ---
|
472
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
|
473
|
+
# x y z
|
474
|
+
# <uint8> <string> <boolean>
|
475
|
+
# 0 3 B false
|
476
|
+
# 1 4 B (nil)
|
477
|
+
# 2 5 B true
|
478
|
+
# 3 6 C false
|
479
|
+
#
|
480
|
+
# @since 0.4.0
|
481
|
+
#
|
482
|
+
def sub_by_window(from: 0, size: nil, step: 1)
|
483
|
+
SubFrames.new(self) do
|
484
|
+
from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
|
485
|
+
[*i...(i + size)]
|
486
|
+
end
|
487
|
+
end
|
488
|
+
end
|
489
|
+
alias_method :subframes_by_window, :sub_by_window
|
490
|
+
|
491
|
+
# Create SubFrames by grouping/windowing by position using an enumerator method.
|
492
|
+
#
|
493
|
+
# This method will process the indices of self by enumerator.
|
494
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
495
|
+
# @param enumerator_method [Symbol]
|
496
|
+
# Enumerator name.
|
497
|
+
# @param args [<Object>]
|
498
|
+
# arguments for the enumerator method.
|
499
|
+
# @return [SubFrames]
|
500
|
+
# a created SubFrames.
|
501
|
+
# @example Create a SubFrames object sliced by 3 rows.
|
502
|
+
# df.sub_by_enum(:each_slice, 3)
|
503
|
+
#
|
504
|
+
# # =>
|
505
|
+
# #<RedAmber::SubFrames : 0x000000000000fd20>
|
506
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
507
|
+
# 2 SubFrames: [3, 3] in sizes.
|
508
|
+
# ---
|
509
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
|
510
|
+
# x y z
|
511
|
+
# <uint8> <string> <boolean>
|
512
|
+
# 0 1 A false
|
513
|
+
# 1 2 A true
|
514
|
+
# 2 3 B false
|
515
|
+
# ---
|
516
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
|
517
|
+
# x y z
|
518
|
+
# <uint8> <string> <boolean>
|
519
|
+
# 0 4 B (nil)
|
520
|
+
# 1 5 B true
|
521
|
+
# 2 6 C false
|
522
|
+
#
|
523
|
+
# @example Create a SubFrames object for each consecutive 4 rows.
|
524
|
+
# df.sub_by_enum(:each_cons, 4)
|
525
|
+
#
|
526
|
+
# # =>
|
527
|
+
# #<RedAmber::SubFrames : 0x000000000000fd98>
|
528
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
529
|
+
# 3 SubFrames: [4, 4, 4] in sizes.
|
530
|
+
# ---
|
531
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
|
532
|
+
# x y z
|
533
|
+
# <uint8> <string> <boolean>
|
534
|
+
# 0 1 A false
|
535
|
+
# 1 2 A true
|
536
|
+
# 2 3 B false
|
537
|
+
# 3 4 B (nil)
|
538
|
+
# ---
|
539
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
|
540
|
+
# x y z
|
541
|
+
# <uint8> <string> <boolean>
|
542
|
+
# 0 2 A true
|
543
|
+
# 1 3 B false
|
544
|
+
# 2 4 B (nil)
|
545
|
+
# 3 5 B true
|
546
|
+
# ---
|
547
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
|
548
|
+
# x y z
|
549
|
+
# <uint8> <string> <boolean>
|
550
|
+
# 0 3 B false
|
551
|
+
# 1 4 B (nil)
|
552
|
+
# 2 5 B true
|
553
|
+
# 3 6 C false
|
554
|
+
#
|
555
|
+
# @since 0.4.0
|
556
|
+
#
|
557
|
+
def sub_by_enum(enumerator_method, *args)
|
558
|
+
SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
|
559
|
+
end
|
560
|
+
alias_method :subframes_by_enum, :sub_by_enum
|
561
|
+
|
562
|
+
# Create SubFrames by windowing with a kernel (i.e. masked window) and step.
|
563
|
+
#
|
564
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
565
|
+
# @param kernel [Array<true, false>, Vector]
|
566
|
+
# boolean array-like to pick records in the window.
|
567
|
+
# Kernel is a boolean Array and it behaves like a masked window.
|
568
|
+
# @param step [Integer]
|
569
|
+
# moving step of window.
|
570
|
+
# @return [SubFrames]
|
571
|
+
# a created SubFrames.
|
572
|
+
# @example
|
573
|
+
# kernel = [true, false, false, true]
|
574
|
+
# df.sub_by_kernel(kernel, step: 2)
|
575
|
+
#
|
576
|
+
# # =>
|
577
|
+
# #<RedAmber::SubFrames : 0x000000000000fde8>
|
578
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
579
|
+
# 2 SubFrames: [2, 2] in sizes.
|
580
|
+
# ---
|
581
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
|
582
|
+
# x y z
|
583
|
+
# <uint8> <string> <boolean>
|
584
|
+
# 0 1 A false
|
585
|
+
# 1 4 B (nil)
|
586
|
+
# ---
|
587
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
|
588
|
+
# x y z
|
589
|
+
# <uint8> <string> <boolean>
|
590
|
+
# 0 3 B false
|
591
|
+
# 1 6 C false
|
592
|
+
#
|
593
|
+
# @since 0.4.0
|
594
|
+
#
|
595
|
+
def sub_by_kernel(kernel, step: 1)
|
596
|
+
limit_size = size - kernel.size
|
597
|
+
kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
|
598
|
+
SubFrames.new(self) do
|
599
|
+
0.step(by: step, to: limit_size).map do |i|
|
600
|
+
kernel_vector.shift(i)
|
601
|
+
end
|
602
|
+
end
|
603
|
+
end
|
604
|
+
alias_method :subframes_by_kernel, :sub_by_kernel
|
605
|
+
|
606
|
+
# Generic builder of sub-dataframes from self.
|
607
|
+
#
|
608
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
609
|
+
# @overload build_subframes(subset_specifier)
|
610
|
+
# Create a new SubFrames object.
|
611
|
+
#
|
612
|
+
# @param subset_specifier [Array<Vector>, Array<array-like>]
|
613
|
+
# an Array of numeric indices or boolean filters
|
614
|
+
# to create subsets of DataFrame.
|
615
|
+
# @return [SubFrames]
|
616
|
+
# new SubFrames.
|
617
|
+
# @example
|
618
|
+
# df.build_subframes([[0, 2, 4], [1, 3, 5]])
|
619
|
+
#
|
620
|
+
# # =>
|
621
|
+
# #<RedAmber::SubFrames : 0x000000000000fe9c>
|
622
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
623
|
+
# 2 SubFrames: [3, 3] in sizes.
|
624
|
+
# ---
|
625
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
|
626
|
+
# x y z
|
627
|
+
# <uint8> <string> <boolean>
|
628
|
+
# 0 1 A false
|
629
|
+
# 1 3 B false
|
630
|
+
# 2 5 B true
|
631
|
+
# ---
|
632
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
|
633
|
+
# x y z
|
634
|
+
# <uint8> <string> <boolean>
|
635
|
+
# 0 2 A true
|
636
|
+
# 1 4 B (nil)
|
637
|
+
# 2 6 C false
|
638
|
+
#
|
639
|
+
# @overload build_subframes
|
640
|
+
# Create a new SubFrames object by block.
|
641
|
+
#
|
642
|
+
# @yield [self]
|
643
|
+
# the block is called within the context of self.
|
644
|
+
# (Block is called by instance_eval(&block). )
|
645
|
+
# @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
|
646
|
+
# an Array of index or boolean array-likes to create subsets of DataFrame.
|
647
|
+
# All array-likes must respond to #numeric? or #boolean?.
|
648
|
+
# @example
|
649
|
+
# dataframe.build_subframes do
|
650
|
+
# even = indices.map(&:even?)
|
651
|
+
# [even, !even]
|
652
|
+
# end
|
653
|
+
#
|
654
|
+
# # =>
|
655
|
+
# #<RedAmber::SubFrames : 0x000000000000fe60>
|
656
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
657
|
+
# 2 SubFrames: [3, 3] in sizes.
|
658
|
+
# ---
|
659
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
|
660
|
+
# x y z
|
661
|
+
# <uint8> <string> <boolean>
|
662
|
+
# 0 1 A false
|
663
|
+
# 1 3 B false
|
664
|
+
# 2 5 B true
|
665
|
+
# ---
|
666
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
|
667
|
+
# x y z
|
668
|
+
# <uint8> <string> <boolean>
|
669
|
+
# 0 2 A true
|
670
|
+
# 1 4 B (nil)
|
671
|
+
# 2 6 C false
|
672
|
+
#
|
673
|
+
# @since 0.4.0
|
674
|
+
#
|
675
|
+
def build_subframes(subset_specifier = nil, &block)
|
676
|
+
if block
|
677
|
+
SubFrames.new(self, instance_eval(&block))
|
678
|
+
else
|
679
|
+
SubFrames.new(self, subset_specifier)
|
680
|
+
end
|
681
|
+
end
|
682
|
+
|
683
|
+
# Catch variable (column) key as method name.
|
332
684
|
def method_missing(name, *args, &block)
|
333
685
|
return v(name) if args.empty? && key?(name)
|
334
686
|
|
335
687
|
super
|
336
688
|
end
|
337
689
|
|
690
|
+
# Catch variable (column) key as method name.
|
338
691
|
def respond_to_missing?(name, include_private)
|
339
692
|
return true if key?(name)
|
340
693
|
|
@@ -346,15 +699,16 @@ module RedAmber
|
|
346
699
|
# initialize @variables, @keys, @vectors and return one of them
|
347
700
|
def init_instance_vars(var)
|
348
701
|
ary =
|
349
|
-
@table
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
702
|
+
@table
|
703
|
+
.columns
|
704
|
+
.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
|
705
|
+
v = Vector.create(column.data)
|
706
|
+
k = column.name.to_sym
|
707
|
+
v.key = k
|
708
|
+
variables[k] = v
|
709
|
+
keys << k
|
710
|
+
vectors << v
|
711
|
+
end
|
358
712
|
|
359
713
|
@variables, @keys, @vectors = ary
|
360
714
|
ary[%i[variables keys vectors].index(var)]
|