red_amber 0.3.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
data/lib/red_amber/data_frame.rb
CHANGED
@@ -4,7 +4,7 @@ module RedAmber
|
|
4
4
|
# Class to represent a data frame.
|
5
5
|
# Variable @table holds an Arrow::Table object.
|
6
6
|
class DataFrame
|
7
|
-
#
|
7
|
+
# Mix-in
|
8
8
|
include DataFrameCombinable
|
9
9
|
include DataFrameDisplayable
|
10
10
|
include DataFrameIndexable
|
@@ -17,26 +17,79 @@ module RedAmber
|
|
17
17
|
using RefineArrowTable
|
18
18
|
using RefineHash
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
class << self
|
21
|
+
# Quicker DataFrame constructor from a `Arrow::Table`.
|
22
|
+
#
|
23
|
+
# @param table [Arrow::Table]
|
24
|
+
# A table to have in the DataFrame.
|
25
|
+
# @return [DataFrame]
|
26
|
+
# Initialized DataFrame.
|
27
|
+
#
|
28
|
+
# @note This method will allocate table directly and may be used in the method.
|
29
|
+
# @note `table` must have unique keys.
|
30
|
+
#
|
31
|
+
def create(table)
|
32
|
+
instance = allocate
|
33
|
+
instance.instance_variable_set(:@table, table)
|
34
|
+
instance
|
35
|
+
end
|
36
|
+
|
37
|
+
# Return new DataFrame for specified schema and value.
|
38
|
+
#
|
39
|
+
# @param dataframe_for_schema [DataFrame]
|
40
|
+
# schema of this dataframe will be used.
|
41
|
+
# @param dataframe_for_value [DataFrame]
|
42
|
+
# column values of this dataframe will be used.
|
43
|
+
# @return [DataFrame]
|
44
|
+
# created DataFrame.
|
45
|
+
# @since 0.4.1
|
46
|
+
#
|
47
|
+
def new_dataframe_with_schema(dataframe_for_schema, dataframe_for_value)
|
48
|
+
DataFrame.create(
|
49
|
+
Arrow::Table.new(dataframe_for_schema.table.schema,
|
50
|
+
dataframe_for_value.table.columns)
|
51
|
+
)
|
52
|
+
end
|
31
53
|
end
|
32
54
|
|
33
55
|
# Creates a new DataFrame.
|
34
56
|
#
|
57
|
+
# @overload initialize(hash)
|
58
|
+
# Initialize a DataFrame by a Hash.
|
59
|
+
#
|
60
|
+
# @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
|
61
|
+
# a Hash of `key` with array-like for column values.
|
62
|
+
# `key`s are Symbol or String.
|
63
|
+
# @example Initialize by a Hash
|
64
|
+
# hash = { x: [1, 2, 3], y: %w[A B C] }
|
65
|
+
# DataFrame.new(hash)
|
66
|
+
# @example Initialize by a Hash like arguments.
|
67
|
+
# DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
68
|
+
# @example Initialize from objects that respond to #to_arrow_array.
|
69
|
+
# # An `array-like` object that responds to #to_arrow_array is also available.
|
70
|
+
# require 'arrow-numo-narray'
|
71
|
+
# DataFrame.new(numo: Numo::DFloat.new(3).rand)
|
72
|
+
#
|
35
73
|
# @overload initialize(table)
|
36
|
-
# Initialize DataFrame by an `Arrow::Table
|
74
|
+
# Initialize a DataFrame by an `Arrow::Table`.
|
37
75
|
#
|
38
76
|
# @param table [Arrow::Table]
|
39
|
-
#
|
77
|
+
# a table to have in the DataFrame.
|
78
|
+
# @example Initialize by a Table
|
79
|
+
# table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
80
|
+
# DataFrame.new(table)
|
81
|
+
#
|
82
|
+
# @overload initialize(schema, row_oriented_array)
|
83
|
+
# Initialize a DataFrame by schema and row_oriented_array.
|
84
|
+
#
|
85
|
+
# @param schema [Hash<key => type>]
|
86
|
+
# a schema of key and data type.
|
87
|
+
# @param row_oriented_array [Array]
|
88
|
+
# an Array of rows.
|
89
|
+
# @example Initialize by a schema and a row_oriented_array.
|
90
|
+
# schema = { x: :uint8, y: :string }
|
91
|
+
# row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
|
92
|
+
# DataFrame.new(schema, row_oriented_array)
|
40
93
|
#
|
41
94
|
# @overload initialize(arrowable)
|
42
95
|
# Initialize DataFrame by a `#to_arrow` responsible object.
|
@@ -47,6 +100,11 @@ module RedAmber
|
|
47
100
|
#
|
48
101
|
# @note `RedAmber::DataFrame` itself is readable by this.
|
49
102
|
# @note Hash is refined to respond to `#to_arrow` in this class.
|
103
|
+
# @example Initialize by Red Dataset object.
|
104
|
+
# require 'datasets-arrow'
|
105
|
+
# dataset = Datasets::Penguins.new
|
106
|
+
# penguins = DataFrame.new(dataset)
|
107
|
+
# @since 0.2.2
|
50
108
|
#
|
51
109
|
# @overload initialize(rover_like)
|
52
110
|
# Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
|
@@ -60,20 +118,18 @@ module RedAmber
|
|
60
118
|
# @overload initialize()
|
61
119
|
# Create empty DataFrame
|
62
120
|
#
|
63
|
-
# @example
|
121
|
+
# @example
|
122
|
+
# DataFrame.new
|
64
123
|
#
|
65
124
|
# @overload initialize(empty)
|
66
125
|
# Create empty DataFrame
|
67
126
|
#
|
68
127
|
# @param empty [nil, [], {}]
|
69
128
|
#
|
70
|
-
# @example
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
# @param args [values]
|
75
|
-
# Accepts any argments which is valid for `Arrow::Table.new(args)`. See
|
76
|
-
# {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
|
129
|
+
# @example Return empty DataFrame.
|
130
|
+
# DataFrame.new([])
|
131
|
+
# DataFrame.new({})
|
132
|
+
# DataFrame.new(nil)
|
77
133
|
#
|
78
134
|
def initialize(*args)
|
79
135
|
case args
|
@@ -109,15 +165,16 @@ module RedAmber
|
|
109
165
|
|
110
166
|
# Returns the table having within.
|
111
167
|
#
|
112
|
-
# @return [Arrow::Table]
|
168
|
+
# @return [Arrow::Table]
|
169
|
+
# the table within.
|
113
170
|
#
|
114
171
|
attr_reader :table
|
115
|
-
|
116
172
|
alias_method :to_arrow, :table
|
117
173
|
|
118
|
-
# Returns the number of rows.
|
174
|
+
# Returns the number of records (rows).
|
119
175
|
#
|
120
|
-
# @return [Integer]
|
176
|
+
# @return [Integer]
|
177
|
+
# number of records (rows).
|
121
178
|
#
|
122
179
|
def size
|
123
180
|
@table.n_rows
|
@@ -126,9 +183,10 @@ module RedAmber
|
|
126
183
|
alias_method :n_obs, :size
|
127
184
|
alias_method :n_rows, :size
|
128
185
|
|
129
|
-
# Returns the number of columns.
|
186
|
+
# Returns the number of variables (columns).
|
130
187
|
#
|
131
|
-
# @return [Integer]
|
188
|
+
# @return [Integer]
|
189
|
+
# number of variables (columns).
|
132
190
|
#
|
133
191
|
def n_keys
|
134
192
|
@table.n_columns
|
@@ -140,7 +198,7 @@ module RedAmber
|
|
140
198
|
# Returns the numbers of rows and columns.
|
141
199
|
#
|
142
200
|
# @return [Array]
|
143
|
-
#
|
201
|
+
# number of rows and number of columns in an array.
|
144
202
|
# Same as [size, n_keys].
|
145
203
|
#
|
146
204
|
def shape
|
@@ -153,26 +211,27 @@ module RedAmber
|
|
153
211
|
# `key => Vector` pairs for each column.
|
154
212
|
#
|
155
213
|
def variables
|
156
|
-
@variables
|
214
|
+
@variables ||= init_instance_vars(:variables)
|
157
215
|
end
|
158
216
|
alias_method :vars, :variables
|
159
217
|
|
160
218
|
# Returns an Array of keys.
|
161
219
|
#
|
162
220
|
# @return [Array]
|
163
|
-
#
|
221
|
+
# keys in an Array.
|
164
222
|
#
|
165
223
|
def keys
|
166
|
-
@keys
|
224
|
+
@keys ||= init_instance_vars(:keys)
|
167
225
|
end
|
168
226
|
alias_method :column_names, :keys
|
169
227
|
alias_method :var_names, :keys
|
170
228
|
|
171
229
|
# Returns true if self has a specified key in the argument.
|
172
230
|
#
|
173
|
-
# @param key [Symbol, String]
|
231
|
+
# @param key [Symbol, String]
|
232
|
+
# key to test.
|
174
233
|
# @return [Boolean]
|
175
|
-
#
|
234
|
+
# returns true if self has key in Symbol.
|
176
235
|
#
|
177
236
|
def key?(key)
|
178
237
|
keys.include?(key.to_sym)
|
@@ -181,9 +240,10 @@ module RedAmber
|
|
181
240
|
|
182
241
|
# Returns index of specified key in the Array keys.
|
183
242
|
#
|
184
|
-
# @param key [Symbol, String]
|
243
|
+
# @param key [Symbol, String]
|
244
|
+
# key to know.
|
185
245
|
# @return [Integer]
|
186
|
-
#
|
246
|
+
# index of key in the Array keys.
|
187
247
|
#
|
188
248
|
def key_index(key)
|
189
249
|
keys.find_index(key.to_sym)
|
@@ -194,10 +254,10 @@ module RedAmber
|
|
194
254
|
# Returns abbreviated type names in an Array.
|
195
255
|
#
|
196
256
|
# @return [Array]
|
197
|
-
#
|
257
|
+
# abbreviated Red Arrow data type names.
|
198
258
|
#
|
199
259
|
def types
|
200
|
-
@types
|
260
|
+
@types ||= @table.columns.map do |column|
|
201
261
|
column.data.value_type.nick.to_sym
|
202
262
|
end
|
203
263
|
end
|
@@ -205,43 +265,25 @@ module RedAmber
|
|
205
265
|
# Returns an Array of Classes of data type.
|
206
266
|
#
|
207
267
|
# @return [Array]
|
208
|
-
#
|
268
|
+
# an Array of Red Arrow data type Classes.
|
209
269
|
#
|
210
270
|
def type_classes
|
211
|
-
@
|
271
|
+
@type_classes ||= @table.columns.map { |column| column.data_type.class }
|
212
272
|
end
|
213
273
|
|
214
274
|
# Returns Vectors in an Array.
|
215
275
|
#
|
216
276
|
# @return [Array]
|
217
|
-
#
|
277
|
+
# an Array of Vector.
|
218
278
|
#
|
219
279
|
def vectors
|
220
|
-
@vectors
|
280
|
+
@vectors ||= init_instance_vars(:vectors)
|
221
281
|
end
|
222
282
|
|
223
|
-
# Returns row indices (start...(size+start)) in a Vector.
|
224
|
-
#
|
225
|
-
# @param start [Object]
|
226
|
-
# Object which have `#succ` method.
|
227
|
-
#
|
228
|
-
# @return [Array]
|
229
|
-
# A Vector of row indices.
|
230
|
-
#
|
231
|
-
# @example
|
232
|
-
# (when self.size == 5)
|
233
|
-
# - indices #=> Vector[0, 1, 2, 3, 4]
|
234
|
-
# - indices(1) #=> Vector[1, 2, 3, 4, 5]
|
235
|
-
# - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
|
236
|
-
#
|
237
|
-
def indices(start = 0)
|
238
|
-
Vector.new((start..).take(size))
|
239
|
-
end
|
240
|
-
alias_method :indexes, :indices
|
241
|
-
|
242
283
|
# Returns column-oriented data in a Hash.
|
243
284
|
#
|
244
|
-
# @return [Hash]
|
285
|
+
# @return [Hash]
|
286
|
+
# a Hash of 'key => column_in_an_array'.
|
245
287
|
#
|
246
288
|
def to_h
|
247
289
|
variables.transform_values(&:to_a)
|
@@ -249,7 +291,8 @@ module RedAmber
|
|
249
291
|
|
250
292
|
# Returns a row-oriented array without header.
|
251
293
|
#
|
252
|
-
# @return [Array]
|
294
|
+
# @return [Array]
|
295
|
+
# row-oriented data without header.
|
253
296
|
#
|
254
297
|
# @note If you need column-oriented array, use `.to_h.to_a`.
|
255
298
|
#
|
@@ -260,7 +303,8 @@ module RedAmber
|
|
260
303
|
|
261
304
|
# Returns column name and data type in a Hash.
|
262
305
|
#
|
263
|
-
# @return [Hash]
|
306
|
+
# @return [Hash]
|
307
|
+
# column name and data type.
|
264
308
|
#
|
265
309
|
# @example
|
266
310
|
# RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
|
@@ -273,7 +317,7 @@ module RedAmber
|
|
273
317
|
# Compare DataFrames.
|
274
318
|
#
|
275
319
|
# @return [true, false]
|
276
|
-
#
|
320
|
+
# true if other is a DataFrame and table is same.
|
277
321
|
# Otherwise return false.
|
278
322
|
#
|
279
323
|
def ==(other)
|
@@ -282,7 +326,8 @@ module RedAmber
|
|
282
326
|
|
283
327
|
# Check if it is an empty DataFrame.
|
284
328
|
#
|
285
|
-
# @return [true, false
|
329
|
+
# @return [true, false]
|
330
|
+
# true if it has no columns.
|
286
331
|
#
|
287
332
|
def empty?
|
288
333
|
variables.empty?
|
@@ -293,14 +338,18 @@ module RedAmber
|
|
293
338
|
# @overload each_row
|
294
339
|
# Returns Enumerator when no block given.
|
295
340
|
#
|
296
|
-
# @return [Enumerator]
|
341
|
+
# @return [Enumerator]
|
342
|
+
# enumerator of each row.
|
297
343
|
#
|
298
344
|
# @overload each_row(&block)
|
299
345
|
# Yields with key and row pairs.
|
300
346
|
#
|
301
|
-
# @
|
302
|
-
#
|
303
|
-
# @yieldreturn [Integer]
|
347
|
+
# @yieldparam key_row_pairs [Hash]
|
348
|
+
# key and row pairs.
|
349
|
+
# @yieldreturn [Integer]
|
350
|
+
# size of the DataFrame.
|
351
|
+
# @return [Integer]
|
352
|
+
# returns size.
|
304
353
|
#
|
305
354
|
def each_row
|
306
355
|
return enum_for(:each_row) unless block_given?
|
@@ -316,25 +365,346 @@ module RedAmber
|
|
316
365
|
|
317
366
|
# Returns self in a `Rover::DataFrame`.
|
318
367
|
#
|
319
|
-
# @return [Rover::DataFrame]
|
368
|
+
# @return [Rover::DataFrame]
|
369
|
+
# a `Rover::DataFrame`.
|
320
370
|
#
|
321
371
|
def to_rover
|
322
372
|
require 'rover'
|
323
373
|
Rover::DataFrame.new(to_h)
|
324
374
|
end
|
325
375
|
|
376
|
+
# Create a Group object. Or create a Group and summarize it.
|
377
|
+
#
|
378
|
+
# @overload group(*group_keys)
|
379
|
+
# Create a Group object.
|
380
|
+
#
|
381
|
+
# @param group_keys [Array<Symbol, String>]
|
382
|
+
# keys for grouping.
|
383
|
+
# @return [Group]
|
384
|
+
# Group object.
|
385
|
+
# @example Create a Group
|
386
|
+
# penguins.group(:species)
|
387
|
+
#
|
388
|
+
# # =>
|
389
|
+
# #<RedAmber::Group : 0x000000000000c3c8>
|
390
|
+
# species group_count
|
391
|
+
# <string> <uint8>
|
392
|
+
# 0 Adelie 152
|
393
|
+
# 1 Chinstrap 68
|
394
|
+
# 2 Gentoo 124
|
395
|
+
#
|
396
|
+
# @overload group(*group_keys)
|
397
|
+
# Create a Group and summarize it by aggregation functions from the block.
|
398
|
+
#
|
399
|
+
# @yieldparam group [Group]
|
400
|
+
# passes Group object.
|
401
|
+
# @yieldreturn [DataFrame, Array<DataFrame>]
|
402
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
403
|
+
# @return [DataFrame]
|
404
|
+
# summarized DataFrame.
|
405
|
+
# @example Create a group and summarize it.
|
406
|
+
# penguins.group(:species) { mean(:bill_length_mm) }
|
407
|
+
#
|
408
|
+
# # =>
|
409
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
|
410
|
+
# species mean(bill_length_mm)
|
411
|
+
# <string> <double>
|
412
|
+
# 0 Adelie 38.79
|
413
|
+
# 1 Chinstrap 48.83
|
414
|
+
# 2 Gentoo 47.5
|
415
|
+
#
|
326
416
|
def group(*group_keys, &block)
|
327
417
|
g = Group.new(self, group_keys)
|
328
418
|
g = g.summarize(&block) if block
|
329
419
|
g
|
330
420
|
end
|
331
421
|
|
422
|
+
# Create SubFrames by value grouping.
|
423
|
+
#
|
424
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
425
|
+
# @param keys [Symbol, String, Array<Symbol, String>]
|
426
|
+
# grouping keys.
|
427
|
+
# @return [SubFrames]
|
428
|
+
# a created SubFrames grouped by column values on `keys`.
|
429
|
+
# @example
|
430
|
+
# df.sub_by_value(keys: :y)
|
431
|
+
#
|
432
|
+
# # =>
|
433
|
+
# #<RedAmber::SubFrames : 0x000000000000fc08>
|
434
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
435
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
436
|
+
# ---
|
437
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
|
438
|
+
# x y z
|
439
|
+
# <uint8> <string> <boolean>
|
440
|
+
# 0 1 A false
|
441
|
+
# 1 2 A true
|
442
|
+
# ---
|
443
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
|
444
|
+
# x y z
|
445
|
+
# <uint8> <string> <boolean>
|
446
|
+
# 0 3 B false
|
447
|
+
# 1 4 B (nil)
|
448
|
+
# 2 5 B true
|
449
|
+
# ---
|
450
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
|
451
|
+
# x y z
|
452
|
+
# <uint8> <string> <boolean>
|
453
|
+
# 0 6 C false
|
454
|
+
#
|
455
|
+
# @since 0.4.0
|
456
|
+
#
|
457
|
+
def sub_by_value(keys: nil)
|
458
|
+
SubFrames.new(self, group(keys).filters)
|
459
|
+
end
|
460
|
+
alias_method :subframes_by_value, :sub_by_value
|
461
|
+
|
462
|
+
# Create SubFrames by Windowing with `from`, `size` and `step`.
|
463
|
+
#
|
464
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
465
|
+
# @param from [Integer]
|
466
|
+
# start position of window.
|
467
|
+
# @param size [Integer]
|
468
|
+
# window size.
|
469
|
+
# @param step [Integer]
|
470
|
+
# moving step of window.
|
471
|
+
# @return [SubFrames]
|
472
|
+
# a created SubFrames.
|
473
|
+
# @example
|
474
|
+
# df.sub_by_window(size: 4, step: 2)
|
475
|
+
#
|
476
|
+
# # =>
|
477
|
+
# #<RedAmber::SubFrames : 0x000000000000fc58>
|
478
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
479
|
+
# 2 SubFrames: [4, 4] in sizes.
|
480
|
+
# ---
|
481
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
|
482
|
+
# x y z
|
483
|
+
# <uint8> <string> <boolean>
|
484
|
+
# 0 1 A false
|
485
|
+
# 1 2 A true
|
486
|
+
# 2 3 B false
|
487
|
+
# 3 4 B (nil)
|
488
|
+
# ---
|
489
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
|
490
|
+
# x y z
|
491
|
+
# <uint8> <string> <boolean>
|
492
|
+
# 0 3 B false
|
493
|
+
# 1 4 B (nil)
|
494
|
+
# 2 5 B true
|
495
|
+
# 3 6 C false
|
496
|
+
#
|
497
|
+
# @since 0.4.0
|
498
|
+
#
|
499
|
+
def sub_by_window(from: 0, size: nil, step: 1)
|
500
|
+
SubFrames.new(self) do
|
501
|
+
from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
|
502
|
+
[*i...(i + size)]
|
503
|
+
end
|
504
|
+
end
|
505
|
+
end
|
506
|
+
alias_method :subframes_by_window, :sub_by_window
|
507
|
+
|
508
|
+
# Create SubFrames by Grouping/Windowing by position from an enumerator method.
|
509
|
+
#
|
510
|
+
# This method will process the indices of self by enumerator.
|
511
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
512
|
+
# @param enumerator_method [Symbol]
|
513
|
+
# Enumerator name.
|
514
|
+
# @param args [<Object>]
|
515
|
+
# arguments for the enumerator method.
|
516
|
+
# @return [SubFrames]
|
517
|
+
# a created SubFrames.
|
518
|
+
# @example Create a SubFrames object sliced by 3 rows.
|
519
|
+
# df.sub_by_enum(:each_slice, 3)
|
520
|
+
#
|
521
|
+
# # =>
|
522
|
+
# #<RedAmber::SubFrames : 0x000000000000fd20>
|
523
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
524
|
+
# 2 SubFrames: [3, 3] in sizes.
|
525
|
+
# ---
|
526
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
|
527
|
+
# x y z
|
528
|
+
# <uint8> <string> <boolean>
|
529
|
+
# 0 1 A false
|
530
|
+
# 1 2 A true
|
531
|
+
# 2 3 B false
|
532
|
+
# ---
|
533
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
|
534
|
+
# x y z
|
535
|
+
# <uint8> <string> <boolean>
|
536
|
+
# 0 4 B (nil)
|
537
|
+
# 1 5 B true
|
538
|
+
# 2 6 C false
|
539
|
+
#
|
540
|
+
# @example Create a SubFrames object for each consecutive 4 rows.
|
541
|
+
# df.sub_by_enum(:each_cons, 4)
|
542
|
+
#
|
543
|
+
# # =>
|
544
|
+
# #<RedAmber::SubFrames : 0x000000000000fd98>
|
545
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
546
|
+
# 3 SubFrames: [4, 4, 4] in sizes.
|
547
|
+
# ---
|
548
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
|
549
|
+
# x y z
|
550
|
+
# <uint8> <string> <boolean>
|
551
|
+
# 0 1 A false
|
552
|
+
# 1 2 A true
|
553
|
+
# 2 3 B false
|
554
|
+
# 3 4 B (nil)
|
555
|
+
# ---
|
556
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
|
557
|
+
# x y z
|
558
|
+
# <uint8> <string> <boolean>
|
559
|
+
# 0 2 A true
|
560
|
+
# 1 3 B false
|
561
|
+
# 2 4 B (nil)
|
562
|
+
# 3 5 B true
|
563
|
+
# ---
|
564
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
|
565
|
+
# x y z
|
566
|
+
# <uint8> <string> <boolean>
|
567
|
+
# 0 3 B false
|
568
|
+
# 1 4 B (nil)
|
569
|
+
# 2 5 B true
|
570
|
+
# 3 6 C false
|
571
|
+
#
|
572
|
+
# @since 0.4.0
|
573
|
+
#
|
574
|
+
def sub_by_enum(enumerator_method, *args)
|
575
|
+
SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
|
576
|
+
end
|
577
|
+
alias_method :subframes_by_enum, :sub_by_enum
|
578
|
+
|
579
|
+
# Create SubFrames by windowing with a kernel (i.e. masked window) and step.
|
580
|
+
#
|
581
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
582
|
+
# @param kernel [Array<true, false>, Vector]
|
583
|
+
# boolean array-like to pick records in the window.
|
584
|
+
# Kernel is a boolean Array and it behaves like a masked window.
|
585
|
+
# @param step [Integer]
|
586
|
+
# moving step of window.
|
587
|
+
# @return [SubFrames]
|
588
|
+
# a created SubFrames.
|
589
|
+
# @example
|
590
|
+
# kernel = [true, false, false, true]
|
591
|
+
# df.sub_by_kernel(kernel, step: 2)
|
592
|
+
#
|
593
|
+
# # =>
|
594
|
+
# #<RedAmber::SubFrames : 0x000000000000fde8>
|
595
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
596
|
+
# 2 SubFrames: [2, 2] in sizes.
|
597
|
+
# ---
|
598
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
|
599
|
+
# x y z
|
600
|
+
# <uint8> <string> <boolean>
|
601
|
+
# 0 1 A false
|
602
|
+
# 1 4 B (nil)
|
603
|
+
# ---
|
604
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
|
605
|
+
# x y z
|
606
|
+
# <uint8> <string> <boolean>
|
607
|
+
# 0 3 B false
|
608
|
+
# 1 6 C false
|
609
|
+
#
|
610
|
+
# @since 0.4.0
|
611
|
+
#
|
612
|
+
def sub_by_kernel(kernel, step: 1)
|
613
|
+
limit_size = size - kernel.size
|
614
|
+
kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
|
615
|
+
SubFrames.new(self) do
|
616
|
+
0.step(by: step, to: limit_size).map do |i|
|
617
|
+
kernel_vector.shift(i)
|
618
|
+
end
|
619
|
+
end
|
620
|
+
end
|
621
|
+
alias_method :subframes_by_kernel, :sub_by_kernel
|
622
|
+
|
623
|
+
# Generic builder of sub-dataframes from self.
|
624
|
+
#
|
625
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
626
|
+
# @overload build_subframes(subset_specifier)
|
627
|
+
# Create a new SubFrames object.
|
628
|
+
#
|
629
|
+
# @param subset_specifier [Array<Vector>, Array<array-like>]
|
630
|
+
# an Array of numeric indices or boolean filters
|
631
|
+
# to create subsets of DataFrame.
|
632
|
+
# @return [SubFrames]
|
633
|
+
# new SubFrames.
|
634
|
+
# @example
|
635
|
+
# df.build_subframes([[0, 2, 4], [1, 3, 5]])
|
636
|
+
#
|
637
|
+
# # =>
|
638
|
+
# #<RedAmber::SubFrames : 0x000000000000fe9c>
|
639
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
640
|
+
# 2 SubFrames: [3, 3] in sizes.
|
641
|
+
# ---
|
642
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
|
643
|
+
# x y z
|
644
|
+
# <uint8> <string> <boolean>
|
645
|
+
# 0 1 A false
|
646
|
+
# 1 3 B false
|
647
|
+
# 2 5 B true
|
648
|
+
# ---
|
649
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
|
650
|
+
# x y z
|
651
|
+
# <uint8> <string> <boolean>
|
652
|
+
# 0 2 A true
|
653
|
+
# 1 4 B (nil)
|
654
|
+
# 2 6 C false
|
655
|
+
#
|
656
|
+
# @overload build_subframes
|
657
|
+
# Create a new SubFrames object by block.
|
658
|
+
#
|
659
|
+
# @yield [self]
|
660
|
+
# the block is called within the context of self.
|
661
|
+
# (Block is called by instance_eval(&block). )
|
662
|
+
# @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
|
663
|
+
# an Array of index or boolean array-likes to create subsets of DataFrame.
|
664
|
+
# All array-likes are responsible to #numeric? or #boolean?.
|
665
|
+
# @example
|
666
|
+
# dataframe.build_subframes do
|
667
|
+
# even = indices.map(&:even?)
|
668
|
+
# [even, !even]
|
669
|
+
# end
|
670
|
+
#
|
671
|
+
# # =>
|
672
|
+
# #<RedAmber::SubFrames : 0x000000000000fe60>
|
673
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
674
|
+
# 2 SubFrames: [3, 3] in sizes.
|
675
|
+
# ---
|
676
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
|
677
|
+
# x y z
|
678
|
+
# <uint8> <string> <boolean>
|
679
|
+
# 0 1 A false
|
680
|
+
# 1 3 B false
|
681
|
+
# 2 5 B true
|
682
|
+
# ---
|
683
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
|
684
|
+
# x y z
|
685
|
+
# <uint8> <string> <boolean>
|
686
|
+
# 0 2 A true
|
687
|
+
# 1 4 B (nil)
|
688
|
+
# 2 6 C false
|
689
|
+
#
|
690
|
+
# @since 0.4.0
|
691
|
+
#
|
692
|
+
def build_subframes(subset_specifier = nil, &block)
|
693
|
+
if block
|
694
|
+
SubFrames.new(self, instance_eval(&block))
|
695
|
+
else
|
696
|
+
SubFrames.new(self, subset_specifier)
|
697
|
+
end
|
698
|
+
end
|
699
|
+
|
700
|
+
# Catch variable (column) key as method name.
|
332
701
|
def method_missing(name, *args, &block)
|
333
|
-
return
|
702
|
+
return variables[name] if args.empty? && key?(name)
|
334
703
|
|
335
704
|
super
|
336
705
|
end
|
337
706
|
|
707
|
+
# Catch variable (column) key as method name.
|
338
708
|
def respond_to_missing?(name, include_private)
|
339
709
|
return true if key?(name)
|
340
710
|
|
@@ -346,15 +716,16 @@ module RedAmber
|
|
346
716
|
# initialize @variables, @keys, @vectors and return one of them
|
347
717
|
def init_instance_vars(var)
|
348
718
|
ary =
|
349
|
-
@table
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
719
|
+
@table
|
720
|
+
.columns
|
721
|
+
.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
|
722
|
+
v = Vector.create(column.data)
|
723
|
+
k = column.name.to_sym
|
724
|
+
v.key = k
|
725
|
+
variables[k] = v
|
726
|
+
keys << k
|
727
|
+
vectors << v
|
728
|
+
end
|
358
729
|
|
359
730
|
@variables, @keys, @vectors = ary
|
360
731
|
ary[%i[variables keys vectors].index(var)]
|
@@ -369,11 +740,9 @@ module RedAmber
|
|
369
740
|
end
|
370
741
|
|
371
742
|
def name_unnamed_keys
|
372
|
-
return unless @table.key?('')
|
743
|
+
return unless @table.key?(:'')
|
373
744
|
|
374
|
-
|
375
|
-
keys = @table.schema.fields.map { |f| f.name.to_sym }
|
376
|
-
unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
|
745
|
+
unnamed = (:unnamed1..).find { |name| !@table.key?(name) }
|
377
746
|
fields =
|
378
747
|
@table.schema.fields.map do |field|
|
379
748
|
if field.name.empty?
|