red_amber 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
data/lib/red_amber/data_frame.rb
CHANGED
@@ -4,7 +4,7 @@ module RedAmber
|
|
4
4
|
# Class to represent a data frame.
|
5
5
|
# Variable @table holds an Arrow::Table object.
|
6
6
|
class DataFrame
|
7
|
-
#
|
7
|
+
# Mix-in
|
8
8
|
include DataFrameCombinable
|
9
9
|
include DataFrameDisplayable
|
10
10
|
include DataFrameIndexable
|
@@ -17,26 +17,79 @@ module RedAmber
|
|
17
17
|
using RefineArrowTable
|
18
18
|
using RefineHash
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
class << self
|
21
|
+
# Quicker DataFrame constructor from a `Arrow::Table`.
|
22
|
+
#
|
23
|
+
# @param table [Arrow::Table]
|
24
|
+
# A table to have in the DataFrame.
|
25
|
+
# @return [DataFrame]
|
26
|
+
# Initialized DataFrame.
|
27
|
+
#
|
28
|
+
# @note This method will allocate table directly and may be used in the method.
|
29
|
+
# @note `table` must have unique keys.
|
30
|
+
#
|
31
|
+
def create(table)
|
32
|
+
instance = allocate
|
33
|
+
instance.instance_variable_set(:@table, table)
|
34
|
+
instance
|
35
|
+
end
|
36
|
+
|
37
|
+
# Return new DataFrame for specified schema and value.
|
38
|
+
#
|
39
|
+
# @param dataframe_for_schema [DataFrame]
|
40
|
+
# schema of this dataframe will be used.
|
41
|
+
# @param dataframe_for_value [DataFrame]
|
42
|
+
# column values of this dataframe will be used.
|
43
|
+
# @return [DataFrame]
|
44
|
+
# created DataFrame.
|
45
|
+
# @since 0.4.1
|
46
|
+
#
|
47
|
+
def new_dataframe_with_schema(dataframe_for_schema, dataframe_for_value)
|
48
|
+
DataFrame.create(
|
49
|
+
Arrow::Table.new(dataframe_for_schema.table.schema,
|
50
|
+
dataframe_for_value.table.columns)
|
51
|
+
)
|
52
|
+
end
|
31
53
|
end
|
32
54
|
|
33
55
|
# Creates a new DataFrame.
|
34
56
|
#
|
57
|
+
# @overload initialize(hash)
|
58
|
+
# Initialize a DataFrame by a Hash.
|
59
|
+
#
|
60
|
+
# @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
|
61
|
+
# a Hash of `key` with array-like for column values.
|
62
|
+
# `key`s are Symbol or String.
|
63
|
+
# @example Initialize by a Hash
|
64
|
+
# hash = { x: [1, 2, 3], y: %w[A B C] }
|
65
|
+
# DataFrame.new(hash)
|
66
|
+
# @example Initialize by a Hash like arguments.
|
67
|
+
# DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
68
|
+
# @example Initialize from #to_arrow_array responsibles.
|
69
|
+
# # #to_arrow_array responsible `array-like` is also available.
|
70
|
+
# require 'arrow-numo-narray'
|
71
|
+
# DataFrame.new(numo: Numo::DFloat.new(3).rand)
|
72
|
+
#
|
35
73
|
# @overload initialize(table)
|
36
|
-
# Initialize DataFrame by an `Arrow::Table
|
74
|
+
# Initialize a DataFrame by an `Arrow::Table`.
|
37
75
|
#
|
38
76
|
# @param table [Arrow::Table]
|
39
|
-
#
|
77
|
+
# a table to have in the DataFrame.
|
78
|
+
# @example Initialize by a Table
|
79
|
+
# table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
80
|
+
# DataFrame.new(table)
|
81
|
+
#
|
82
|
+
# @overload initialize(schema, row_oriented_array)
|
83
|
+
# Initialize a DataFrame by schema and row_oriented_array.
|
84
|
+
#
|
85
|
+
# @param schema [Hash<key => type>]
|
86
|
+
# a schema of key and data type.
|
87
|
+
# @param row_oriented_array [Array]
|
88
|
+
# an Array of rows.
|
89
|
+
# @example Initialize by a schema and a row_oriented_array.
|
90
|
+
# schema = { x: :uint8, y: :string }
|
91
|
+
# row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
|
92
|
+
# DataFrame.new(schema, row_oriented_array)
|
40
93
|
#
|
41
94
|
# @overload initialize(arrowable)
|
42
95
|
# Initialize DataFrame by a `#to_arrow` responsible object.
|
@@ -47,6 +100,11 @@ module RedAmber
|
|
47
100
|
#
|
48
101
|
# @note `RedAmber::DataFrame` itself is readable by this.
|
49
102
|
# @note Hash is refined to respond to `#to_arrow` in this class.
|
103
|
+
# @example Initialize by Red Dataset object.
|
104
|
+
# require 'datasets-arrow'
|
105
|
+
# dataset = Datasets::Penguins.new
|
106
|
+
# penguins = DataFrame.new(dataset)
|
107
|
+
# @since 0.2.2
|
50
108
|
#
|
51
109
|
# @overload initialize(rover_like)
|
52
110
|
# Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
|
@@ -60,20 +118,18 @@ module RedAmber
|
|
60
118
|
# @overload initialize()
|
61
119
|
# Create empty DataFrame
|
62
120
|
#
|
63
|
-
# @example
|
121
|
+
# @example
|
122
|
+
# DataFrame.new
|
64
123
|
#
|
65
124
|
# @overload initialize(empty)
|
66
125
|
# Create empty DataFrame
|
67
126
|
#
|
68
127
|
# @param empty [nil, [], {}]
|
69
128
|
#
|
70
|
-
# @example
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
# @param args [values]
|
75
|
-
# Accepts any argments which is valid for `Arrow::Table.new(args)`. See
|
76
|
-
# {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
|
129
|
+
# @example Return empty DataFrame.
|
130
|
+
# DataFrame.new([])
|
131
|
+
# DataFrame.new({})
|
132
|
+
# DataFrame.new(nil)
|
77
133
|
#
|
78
134
|
def initialize(*args)
|
79
135
|
case args
|
@@ -109,15 +165,16 @@ module RedAmber
|
|
109
165
|
|
110
166
|
# Returns the table having within.
|
111
167
|
#
|
112
|
-
# @return [Arrow::Table]
|
168
|
+
# @return [Arrow::Table]
|
169
|
+
# the table within.
|
113
170
|
#
|
114
171
|
attr_reader :table
|
115
|
-
|
116
172
|
alias_method :to_arrow, :table
|
117
173
|
|
118
|
-
# Returns the number of rows.
|
174
|
+
# Returns the number of records (rows).
|
119
175
|
#
|
120
|
-
# @return [Integer]
|
176
|
+
# @return [Integer]
|
177
|
+
# number of records (rows).
|
121
178
|
#
|
122
179
|
def size
|
123
180
|
@table.n_rows
|
@@ -126,9 +183,10 @@ module RedAmber
|
|
126
183
|
alias_method :n_obs, :size
|
127
184
|
alias_method :n_rows, :size
|
128
185
|
|
129
|
-
# Returns the number of columns.
|
186
|
+
# Returns the number of variables (columns).
|
130
187
|
#
|
131
|
-
# @return [Integer]
|
188
|
+
# @return [Integer]
|
189
|
+
# number of variables (columns).
|
132
190
|
#
|
133
191
|
def n_keys
|
134
192
|
@table.n_columns
|
@@ -140,7 +198,7 @@ module RedAmber
|
|
140
198
|
# Returns the numbers of rows and columns.
|
141
199
|
#
|
142
200
|
# @return [Array]
|
143
|
-
#
|
201
|
+
# number of rows and number of columns in an array.
|
144
202
|
# Same as [size, n_keys].
|
145
203
|
#
|
146
204
|
def shape
|
@@ -153,26 +211,27 @@ module RedAmber
|
|
153
211
|
# `key => Vector` pairs for each columns.
|
154
212
|
#
|
155
213
|
def variables
|
156
|
-
@variables
|
214
|
+
@variables ||= init_instance_vars(:variables)
|
157
215
|
end
|
158
216
|
alias_method :vars, :variables
|
159
217
|
|
160
218
|
# Returns an Array of keys.
|
161
219
|
#
|
162
220
|
# @return [Array]
|
163
|
-
#
|
221
|
+
# keys in an Array.
|
164
222
|
#
|
165
223
|
def keys
|
166
|
-
@keys
|
224
|
+
@keys ||= init_instance_vars(:keys)
|
167
225
|
end
|
168
226
|
alias_method :column_names, :keys
|
169
227
|
alias_method :var_names, :keys
|
170
228
|
|
171
229
|
# Returns true if self has a specified key in the argument.
|
172
230
|
#
|
173
|
-
# @param key [Symbol, String]
|
231
|
+
# @param key [Symbol, String]
|
232
|
+
# key to test.
|
174
233
|
# @return [Boolean]
|
175
|
-
#
|
234
|
+
# returns true if self has key in Symbol.
|
176
235
|
#
|
177
236
|
def key?(key)
|
178
237
|
keys.include?(key.to_sym)
|
@@ -181,9 +240,10 @@ module RedAmber
|
|
181
240
|
|
182
241
|
# Returns index of specified key in the Array keys.
|
183
242
|
#
|
184
|
-
# @param key [Symbol, String]
|
243
|
+
# @param key [Symbol, String]
|
244
|
+
# key to know.
|
185
245
|
# @return [Integer]
|
186
|
-
#
|
246
|
+
# index of key in the Array keys.
|
187
247
|
#
|
188
248
|
def key_index(key)
|
189
249
|
keys.find_index(key.to_sym)
|
@@ -194,10 +254,10 @@ module RedAmber
|
|
194
254
|
# Returns abbreviated type names in an Array.
|
195
255
|
#
|
196
256
|
# @return [Array]
|
197
|
-
#
|
257
|
+
# abbreviated Red Arrow data type names.
|
198
258
|
#
|
199
259
|
def types
|
200
|
-
@types
|
260
|
+
@types ||= @table.columns.map do |column|
|
201
261
|
column.data.value_type.nick.to_sym
|
202
262
|
end
|
203
263
|
end
|
@@ -205,43 +265,25 @@ module RedAmber
|
|
205
265
|
# Returns an Array of Classes of data type.
|
206
266
|
#
|
207
267
|
# @return [Array]
|
208
|
-
#
|
268
|
+
# an Array of Red Arrow data type Classes.
|
209
269
|
#
|
210
270
|
def type_classes
|
211
|
-
@
|
271
|
+
@type_classes ||= @table.columns.map { |column| column.data_type.class }
|
212
272
|
end
|
213
273
|
|
214
274
|
# Returns Vectors in an Array.
|
215
275
|
#
|
216
276
|
# @return [Array]
|
217
|
-
#
|
277
|
+
# an Array of Vector.
|
218
278
|
#
|
219
279
|
def vectors
|
220
|
-
@vectors
|
280
|
+
@vectors ||= init_instance_vars(:vectors)
|
221
281
|
end
|
222
282
|
|
223
|
-
# Returns row indices (start...(size+start)) in a Vector.
|
224
|
-
#
|
225
|
-
# @param start [Object]
|
226
|
-
# Object which have `#succ` method.
|
227
|
-
#
|
228
|
-
# @return [Array]
|
229
|
-
# A Vector of row indices.
|
230
|
-
#
|
231
|
-
# @example
|
232
|
-
# (when self.size == 5)
|
233
|
-
# - indices #=> Vector[0, 1, 2, 3, 4]
|
234
|
-
# - indices(1) #=> Vector[1, 2, 3, 4, 5]
|
235
|
-
# - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
|
236
|
-
#
|
237
|
-
def indices(start = 0)
|
238
|
-
Vector.new((start..).take(size))
|
239
|
-
end
|
240
|
-
alias_method :indexes, :indices
|
241
|
-
|
242
283
|
# Returns column-oriented data in a Hash.
|
243
284
|
#
|
244
|
-
# @return [Hash]
|
285
|
+
# @return [Hash]
|
286
|
+
# a Hash of 'key => column_in_an_array'.
|
245
287
|
#
|
246
288
|
def to_h
|
247
289
|
variables.transform_values(&:to_a)
|
@@ -249,7 +291,8 @@ module RedAmber
|
|
249
291
|
|
250
292
|
# Returns a row-oriented array without header.
|
251
293
|
#
|
252
|
-
# @return [Array]
|
294
|
+
# @return [Array]
|
295
|
+
# row-oriented data without header.
|
253
296
|
#
|
254
297
|
# @note If you need column-oriented array, use `.to_h.to_a`.
|
255
298
|
#
|
@@ -260,7 +303,8 @@ module RedAmber
|
|
260
303
|
|
261
304
|
# Returns column name and data type in a Hash.
|
262
305
|
#
|
263
|
-
# @return [Hash]
|
306
|
+
# @return [Hash]
|
307
|
+
# column name and data type.
|
264
308
|
#
|
265
309
|
# @example
|
266
310
|
# RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
|
@@ -273,7 +317,7 @@ module RedAmber
|
|
273
317
|
# Compare DataFrames.
|
274
318
|
#
|
275
319
|
# @return [true, false]
|
276
|
-
#
|
320
|
+
# true if other is a DataFrame and table is same.
|
277
321
|
# Otherwise return false.
|
278
322
|
#
|
279
323
|
def ==(other)
|
@@ -282,7 +326,8 @@ module RedAmber
|
|
282
326
|
|
283
327
|
# Check if it is an empty DataFrame.
|
284
328
|
#
|
285
|
-
# @return [true, false
|
329
|
+
# @return [true, false]
|
330
|
+
# true if it has no columns.
|
286
331
|
#
|
287
332
|
def empty?
|
288
333
|
variables.empty?
|
@@ -293,14 +338,18 @@ module RedAmber
|
|
293
338
|
# @overload each_row
|
294
339
|
# Returns Enumerator when no block given.
|
295
340
|
#
|
296
|
-
# @return [Enumerator]
|
341
|
+
# @return [Enumerator]
|
342
|
+
# enumerator of each rows.
|
297
343
|
#
|
298
344
|
# @overload each_row(&block)
|
299
345
|
# Yields with key and row pairs.
|
300
346
|
#
|
301
|
-
# @
|
302
|
-
#
|
303
|
-
# @yieldreturn [Integer]
|
347
|
+
# @yieldparam key_row_pairs [Hash]
|
348
|
+
# key and row pairs.
|
349
|
+
# @yieldreturn [Integer]
|
350
|
+
# size of the DataFrame.
|
351
|
+
# @return [Integer]
|
352
|
+
# returns size.
|
304
353
|
#
|
305
354
|
def each_row
|
306
355
|
return enum_for(:each_row) unless block_given?
|
@@ -316,25 +365,346 @@ module RedAmber
|
|
316
365
|
|
317
366
|
# Returns self in a `Rover::DataFrame`.
|
318
367
|
#
|
319
|
-
# @return [Rover::DataFrame]
|
368
|
+
# @return [Rover::DataFrame]
|
369
|
+
# a `Rover::DataFrame`.
|
320
370
|
#
|
321
371
|
def to_rover
|
322
372
|
require 'rover'
|
323
373
|
Rover::DataFrame.new(to_h)
|
324
374
|
end
|
325
375
|
|
376
|
+
# Create a Group object. Or create a Group and summarize it.
|
377
|
+
#
|
378
|
+
# @overload group(*group_keys)
|
379
|
+
# Create a Group object.
|
380
|
+
#
|
381
|
+
# @param group_keys [Array<Symbol, String>]
|
382
|
+
# keys for grouping.
|
383
|
+
# @return [Group]
|
384
|
+
# Group object.
|
385
|
+
# @example Create a Group
|
386
|
+
# penguins.group(:species)
|
387
|
+
#
|
388
|
+
# # =>
|
389
|
+
# #<RedAmber::Group : 0x000000000000c3c8>
|
390
|
+
# species group_count
|
391
|
+
# <string> <uint8>
|
392
|
+
# 0 Adelie 152
|
393
|
+
# 1 Chinstrap 68
|
394
|
+
# 2 Gentoo 124
|
395
|
+
#
|
396
|
+
# @overload group(*group_keys)
|
397
|
+
# Create a Group and summarize it by aggregation functions from the block.
|
398
|
+
#
|
399
|
+
# @yieldparam group [Group]
|
400
|
+
# passes Group object.
|
401
|
+
# @yieldreturn [DataFrame, Array<DataFrame>]
|
402
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
403
|
+
# @return [DataFrame]
|
404
|
+
# summarized DataFrame.
|
405
|
+
# @example Create a group and summarize it.
|
406
|
+
# penguins.group(:species) { mean(:bill_length_mm) }
|
407
|
+
#
|
408
|
+
# # =>
|
409
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
|
410
|
+
# species mean(bill_length_mm)
|
411
|
+
# <string> <double>
|
412
|
+
# 0 Adelie 38.79
|
413
|
+
# 1 Chinstrap 48.83
|
414
|
+
# 2 Gentoo 47.5
|
415
|
+
#
|
326
416
|
def group(*group_keys, &block)
|
327
417
|
g = Group.new(self, group_keys)
|
328
418
|
g = g.summarize(&block) if block
|
329
419
|
g
|
330
420
|
end
|
331
421
|
|
422
|
+
# Create SubFrames by value grouping.
|
423
|
+
#
|
424
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
425
|
+
# @param keys [Symbol, String, Array<Symbol, String>]
|
426
|
+
# grouping keys.
|
427
|
+
# @return [SubFrames]
|
428
|
+
# a created SubFrames grouped by column values on `keys`.
|
429
|
+
# @example
|
430
|
+
# df.sub_by_value(keys: :y)
|
431
|
+
#
|
432
|
+
# # =>
|
433
|
+
# #<RedAmber::SubFrames : 0x000000000000fc08>
|
434
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
435
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
436
|
+
# ---
|
437
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
|
438
|
+
# x y z
|
439
|
+
# <uint8> <string> <boolean>
|
440
|
+
# 0 1 A false
|
441
|
+
# 1 2 A true
|
442
|
+
# ---
|
443
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
|
444
|
+
# x y z
|
445
|
+
# <uint8> <string> <boolean>
|
446
|
+
# 0 3 B false
|
447
|
+
# 1 4 B (nil)
|
448
|
+
# 2 5 B true
|
449
|
+
# ---
|
450
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
|
451
|
+
# x y z
|
452
|
+
# <uint8> <string> <boolean>
|
453
|
+
# 0 6 C false
|
454
|
+
#
|
455
|
+
# @since 0.4.0
|
456
|
+
#
|
457
|
+
def sub_by_value(keys: nil)
|
458
|
+
SubFrames.new(self, group(keys).filters)
|
459
|
+
end
|
460
|
+
alias_method :subframes_by_value, :sub_by_value
|
461
|
+
|
462
|
+
# Create SubFrames by Windowing with `from`, `size` and `step`.
|
463
|
+
#
|
464
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
465
|
+
# @param from [Integer]
|
466
|
+
# start position of window.
|
467
|
+
# @param size [Integer]
|
468
|
+
# window size.
|
469
|
+
# @param step [Integer]
|
470
|
+
# moving step of window.
|
471
|
+
# @return [SubFrames]
|
472
|
+
# a created SubFrames.
|
473
|
+
# @example
|
474
|
+
# df.sub_by_window(size: 4, step: 2)
|
475
|
+
#
|
476
|
+
# # =>
|
477
|
+
# #<RedAmber::SubFrames : 0x000000000000fc58>
|
478
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
479
|
+
# 2 SubFrames: [4, 4] in sizes.
|
480
|
+
# ---
|
481
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
|
482
|
+
# x y z
|
483
|
+
# <uint8> <string> <boolean>
|
484
|
+
# 0 1 A false
|
485
|
+
# 1 2 A true
|
486
|
+
# 2 3 B false
|
487
|
+
# 3 4 B (nil)
|
488
|
+
# ---
|
489
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
|
490
|
+
# x y z
|
491
|
+
# <uint8> <string> <boolean>
|
492
|
+
# 0 3 B false
|
493
|
+
# 1 4 B (nil)
|
494
|
+
# 2 5 B true
|
495
|
+
# 3 6 C false
|
496
|
+
#
|
497
|
+
# @since 0.4.0
|
498
|
+
#
|
499
|
+
def sub_by_window(from: 0, size: nil, step: 1)
|
500
|
+
SubFrames.new(self) do
|
501
|
+
from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
|
502
|
+
[*i...(i + size)]
|
503
|
+
end
|
504
|
+
end
|
505
|
+
end
|
506
|
+
alias_method :subframes_by_window, :sub_by_window
|
507
|
+
|
508
|
+
# Create SubFrames by Grouping/Windowing by position from an enumerator method.
|
509
|
+
#
|
510
|
+
# This method will process the indices of self by enumerator.
|
511
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
512
|
+
# @param enumerator_method [Symbol]
|
513
|
+
# Enumerator name.
|
514
|
+
# @param args [<Object>]
|
515
|
+
# arguments for the enumerator method.
|
516
|
+
# @return [SubFrames]
|
517
|
+
# a created SubFrames.
|
518
|
+
# @example Create a SubFrames object sliced by 3 rows.
|
519
|
+
# df.sub_by_enum(:each_slice, 3)
|
520
|
+
#
|
521
|
+
# # =>
|
522
|
+
# #<RedAmber::SubFrames : 0x000000000000fd20>
|
523
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
524
|
+
# 2 SubFrames: [3, 3] in sizes.
|
525
|
+
# ---
|
526
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
|
527
|
+
# x y z
|
528
|
+
# <uint8> <string> <boolean>
|
529
|
+
# 0 1 A false
|
530
|
+
# 1 2 A true
|
531
|
+
# 2 3 B false
|
532
|
+
# ---
|
533
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
|
534
|
+
# x y z
|
535
|
+
# <uint8> <string> <boolean>
|
536
|
+
# 0 4 B (nil)
|
537
|
+
# 1 5 B true
|
538
|
+
# 2 6 C false
|
539
|
+
#
|
540
|
+
# @example Create a SubFrames object for each consecutive 4 rows.
|
541
|
+
# df.sub_by_enum(:each_cons, 4)
|
542
|
+
#
|
543
|
+
# # =>
|
544
|
+
# #<RedAmber::SubFrames : 0x000000000000fd98>
|
545
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
546
|
+
# 3 SubFrames: [4, 4, 4] in sizes.
|
547
|
+
# ---
|
548
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
|
549
|
+
# x y z
|
550
|
+
# <uint8> <string> <boolean>
|
551
|
+
# 0 1 A false
|
552
|
+
# 1 2 A true
|
553
|
+
# 2 3 B false
|
554
|
+
# 3 4 B (nil)
|
555
|
+
# ---
|
556
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
|
557
|
+
# x y z
|
558
|
+
# <uint8> <string> <boolean>
|
559
|
+
# 0 2 A true
|
560
|
+
# 1 3 B false
|
561
|
+
# 2 4 B (nil)
|
562
|
+
# 3 5 B true
|
563
|
+
# ---
|
564
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
|
565
|
+
# x y z
|
566
|
+
# <uint8> <string> <boolean>
|
567
|
+
# 0 3 B false
|
568
|
+
# 1 4 B (nil)
|
569
|
+
# 2 5 B true
|
570
|
+
# 3 6 C false
|
571
|
+
#
|
572
|
+
# @since 0.4.0
|
573
|
+
#
|
574
|
+
def sub_by_enum(enumerator_method, *args)
|
575
|
+
SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
|
576
|
+
end
|
577
|
+
alias_method :subframes_by_enum, :sub_by_enum
|
578
|
+
|
579
|
+
# Create SubFrames by windowing with a kernel (i.e. masked window) and step.
|
580
|
+
#
|
581
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
582
|
+
# @param kernel [Array<true, false>, Vector]
|
583
|
+
# boolean array-like to pick records in the window.
|
584
|
+
# Kernel is a boolean Array and it behaves like a masked window.
|
585
|
+
# @param step [Integer]
|
586
|
+
# moving step of window.
|
587
|
+
# @return [SubFrames]
|
588
|
+
# a created SubFrames.
|
589
|
+
# @example
|
590
|
+
# kernel = [true, false, false, true]
|
591
|
+
# df.sub_by_kernel(kernel, step: 2)
|
592
|
+
#
|
593
|
+
# # =>
|
594
|
+
# #<RedAmber::SubFrames : 0x000000000000fde8>
|
595
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
596
|
+
# 2 SubFrames: [2, 2] in sizes.
|
597
|
+
# ---
|
598
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
|
599
|
+
# x y z
|
600
|
+
# <uint8> <string> <boolean>
|
601
|
+
# 0 1 A false
|
602
|
+
# 1 4 B (nil)
|
603
|
+
# ---
|
604
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
|
605
|
+
# x y z
|
606
|
+
# <uint8> <string> <boolean>
|
607
|
+
# 0 3 B false
|
608
|
+
# 1 6 C false
|
609
|
+
#
|
610
|
+
# @since 0.4.0
|
611
|
+
#
|
612
|
+
def sub_by_kernel(kernel, step: 1)
|
613
|
+
limit_size = size - kernel.size
|
614
|
+
kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
|
615
|
+
SubFrames.new(self) do
|
616
|
+
0.step(by: step, to: limit_size).map do |i|
|
617
|
+
kernel_vector.shift(i)
|
618
|
+
end
|
619
|
+
end
|
620
|
+
end
|
621
|
+
alias_method :subframes_by_kernel, :sub_by_kernel
|
622
|
+
|
623
|
+
# Generic builder of sub-dataframes from self.
|
624
|
+
#
|
625
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
626
|
+
# @overload build_subframes(subset_specifier)
|
627
|
+
# Create a new SubFrames object.
|
628
|
+
#
|
629
|
+
# @param subset_specifier [Array<Vector>, Array<array-like>]
|
630
|
+
# an Array of numeric indices or boolean filters
|
631
|
+
# to create subsets of DataFrame.
|
632
|
+
# @return [SubFrames]
|
633
|
+
# new SubFrames.
|
634
|
+
# @example
|
635
|
+
# df.build_subframes([[0, 2, 4], [1, 3, 5]])
|
636
|
+
#
|
637
|
+
# # =>
|
638
|
+
# #<RedAmber::SubFrames : 0x000000000000fe9c>
|
639
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
640
|
+
# 2 SubFrames: [3, 3] in sizes.
|
641
|
+
# ---
|
642
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
|
643
|
+
# x y z
|
644
|
+
# <uint8> <string> <boolean>
|
645
|
+
# 0 1 A false
|
646
|
+
# 1 3 B false
|
647
|
+
# 2 5 B true
|
648
|
+
# ---
|
649
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
|
650
|
+
# x y z
|
651
|
+
# <uint8> <string> <boolean>
|
652
|
+
# 0 2 A true
|
653
|
+
# 1 4 B (nil)
|
654
|
+
# 2 6 C false
|
655
|
+
#
|
656
|
+
# @overload build_subframes
|
657
|
+
# Create a new SubFrames object by block.
|
658
|
+
#
|
659
|
+
# @yield [self]
|
660
|
+
# the block is called within the context of self.
|
661
|
+
# (Block is called by instance_eval(&block). )
|
662
|
+
# @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
|
663
|
+
# an Array of index or boolean array-likes to create subsets of DataFrame.
|
664
|
+
# All array-likes are responsible to #numeric? or #boolean?.
|
665
|
+
# @example
|
666
|
+
# dataframe.build_subframes do
|
667
|
+
# even = indices.map(&:even?)
|
668
|
+
# [even, !even]
|
669
|
+
# end
|
670
|
+
#
|
671
|
+
# # =>
|
672
|
+
# #<RedAmber::SubFrames : 0x000000000000fe60>
|
673
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
674
|
+
# 2 SubFrames: [3, 3] in sizes.
|
675
|
+
# ---
|
676
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
|
677
|
+
# x y z
|
678
|
+
# <uint8> <string> <boolean>
|
679
|
+
# 0 1 A false
|
680
|
+
# 1 3 B false
|
681
|
+
# 2 5 B true
|
682
|
+
# ---
|
683
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
|
684
|
+
# x y z
|
685
|
+
# <uint8> <string> <boolean>
|
686
|
+
# 0 2 A true
|
687
|
+
# 1 4 B (nil)
|
688
|
+
# 2 6 C false
|
689
|
+
#
|
690
|
+
# @since 0.4.0
|
691
|
+
#
|
692
|
+
def build_subframes(subset_specifier = nil, &block)
|
693
|
+
if block
|
694
|
+
SubFrames.new(self, instance_eval(&block))
|
695
|
+
else
|
696
|
+
SubFrames.new(self, subset_specifier)
|
697
|
+
end
|
698
|
+
end
|
699
|
+
|
700
|
+
# Catch variable (column) key as method name.
|
332
701
|
def method_missing(name, *args, &block)
|
333
|
-
return
|
702
|
+
return variables[name] if args.empty? && key?(name)
|
334
703
|
|
335
704
|
super
|
336
705
|
end
|
337
706
|
|
707
|
+
# Catch variable (column) key as method name.
|
338
708
|
def respond_to_missing?(name, include_private)
|
339
709
|
return true if key?(name)
|
340
710
|
|
@@ -346,15 +716,16 @@ module RedAmber
|
|
346
716
|
# initialize @variables, @keys, @vectors and return one of them
|
347
717
|
def init_instance_vars(var)
|
348
718
|
ary =
|
349
|
-
@table
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
719
|
+
@table
|
720
|
+
.columns
|
721
|
+
.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
|
722
|
+
v = Vector.create(column.data)
|
723
|
+
k = column.name.to_sym
|
724
|
+
v.key = k
|
725
|
+
variables[k] = v
|
726
|
+
keys << k
|
727
|
+
vectors << v
|
728
|
+
end
|
358
729
|
|
359
730
|
@variables, @keys, @vectors = ary
|
360
731
|
ary[%i[variables keys vectors].index(var)]
|
@@ -369,11 +740,9 @@ module RedAmber
|
|
369
740
|
end
|
370
741
|
|
371
742
|
def name_unnamed_keys
|
372
|
-
return unless @table.key?('')
|
743
|
+
return unless @table.key?(:'')
|
373
744
|
|
374
|
-
|
375
|
-
keys = @table.schema.fields.map { |f| f.name.to_sym }
|
376
|
-
unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
|
745
|
+
unnamed = (:unnamed1..).find { |name| !@table.key?(name) }
|
377
746
|
fields =
|
378
747
|
@table.schema.fields.map do |field|
|
379
748
|
if field.name.empty?
|