red_amber 0.2.3 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +133 -51
- data/.yardopts +2 -0
- data/CHANGELOG.md +203 -1
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +61 -45
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +63 -0
- data/doc/DataFrame.md +35 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +295 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +537 -68
- data/lib/red_amber/data_frame_combinable.rb +776 -123
- data/lib/red_amber/data_frame_displayable.rb +248 -18
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +81 -10
- data/lib/red_amber/data_frame_reshaping.rb +216 -21
- data/lib/red_amber/data_frame_selectable.rb +781 -120
- data/lib/red_amber/data_frame_variable_operation.rb +561 -85
- data/lib/red_amber/group.rb +195 -21
- data/lib/red_amber/helper.rb +114 -32
- data/lib/red_amber/refinements.rb +206 -0
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +435 -58
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +321 -69
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +397 -24
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +15 -1
- data/red_amber.gemspec +4 -3
- metadata +19 -11
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -294
data/lib/red_amber/data_frame.rb
CHANGED
@@ -4,7 +4,7 @@ module RedAmber
|
|
4
4
|
# Class to represent a data frame.
|
5
5
|
# Variable @table holds an Arrow::Table object.
|
6
6
|
class DataFrame
|
7
|
-
#
|
7
|
+
# Mix-in
|
8
8
|
include DataFrameCombinable
|
9
9
|
include DataFrameDisplayable
|
10
10
|
include DataFrameIndexable
|
@@ -14,65 +14,151 @@ module RedAmber
|
|
14
14
|
include DataFrameVariableOperation
|
15
15
|
include Helper
|
16
16
|
|
17
|
-
|
17
|
+
using RefineArrowTable
|
18
|
+
using RefineHash
|
19
|
+
|
20
|
+
class << self
|
21
|
+
# Quicker DataFrame constructor from a `Arrow::Table`.
|
22
|
+
#
|
23
|
+
# @param table [Arrow::Table]
|
24
|
+
# A table to have in the DataFrame.
|
25
|
+
# @return [DataFrame]
|
26
|
+
# Initialized DataFrame.
|
27
|
+
#
|
28
|
+
# @note This method will allocate table directly and may be used in the method.
|
29
|
+
# @note `table` must have unique keys.
|
30
|
+
#
|
31
|
+
def create(table)
|
32
|
+
instance = allocate
|
33
|
+
instance.instance_variable_set(:@table, table)
|
34
|
+
instance
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Creates a new DataFrame.
|
18
39
|
#
|
19
40
|
# @overload initialize(hash)
|
41
|
+
# Initialize a DataFrame by a Hash.
|
20
42
|
#
|
21
|
-
# @
|
43
|
+
# @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
|
44
|
+
# a Hash of `key` with array-like for column values.
|
45
|
+
# `key`s are Symbol or String.
|
46
|
+
# @example Initialize by a Hash
|
47
|
+
# hash = { x: [1, 2, 3], y: %w[A B C] }
|
48
|
+
# DataFrame.new(hash)
|
49
|
+
# @example Initialize by a Hash like arguments.
|
50
|
+
# DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
51
|
+
# @example Initialize from #to_arrow_array responsibles.
|
52
|
+
# # #to_arrow_array responsible `array-like` is also available.
|
53
|
+
# require 'arrow-numo-narray'
|
54
|
+
# DataFrame.new(numo: Numo::DFloat.new(3).rand)
|
22
55
|
#
|
23
56
|
# @overload initialize(table)
|
57
|
+
# Initialize a DataFrame by an `Arrow::Table`.
|
58
|
+
#
|
59
|
+
# @param table [Arrow::Table]
|
60
|
+
# a table to have in the DataFrame.
|
61
|
+
# @example Initialize by a Table
|
62
|
+
# table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
63
|
+
# DataFrame.new(table)
|
64
|
+
#
|
65
|
+
# @overload initialize(schema, row_oriented_array)
|
66
|
+
# Initialize a DataFrame by schema and row_oriented_array.
|
67
|
+
#
|
68
|
+
# @param schema [Hash<key => type>]
|
69
|
+
# a schema of key and data type.
|
70
|
+
# @param row_oriented_array [Array]
|
71
|
+
# an Array of rows.
|
72
|
+
# @example Initialize by a schema and a row_oriented_array.
|
73
|
+
# schema = { x: :uint8, y: :string }
|
74
|
+
# row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
|
75
|
+
# DataFrame.new(schema, row_oriented_array)
|
76
|
+
#
|
77
|
+
# @overload initialize(arrowable)
|
78
|
+
# Initialize DataFrame by a `#to_arrow` responsible object.
|
79
|
+
#
|
80
|
+
# @param arrowable [#to_arrow]
|
81
|
+
# Any object which responds to `#to_arrow`.
|
82
|
+
# `#to_arrow` must return `Arrow::Table`.
|
24
83
|
#
|
25
|
-
# @
|
84
|
+
# @note `RedAmber::DataFrame` itself is readable by this.
|
85
|
+
# @note Hash is refined to respond to `#to_arrow` in this class.
|
86
|
+
# @example Initialize by Red Dataset object.
|
87
|
+
# require 'datasets-arrow'
|
88
|
+
# dataset = Datasets::Penguins.new
|
89
|
+
# penguins = DataFrame.new(dataset)
|
90
|
+
# @since 0.2.2
|
26
91
|
#
|
27
|
-
# @overload initialize(
|
92
|
+
# @overload initialize(rover_like)
|
93
|
+
# Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
|
28
94
|
#
|
29
|
-
# @
|
95
|
+
# @param rover_like [#to_h]
|
96
|
+
# Any object which responds to `#to_h`.
|
97
|
+
# `#to_h` must return a Hash which is convertible by `Arrow::Table.new`.
|
30
98
|
#
|
31
|
-
#
|
99
|
+
# @note `Rover::DataFrame` is readable by this.
|
32
100
|
#
|
33
|
-
#
|
101
|
+
# @overload initialize()
|
102
|
+
# Create empty DataFrame
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# DataFrame.new
|
106
|
+
#
|
107
|
+
# @overload initialize(empty)
|
108
|
+
# Create empty DataFrame
|
109
|
+
#
|
110
|
+
# @param empty [nil, [], {}]
|
111
|
+
#
|
112
|
+
# @example Return empty DataFrame.
|
113
|
+
# DataFrame.new([])
|
114
|
+
# DataFrame.new({})
|
115
|
+
# DataFrame.new(nil)
|
34
116
|
#
|
35
117
|
def initialize(*args)
|
36
|
-
@variables = @keys = @vectors = @types = @data_types = nil
|
37
118
|
case args
|
38
119
|
in nil | [nil] | [] | {} | [[]] | [{}]
|
39
|
-
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
40
|
-
# returns empty DataFrame
|
41
120
|
@table = Arrow::Table.new({}, [])
|
42
|
-
in [
|
121
|
+
in [Arrow::Table => table]
|
122
|
+
@table = table
|
123
|
+
in [arrowable] if arrowable.respond_to?(:to_arrow)
|
43
124
|
table = arrowable.to_arrow
|
44
125
|
unless table.is_a?(Arrow::Table)
|
45
126
|
raise DataFrameTypeError,
|
46
127
|
"to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
|
47
128
|
end
|
48
129
|
@table = table
|
49
|
-
in [
|
50
|
-
@table = table
|
51
|
-
in [rover_or_hash]
|
130
|
+
in [rover_like] if rover_like.respond_to?(:to_h)
|
52
131
|
begin
|
53
|
-
# Accepts Rover::DataFrame
|
54
|
-
@table = Arrow::Table.new(
|
132
|
+
# Accepts Rover::DataFrame
|
133
|
+
@table = Arrow::Table.new(rover_like.to_h)
|
55
134
|
rescue StandardError
|
56
|
-
raise DataFrameTypeError, "
|
135
|
+
raise DataFrameTypeError, "to_h must return Arrowable object: #{rover_like}"
|
57
136
|
end
|
58
137
|
else
|
59
|
-
|
138
|
+
begin
|
139
|
+
@table = Arrow::Table.new(*args)
|
140
|
+
rescue StandardError
|
141
|
+
raise DataFrameTypeError, "invalid argument to create Arrow::Table: #{args}"
|
142
|
+
end
|
60
143
|
end
|
61
|
-
name_unnamed_keys
|
62
144
|
|
63
|
-
|
64
|
-
|
145
|
+
name_unnamed_keys
|
146
|
+
check_duplicate_keys(keys)
|
65
147
|
end
|
66
148
|
|
149
|
+
# Returns the table having within.
|
150
|
+
#
|
151
|
+
# @return [Arrow::Table]
|
152
|
+
# the table within.
|
153
|
+
#
|
67
154
|
attr_reader :table
|
155
|
+
alias_method :to_arrow, :table
|
68
156
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
# Returns the number of rows.
|
157
|
+
# Returns the number of records (rows).
|
158
|
+
#
|
159
|
+
# @return [Integer]
|
160
|
+
# number of records (rows).
|
74
161
|
#
|
75
|
-
# @return [Integer] Number of rows.
|
76
162
|
def size
|
77
163
|
@table.n_rows
|
78
164
|
end
|
@@ -80,9 +166,11 @@ module RedAmber
|
|
80
166
|
alias_method :n_obs, :size
|
81
167
|
alias_method :n_rows, :size
|
82
168
|
|
83
|
-
# Returns the number of columns.
|
169
|
+
# Returns the number of variables (columns).
|
170
|
+
#
|
171
|
+
# @return [Integer]
|
172
|
+
# number of variables (columns).
|
84
173
|
#
|
85
|
-
# @return [Integer] Number of columns.
|
86
174
|
def n_keys
|
87
175
|
@table.n_columns
|
88
176
|
end
|
@@ -93,8 +181,9 @@ module RedAmber
|
|
93
181
|
# Returns the numbers of rows and columns.
|
94
182
|
#
|
95
183
|
# @return [Array]
|
96
|
-
#
|
184
|
+
# number of rows and number of columns in an array.
|
97
185
|
# Same as [size, n_keys].
|
186
|
+
#
|
98
187
|
def shape
|
99
188
|
[size, n_keys]
|
100
189
|
end
|
@@ -102,7 +191,8 @@ module RedAmber
|
|
102
191
|
# Returns a Hash of key and Vector pairs in the columns.
|
103
192
|
#
|
104
193
|
# @return [Hash]
|
105
|
-
# key => Vector pairs for each columns.
|
194
|
+
# `key => Vector` pairs for each column.
|
195
|
+
#
|
106
196
|
def variables
|
107
197
|
@variables || @variables = init_instance_vars(:variables)
|
108
198
|
end
|
@@ -111,7 +201,8 @@ module RedAmber
|
|
111
201
|
# Returns an Array of keys.
|
112
202
|
#
|
113
203
|
# @return [Array]
|
114
|
-
#
|
204
|
+
# keys in an Array.
|
205
|
+
#
|
115
206
|
def keys
|
116
207
|
@keys || @keys = init_instance_vars(:keys)
|
117
208
|
end
|
@@ -120,9 +211,11 @@ module RedAmber
|
|
120
211
|
|
121
212
|
# Returns true if self has a specified key in the argument.
|
122
213
|
#
|
123
|
-
# @param key [Symbol, String]
|
214
|
+
# @param key [Symbol, String]
|
215
|
+
# key to test.
|
124
216
|
# @return [Boolean]
|
125
|
-
#
|
217
|
+
# returns true if self has key in Symbol.
|
218
|
+
#
|
126
219
|
def key?(key)
|
127
220
|
keys.include?(key.to_sym)
|
128
221
|
end
|
@@ -130,9 +223,11 @@ module RedAmber
|
|
130
223
|
|
131
224
|
# Returns index of specified key in the Array keys.
|
132
225
|
#
|
133
|
-
# @param key [Symbol, String]
|
226
|
+
# @param key [Symbol, String]
|
227
|
+
# key to know.
|
134
228
|
# @return [Integer]
|
135
|
-
#
|
229
|
+
# index of key in the Array keys.
|
230
|
+
#
|
136
231
|
def key_index(key)
|
137
232
|
keys.find_index(key.to_sym)
|
138
233
|
end
|
@@ -142,15 +237,19 @@ module RedAmber
|
|
142
237
|
# Returns abbreviated type names in an Array.
|
143
238
|
#
|
144
239
|
# @return [Array]
|
145
|
-
#
|
240
|
+
# abbreviated Red Arrow data type names.
|
241
|
+
#
|
146
242
|
def types
|
147
|
-
@types || @types = @table.columns.map
|
243
|
+
@types || @types = @table.columns.map do |column|
|
244
|
+
column.data.value_type.nick.to_sym
|
245
|
+
end
|
148
246
|
end
|
149
247
|
|
150
248
|
# Returns an Array of Classes of data type.
|
151
249
|
#
|
152
250
|
# @return [Array]
|
153
|
-
#
|
251
|
+
# an Array of Red Arrow data type Classes.
|
252
|
+
#
|
154
253
|
def type_classes
|
155
254
|
@data_types || @data_types = @table.columns.map { |column| column.data_type.class }
|
156
255
|
end
|
@@ -158,50 +257,83 @@ module RedAmber
|
|
158
257
|
# Returns Vectors in an Array.
|
159
258
|
#
|
160
259
|
# @return [Array]
|
161
|
-
#
|
260
|
+
# an Array of Vector.
|
261
|
+
#
|
162
262
|
def vectors
|
163
263
|
@vectors || @vectors = init_instance_vars(:vectors)
|
164
264
|
end
|
165
265
|
|
166
|
-
# Returns
|
266
|
+
# Returns column-oriented data in a Hash.
|
267
|
+
#
|
268
|
+
# @return [Hash]
|
269
|
+
# a Hash of 'key => column_in_an_array'.
|
167
270
|
#
|
168
|
-
# @param start [Object]
|
169
|
-
# Object which have #succ method.
|
170
|
-
# @return [Array]
|
171
|
-
# An Array of indices of the row.
|
172
|
-
# @example
|
173
|
-
# (when self.size == 5)
|
174
|
-
# - indices #=> [0, 1, 2, 3, 4]
|
175
|
-
# - indices(1) #=> [1, 2, 3, 4, 5]
|
176
|
-
# - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
|
177
|
-
def indices(start = 0)
|
178
|
-
Vector.new((start..).take(size))
|
179
|
-
end
|
180
|
-
alias_method :indexes, :indices
|
181
|
-
|
182
271
|
def to_h
|
183
272
|
variables.transform_values(&:to_a)
|
184
273
|
end
|
185
274
|
|
275
|
+
# Returns a row-oriented array without header.
|
276
|
+
#
|
277
|
+
# @return [Array]
|
278
|
+
# row-oriented data without header.
|
279
|
+
#
|
280
|
+
# @note If you need column-oriented array, use `.to_h.to_a`.
|
281
|
+
#
|
186
282
|
def to_a
|
187
|
-
# output an array of row-oriented data without header
|
188
|
-
# if you need column-oriented array, use `.to_h.to_a`
|
189
283
|
@table.raw_records
|
190
284
|
end
|
191
285
|
alias_method :raw_records, :to_a
|
192
286
|
|
287
|
+
# Returns column name and data type in a Hash.
|
288
|
+
#
|
289
|
+
# @return [Hash]
|
290
|
+
# column name and data type.
|
291
|
+
#
|
292
|
+
# @example
|
293
|
+
# RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
|
294
|
+
# # => {:x=>:uint8, :y=>:string}
|
295
|
+
#
|
193
296
|
def schema
|
194
297
|
keys.zip(types).to_h
|
195
298
|
end
|
196
299
|
|
300
|
+
# Compare DataFrames.
|
301
|
+
#
|
302
|
+
# @return [true, false]
|
303
|
+
# true if other is a DataFrame and table is same.
|
304
|
+
# Otherwise return false.
|
305
|
+
#
|
197
306
|
def ==(other)
|
198
307
|
other.is_a?(DataFrame) && @table == other.table
|
199
308
|
end
|
200
309
|
|
310
|
+
# Check if it is an empty DataFrame.
|
311
|
+
#
|
312
|
+
# @return [true, false]
|
313
|
+
# true if it has no columns.
|
314
|
+
#
|
201
315
|
def empty?
|
202
316
|
variables.empty?
|
203
317
|
end
|
204
318
|
|
319
|
+
# Enumerate for each row.
|
320
|
+
#
|
321
|
+
# @overload each_row
|
322
|
+
# Returns Enumerator when no block given.
|
323
|
+
#
|
324
|
+
# @return [Enumerator]
|
325
|
+
# enumerator of each row.
|
326
|
+
#
|
327
|
+
# @overload each_row(&block)
|
328
|
+
# Yields with key and row pairs.
|
329
|
+
#
|
330
|
+
# @yieldparam key_row_pairs [Hash]
|
331
|
+
# key and row pairs.
|
332
|
+
# @yieldreturn [Integer]
|
333
|
+
# size of the DataFrame.
|
334
|
+
# @return [Integer]
|
335
|
+
# returns size.
|
336
|
+
#
|
205
337
|
def each_row
|
206
338
|
return enum_for(:each_row) unless block_given?
|
207
339
|
|
@@ -214,23 +346,348 @@ module RedAmber
|
|
214
346
|
end
|
215
347
|
end
|
216
348
|
|
349
|
+
# Returns self in a `Rover::DataFrame`.
|
350
|
+
#
|
351
|
+
# @return [Rover::DataFrame]
|
352
|
+
# a `Rover::DataFrame`.
|
353
|
+
#
|
217
354
|
def to_rover
|
218
355
|
require 'rover'
|
219
356
|
Rover::DataFrame.new(to_h)
|
220
357
|
end
|
221
358
|
|
359
|
+
# Create a Group object. Or create a Group and summarize it.
|
360
|
+
#
|
361
|
+
# @overload group(*group_keys)
|
362
|
+
# Create a Group object.
|
363
|
+
#
|
364
|
+
# @param group_keys [Array<Symbol, String>]
|
365
|
+
# keys for grouping.
|
366
|
+
# @return [Group]
|
367
|
+
# Group object.
|
368
|
+
# @example Create a Group
|
369
|
+
# penguins.group(:species)
|
370
|
+
#
|
371
|
+
# # =>
|
372
|
+
# #<RedAmber::Group : 0x000000000000c3c8>
|
373
|
+
# species group_count
|
374
|
+
# <string> <uint8>
|
375
|
+
# 0 Adelie 152
|
376
|
+
# 1 Chinstrap 68
|
377
|
+
# 2 Gentoo 124
|
378
|
+
#
|
379
|
+
# @overload group(*group_keys)
|
380
|
+
# Create a Group and summarize it by aggregation functions from the block.
|
381
|
+
#
|
382
|
+
# @yieldparam group [Group]
|
383
|
+
# passes Group object.
|
384
|
+
# @yieldreturn [DataFrame, Array<DataFrame>]
|
385
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
386
|
+
# @return [DataFrame]
|
387
|
+
# summarized DataFrame.
|
388
|
+
# @example Create a group and summarize it.
|
389
|
+
# penguins.group(:species) { mean(:bill_length_mm) }
|
390
|
+
#
|
391
|
+
# # =>
|
392
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
|
393
|
+
# species mean(bill_length_mm)
|
394
|
+
# <string> <double>
|
395
|
+
# 0 Adelie 38.79
|
396
|
+
# 1 Chinstrap 48.83
|
397
|
+
# 2 Gentoo 47.5
|
398
|
+
#
|
222
399
|
def group(*group_keys, &block)
|
223
400
|
g = Group.new(self, group_keys)
|
224
401
|
g = g.summarize(&block) if block
|
225
402
|
g
|
226
403
|
end
|
227
404
|
|
405
|
+
# Create SubFrames by value grouping.
|
406
|
+
#
|
407
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
408
|
+
# @param keys [Symbol, String, Array<Symbol, String>]
|
409
|
+
# grouping keys.
|
410
|
+
# @return [SubFrames]
|
411
|
+
# a created SubFrames grouped by column values on `keys`.
|
412
|
+
# @example
|
413
|
+
# df.sub_by_value(keys: :y)
|
414
|
+
#
|
415
|
+
# # =>
|
416
|
+
# #<RedAmber::SubFrames : 0x000000000000fc08>
|
417
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
418
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
419
|
+
# ---
|
420
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
|
421
|
+
# x y z
|
422
|
+
# <uint8> <string> <boolean>
|
423
|
+
# 0 1 A false
|
424
|
+
# 1 2 A true
|
425
|
+
# ---
|
426
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
|
427
|
+
# x y z
|
428
|
+
# <uint8> <string> <boolean>
|
429
|
+
# 0 3 B false
|
430
|
+
# 1 4 B (nil)
|
431
|
+
# 2 5 B true
|
432
|
+
# ---
|
433
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
|
434
|
+
# x y z
|
435
|
+
# <uint8> <string> <boolean>
|
436
|
+
# 0 6 C false
|
437
|
+
#
|
438
|
+
# @since 0.4.0
|
439
|
+
#
|
440
|
+
def sub_by_value(keys: nil)
|
441
|
+
SubFrames.new(self, group(keys).filters)
|
442
|
+
end
|
443
|
+
alias_method :subframes_by_value, :sub_by_value
|
444
|
+
|
445
|
+
# Create SubFrames by Windowing with `from`, `size` and `step`.
|
446
|
+
#
|
447
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
448
|
+
# @param from [Integer]
|
449
|
+
# start position of window.
|
450
|
+
# @param size [Integer]
|
451
|
+
# window size.
|
452
|
+
# @param step [Integer]
|
453
|
+
# moving step of window.
|
454
|
+
# @return [SubFrames]
|
455
|
+
# a created SubFrames.
|
456
|
+
# @example
|
457
|
+
# df.sub_by_window(size: 4, step: 2)
|
458
|
+
#
|
459
|
+
# # =>
|
460
|
+
# #<RedAmber::SubFrames : 0x000000000000fc58>
|
461
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
462
|
+
# 2 SubFrames: [4, 4] in sizes.
|
463
|
+
# ---
|
464
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
|
465
|
+
# x y z
|
466
|
+
# <uint8> <string> <boolean>
|
467
|
+
# 0 1 A false
|
468
|
+
# 1 2 A true
|
469
|
+
# 2 3 B false
|
470
|
+
# 3 4 B (nil)
|
471
|
+
# ---
|
472
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
|
473
|
+
# x y z
|
474
|
+
# <uint8> <string> <boolean>
|
475
|
+
# 0 3 B false
|
476
|
+
# 1 4 B (nil)
|
477
|
+
# 2 5 B true
|
478
|
+
# 3 6 C false
|
479
|
+
#
|
480
|
+
# @since 0.4.0
|
481
|
+
#
|
482
|
+
def sub_by_window(from: 0, size: nil, step: 1)
|
483
|
+
SubFrames.new(self) do
|
484
|
+
from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
|
485
|
+
[*i...(i + size)]
|
486
|
+
end
|
487
|
+
end
|
488
|
+
end
|
489
|
+
alias_method :subframes_by_window, :sub_by_window
|
490
|
+
|
491
|
+
# Create SubFrames by Grouping/Windowing by position from an enumerator method.
|
492
|
+
#
|
493
|
+
# This method will process the indices of self by enumerator.
|
494
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
495
|
+
# @param enumerator_method [Symbol]
|
496
|
+
# Enumerator name.
|
497
|
+
# @param args [<Object>]
|
498
|
+
# arguments for the enumerator method.
|
499
|
+
# @return [SubFrames]
|
500
|
+
# a created SubFrames.
|
501
|
+
# @example Create a SubFrames object sliced by 3 rows.
|
502
|
+
# df.sub_by_enum(:each_slice, 3)
|
503
|
+
#
|
504
|
+
# # =>
|
505
|
+
# #<RedAmber::SubFrames : 0x000000000000fd20>
|
506
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
507
|
+
# 2 SubFrames: [3, 3] in sizes.
|
508
|
+
# ---
|
509
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
|
510
|
+
# x y z
|
511
|
+
# <uint8> <string> <boolean>
|
512
|
+
# 0 1 A false
|
513
|
+
# 1 2 A true
|
514
|
+
# 2 3 B false
|
515
|
+
# ---
|
516
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
|
517
|
+
# x y z
|
518
|
+
# <uint8> <string> <boolean>
|
519
|
+
# 0 4 B (nil)
|
520
|
+
# 1 5 B true
|
521
|
+
# 2 6 C false
|
522
|
+
#
|
523
|
+
# @example Create a SubFrames object for each consecutive 4 rows.
|
524
|
+
# df.sub_by_enum(:each_cons, 4)
|
525
|
+
#
|
526
|
+
# # =>
|
527
|
+
# #<RedAmber::SubFrames : 0x000000000000fd98>
|
528
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
529
|
+
# 3 SubFrames: [4, 4, 4] in sizes.
|
530
|
+
# ---
|
531
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
|
532
|
+
# x y z
|
533
|
+
# <uint8> <string> <boolean>
|
534
|
+
# 0 1 A false
|
535
|
+
# 1 2 A true
|
536
|
+
# 2 3 B false
|
537
|
+
# 3 4 B (nil)
|
538
|
+
# ---
|
539
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
|
540
|
+
# x y z
|
541
|
+
# <uint8> <string> <boolean>
|
542
|
+
# 0 2 A true
|
543
|
+
# 1 3 B false
|
544
|
+
# 2 4 B (nil)
|
545
|
+
# 3 5 B true
|
546
|
+
# ---
|
547
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
|
548
|
+
# x y z
|
549
|
+
# <uint8> <string> <boolean>
|
550
|
+
# 0 3 B false
|
551
|
+
# 1 4 B (nil)
|
552
|
+
# 2 5 B true
|
553
|
+
# 3 6 C false
|
554
|
+
#
|
555
|
+
# @since 0.4.0
|
556
|
+
#
|
557
|
+
def sub_by_enum(enumerator_method, *args)
|
558
|
+
SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
|
559
|
+
end
|
560
|
+
alias_method :subframes_by_enum, :sub_by_enum
|
561
|
+
|
562
|
+
# Create SubFrames by windowing with a kernel (i.e. masked window) and step.
|
563
|
+
#
|
564
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
565
|
+
# @param kernel [Array<true, false>, Vector]
|
566
|
+
# boolean array-like to pick records in the window.
|
567
|
+
# Kernel is a boolean Array and it behaves like a masked window.
|
568
|
+
# @param step [Integer]
|
569
|
+
# moving step of window.
|
570
|
+
# @return [SubFrames]
|
571
|
+
# a created SubFrames.
|
572
|
+
# @example
|
573
|
+
# kernel = [true, false, false, true]
|
574
|
+
# df.sub_by_kernel(kernel, step: 2)
|
575
|
+
#
|
576
|
+
# # =>
|
577
|
+
# #<RedAmber::SubFrames : 0x000000000000fde8>
|
578
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
579
|
+
# 2 SubFrames: [2, 2] in sizes.
|
580
|
+
# ---
|
581
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
|
582
|
+
# x y z
|
583
|
+
# <uint8> <string> <boolean>
|
584
|
+
# 0 1 A false
|
585
|
+
# 1 4 B (nil)
|
586
|
+
# ---
|
587
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
|
588
|
+
# x y z
|
589
|
+
# <uint8> <string> <boolean>
|
590
|
+
# 0 3 B false
|
591
|
+
# 1 6 C false
|
592
|
+
#
|
593
|
+
# @since 0.4.0
|
594
|
+
#
|
595
|
+
def sub_by_kernel(kernel, step: 1)
|
596
|
+
limit_size = size - kernel.size
|
597
|
+
kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
|
598
|
+
SubFrames.new(self) do
|
599
|
+
0.step(by: step, to: limit_size).map do |i|
|
600
|
+
kernel_vector.shift(i)
|
601
|
+
end
|
602
|
+
end
|
603
|
+
end
|
604
|
+
alias_method :subframes_by_kernel, :sub_by_kernel
|
605
|
+
|
606
|
+
# Generic builder of sub-dataframes from self.
|
607
|
+
#
|
608
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
609
|
+
# @overload build_subframes(subset_specifier)
|
610
|
+
# Create a new SubFrames object.
|
611
|
+
#
|
612
|
+
# @param subset_specifier [Array<Vector>, Array<array-like>]
|
613
|
+
# an Array of numeric indices or boolean filters
|
614
|
+
# to create subsets of DataFrame.
|
615
|
+
# @return [SubFrames]
|
616
|
+
# new SubFrames.
|
617
|
+
# @example
|
618
|
+
# df.build_subframes([[0, 2, 4], [1, 3, 5]])
|
619
|
+
#
|
620
|
+
# # =>
|
621
|
+
# #<RedAmber::SubFrames : 0x000000000000fe9c>
|
622
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
623
|
+
# 2 SubFrames: [3, 3] in sizes.
|
624
|
+
# ---
|
625
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
|
626
|
+
# x y z
|
627
|
+
# <uint8> <string> <boolean>
|
628
|
+
# 0 1 A false
|
629
|
+
# 1 3 B false
|
630
|
+
# 2 5 B true
|
631
|
+
# ---
|
632
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
|
633
|
+
# x y z
|
634
|
+
# <uint8> <string> <boolean>
|
635
|
+
# 0 2 A true
|
636
|
+
# 1 4 B (nil)
|
637
|
+
# 2 6 C false
|
638
|
+
#
|
639
|
+
# @overload build_subframes
|
640
|
+
# Create a new SubFrames object by block.
|
641
|
+
#
|
642
|
+
# @yield [self]
|
643
|
+
# the block is called within the context of self.
|
644
|
+
# (Block is called by instance_eval(&block). )
|
645
|
+
# @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
|
646
|
+
# an Array of index or boolean array-likes to create subsets of DataFrame.
|
647
|
+
# All array-likes must respond to #numeric? or #boolean?.
|
648
|
+
# @example
|
649
|
+
# dataframe.build_subframes do
|
650
|
+
# even = indices.map(&:even?)
|
651
|
+
# [even, !even]
|
652
|
+
# end
|
653
|
+
#
|
654
|
+
# # =>
|
655
|
+
# #<RedAmber::SubFrames : 0x000000000000fe60>
|
656
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
657
|
+
# 2 SubFrames: [3, 3] in sizes.
|
658
|
+
# ---
|
659
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
|
660
|
+
# x y z
|
661
|
+
# <uint8> <string> <boolean>
|
662
|
+
# 0 1 A false
|
663
|
+
# 1 3 B false
|
664
|
+
# 2 5 B true
|
665
|
+
# ---
|
666
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
|
667
|
+
# x y z
|
668
|
+
# <uint8> <string> <boolean>
|
669
|
+
# 0 2 A true
|
670
|
+
# 1 4 B (nil)
|
671
|
+
# 2 6 C false
|
672
|
+
#
|
673
|
+
# @since 0.4.0
|
674
|
+
#
|
675
|
+
def build_subframes(subset_specifier = nil, &block)
|
676
|
+
if block
|
677
|
+
SubFrames.new(self, instance_eval(&block))
|
678
|
+
else
|
679
|
+
SubFrames.new(self, subset_specifier)
|
680
|
+
end
|
681
|
+
end
|
682
|
+
|
683
|
+
# Catch variable (column) key as method name.
|
228
684
|
def method_missing(name, *args, &block)
|
229
|
-
return v(name) if args.empty?
|
685
|
+
return v(name) if args.empty? && key?(name)
|
230
686
|
|
231
687
|
super
|
232
688
|
end
|
233
689
|
|
690
|
+
# Catch variable (column) key as method name.
|
234
691
|
def respond_to_missing?(name, include_private)
|
235
692
|
return true if key?(name)
|
236
693
|
|
@@ -241,20 +698,32 @@ module RedAmber
|
|
241
698
|
|
242
699
|
# initialize @variables, @keys, @vectors and return one of them
|
243
700
|
def init_instance_vars(var)
|
244
|
-
ary =
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
701
|
+
ary =
|
702
|
+
@table
|
703
|
+
.columns
|
704
|
+
.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
|
705
|
+
v = Vector.create(column.data)
|
706
|
+
k = column.name.to_sym
|
707
|
+
v.key = k
|
708
|
+
variables[k] = v
|
709
|
+
keys << k
|
710
|
+
vectors << v
|
711
|
+
end
|
712
|
+
|
252
713
|
@variables, @keys, @vectors = ary
|
253
714
|
ary[%i[variables keys vectors].index(var)]
|
254
715
|
end
|
255
716
|
|
717
|
+
def check_duplicate_keys(array)
|
718
|
+
org = array.dup
|
719
|
+
return unless array.uniq!
|
720
|
+
|
721
|
+
raise DataFrameArgumentError,
|
722
|
+
"duplicate keys: #{org.tally.select { |_k, v| v > 1 }.keys}"
|
723
|
+
end
|
724
|
+
|
256
725
|
def name_unnamed_keys
|
257
|
-
return unless @table
|
726
|
+
return unless @table.key?('')
|
258
727
|
|
259
728
|
# We can't use #keys because it causes mismatch of @table and @keys
|
260
729
|
keys = @table.schema.fields.map { |f| f.name.to_sym }
|