red_amber 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +133 -51
- data/.yardopts +2 -0
- data/CHANGELOG.md +203 -1
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +61 -45
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +63 -0
- data/doc/DataFrame.md +35 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +295 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +537 -68
- data/lib/red_amber/data_frame_combinable.rb +776 -123
- data/lib/red_amber/data_frame_displayable.rb +248 -18
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +81 -10
- data/lib/red_amber/data_frame_reshaping.rb +216 -21
- data/lib/red_amber/data_frame_selectable.rb +781 -120
- data/lib/red_amber/data_frame_variable_operation.rb +561 -85
- data/lib/red_amber/group.rb +195 -21
- data/lib/red_amber/helper.rb +114 -32
- data/lib/red_amber/refinements.rb +206 -0
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +435 -58
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +321 -69
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +397 -24
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +15 -1
- data/red_amber.gemspec +4 -3
- metadata +19 -11
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -294
data/lib/red_amber/data_frame.rb
CHANGED
@@ -4,7 +4,7 @@ module RedAmber
|
|
4
4
|
# Class to represent a data frame.
|
5
5
|
# Variable @table holds an Arrow::Table object.
|
6
6
|
class DataFrame
|
7
|
-
#
|
7
|
+
# Mix-in
|
8
8
|
include DataFrameCombinable
|
9
9
|
include DataFrameDisplayable
|
10
10
|
include DataFrameIndexable
|
@@ -14,65 +14,151 @@ module RedAmber
|
|
14
14
|
include DataFrameVariableOperation
|
15
15
|
include Helper
|
16
16
|
|
17
|
-
|
17
|
+
using RefineArrowTable
|
18
|
+
using RefineHash
|
19
|
+
|
20
|
+
class << self
|
21
|
+
# Quicker DataFrame constructor from a `Arrow::Table`.
|
22
|
+
#
|
23
|
+
# @param table [Arrow::Table]
|
24
|
+
# A table to have in the DataFrame.
|
25
|
+
# @return [DataFrame]
|
26
|
+
# Initialized DataFrame.
|
27
|
+
#
|
28
|
+
# @note This method will allocate table directly and may be used in the method.
|
29
|
+
# @note `table` must have unique keys.
|
30
|
+
#
|
31
|
+
def create(table)
|
32
|
+
instance = allocate
|
33
|
+
instance.instance_variable_set(:@table, table)
|
34
|
+
instance
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Creates a new DataFrame.
|
18
39
|
#
|
19
40
|
# @overload initialize(hash)
|
41
|
+
# Initialize a DataFrame by a Hash.
|
20
42
|
#
|
21
|
-
# @
|
43
|
+
# @param hash [Hash<key => <Array, Arrow::Array, #to_arrow_array>>]
|
44
|
+
# a Hash of `key` with array-like for column values.
|
45
|
+
# `key`s are Symbol or String.
|
46
|
+
# @example Initialize by a Hash
|
47
|
+
# hash = { x: [1, 2, 3], y: %w[A B C] }
|
48
|
+
# DataFrame.new(hash)
|
49
|
+
# @example Initialize by a Hash like arguments.
|
50
|
+
# DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
51
|
+
# @example Initialize from objects that respond to #to_arrow_array.
|
52
|
+
# # Any `array-like` object that responds to #to_arrow_array is also accepted.
|
53
|
+
# require 'arrow-numo-narray'
|
54
|
+
# DataFrame.new(numo: Numo::DFloat.new(3).rand)
|
22
55
|
#
|
23
56
|
# @overload initialize(table)
|
57
|
+
# Initialize a DataFrame by an `Arrow::Table`.
|
58
|
+
#
|
59
|
+
# @param table [Arrow::Table]
|
60
|
+
# a table to have in the DataFrame.
|
61
|
+
# @example Initialize by a Table
|
62
|
+
# table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
63
|
+
# DataFrame.new(table)
|
64
|
+
#
|
65
|
+
# @overload initialize(schema, row_oriented_array)
|
66
|
+
# Initialize a DataFrame by schema and row_oriented_array.
|
67
|
+
#
|
68
|
+
# @param schema [Hash<key => type>]
|
69
|
+
# a schema of key and data type.
|
70
|
+
# @param row_oriented_array [Array]
|
71
|
+
# an Array of rows.
|
72
|
+
# @example Initialize by a schema and a row_oriented_array.
|
73
|
+
# schema = { x: :uint8, y: :string }
|
74
|
+
# row_oriented_array = [[1, 'A'], [2, 'B'], [3, 'C']]
|
75
|
+
# DataFrame.new(schema, row_oriented_array)
|
76
|
+
#
|
77
|
+
# @overload initialize(arrowable)
|
78
|
+
# Initialize DataFrame by a `#to_arrow` responsible object.
|
79
|
+
#
|
80
|
+
# @param arrowable [#to_arrow]
|
81
|
+
# Any object which responds to `#to_arrow`.
|
82
|
+
# `#to_arrow` must return `Arrow::Table`.
|
24
83
|
#
|
25
|
-
# @
|
84
|
+
# @note `RedAmber::DataFrame` itself is readable by this.
|
85
|
+
# @note Hash is refined to respond to `#to_arrow` in this class.
|
86
|
+
# @example Initialize by Red Dataset object.
|
87
|
+
# require 'datasets-arrow'
|
88
|
+
# dataset = Datasets::Penguins.new
|
89
|
+
# penguins = DataFrame.new(dataset)
|
90
|
+
# @since 0.2.2
|
26
91
|
#
|
27
|
-
# @overload initialize(
|
92
|
+
# @overload initialize(rover_like)
|
93
|
+
# Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
|
28
94
|
#
|
29
|
-
# @
|
95
|
+
# @param rover_like [#to_h]
|
96
|
+
# Any object which responds to `#to_h`.
|
97
|
+
# `#to_h` must return a Hash which is convertible by `Arrow::Table.new`.
|
30
98
|
#
|
31
|
-
#
|
99
|
+
# @note `Rover::DataFrame` is readable by this.
|
32
100
|
#
|
33
|
-
#
|
101
|
+
# @overload initialize()
|
102
|
+
# Create empty DataFrame
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# DataFrame.new
|
106
|
+
#
|
107
|
+
# @overload initialize(empty)
|
108
|
+
# Create empty DataFrame
|
109
|
+
#
|
110
|
+
# @param empty [nil, [], {}]
|
111
|
+
#
|
112
|
+
# @example Return empty DataFrame.
|
113
|
+
# DataFrame.new([])
|
114
|
+
# DataFrame.new({})
|
115
|
+
# DataFrame.new(nil)
|
34
116
|
#
|
35
117
|
def initialize(*args)
|
36
|
-
@variables = @keys = @vectors = @types = @data_types = nil
|
37
118
|
case args
|
38
119
|
in nil | [nil] | [] | {} | [[]] | [{}]
|
39
|
-
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
40
|
-
# returns empty DataFrame
|
41
120
|
@table = Arrow::Table.new({}, [])
|
42
|
-
in [
|
121
|
+
in [Arrow::Table => table]
|
122
|
+
@table = table
|
123
|
+
in [arrowable] if arrowable.respond_to?(:to_arrow)
|
43
124
|
table = arrowable.to_arrow
|
44
125
|
unless table.is_a?(Arrow::Table)
|
45
126
|
raise DataFrameTypeError,
|
46
127
|
"to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
|
47
128
|
end
|
48
129
|
@table = table
|
49
|
-
in [
|
50
|
-
@table = table
|
51
|
-
in [rover_or_hash]
|
130
|
+
in [rover_like] if rover_like.respond_to?(:to_h)
|
52
131
|
begin
|
53
|
-
# Accepts Rover::DataFrame
|
54
|
-
@table = Arrow::Table.new(
|
132
|
+
# Accepts Rover::DataFrame
|
133
|
+
@table = Arrow::Table.new(rover_like.to_h)
|
55
134
|
rescue StandardError
|
56
|
-
raise DataFrameTypeError, "
|
135
|
+
raise DataFrameTypeError, "to_h must return Arrowable object: #{rover_like}"
|
57
136
|
end
|
58
137
|
else
|
59
|
-
|
138
|
+
begin
|
139
|
+
@table = Arrow::Table.new(*args)
|
140
|
+
rescue StandardError
|
141
|
+
raise DataFrameTypeError, "invalid argument to create Arrow::Table: #{args}"
|
142
|
+
end
|
60
143
|
end
|
61
|
-
name_unnamed_keys
|
62
144
|
|
63
|
-
|
64
|
-
|
145
|
+
name_unnamed_keys
|
146
|
+
check_duplicate_keys(keys)
|
65
147
|
end
|
66
148
|
|
149
|
+
# Returns the table held within.
|
150
|
+
#
|
151
|
+
# @return [Arrow::Table]
|
152
|
+
# the table within.
|
153
|
+
#
|
67
154
|
attr_reader :table
|
155
|
+
alias_method :to_arrow, :table
|
68
156
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
# Returns the number of rows.
|
157
|
+
# Returns the number of records (rows).
|
158
|
+
#
|
159
|
+
# @return [Integer]
|
160
|
+
# number of records (rows).
|
74
161
|
#
|
75
|
-
# @return [Integer] Number of rows.
|
76
162
|
def size
|
77
163
|
@table.n_rows
|
78
164
|
end
|
@@ -80,9 +166,11 @@ module RedAmber
|
|
80
166
|
alias_method :n_obs, :size
|
81
167
|
alias_method :n_rows, :size
|
82
168
|
|
83
|
-
# Returns the number of columns.
|
169
|
+
# Returns the number of variables (columns).
|
170
|
+
#
|
171
|
+
# @return [Integer]
|
172
|
+
# number of variables (columns).
|
84
173
|
#
|
85
|
-
# @return [Integer] Number of columns.
|
86
174
|
def n_keys
|
87
175
|
@table.n_columns
|
88
176
|
end
|
@@ -93,8 +181,9 @@ module RedAmber
|
|
93
181
|
# Returns the numbers of rows and columns.
|
94
182
|
#
|
95
183
|
# @return [Array]
|
96
|
-
#
|
184
|
+
# number of rows and number of columns in an array.
|
97
185
|
# Same as [size, n_keys].
|
186
|
+
#
|
98
187
|
def shape
|
99
188
|
[size, n_keys]
|
100
189
|
end
|
@@ -102,7 +191,8 @@ module RedAmber
|
|
102
191
|
# Returns a Hash of key and Vector pairs in the columns.
|
103
192
|
#
|
104
193
|
# @return [Hash]
|
105
|
-
# key => Vector pairs for each columns.
|
194
|
+
# `key => Vector` pairs for each column.
|
195
|
+
#
|
106
196
|
def variables
|
107
197
|
@variables || @variables = init_instance_vars(:variables)
|
108
198
|
end
|
@@ -111,7 +201,8 @@ module RedAmber
|
|
111
201
|
# Returns an Array of keys.
|
112
202
|
#
|
113
203
|
# @return [Array]
|
114
|
-
#
|
204
|
+
# keys in an Array.
|
205
|
+
#
|
115
206
|
def keys
|
116
207
|
@keys || @keys = init_instance_vars(:keys)
|
117
208
|
end
|
@@ -120,9 +211,11 @@ module RedAmber
|
|
120
211
|
|
121
212
|
# Returns true if self has a specified key in the argument.
|
122
213
|
#
|
123
|
-
# @param key [Symbol, String]
|
214
|
+
# @param key [Symbol, String]
|
215
|
+
# key to test.
|
124
216
|
# @return [Boolean]
|
125
|
-
#
|
217
|
+
# returns true if self has key in Symbol.
|
218
|
+
#
|
126
219
|
def key?(key)
|
127
220
|
keys.include?(key.to_sym)
|
128
221
|
end
|
@@ -130,9 +223,11 @@ module RedAmber
|
|
130
223
|
|
131
224
|
# Returns index of specified key in the Array keys.
|
132
225
|
#
|
133
|
-
# @param key [Symbol, String]
|
226
|
+
# @param key [Symbol, String]
|
227
|
+
# key to know.
|
134
228
|
# @return [Integer]
|
135
|
-
#
|
229
|
+
# index of key in the Array keys.
|
230
|
+
#
|
136
231
|
def key_index(key)
|
137
232
|
keys.find_index(key.to_sym)
|
138
233
|
end
|
@@ -142,15 +237,19 @@ module RedAmber
|
|
142
237
|
# Returns abbreviated type names in an Array.
|
143
238
|
#
|
144
239
|
# @return [Array]
|
145
|
-
#
|
240
|
+
# abbreviated Red Arrow data type names.
|
241
|
+
#
|
146
242
|
def types
|
147
|
-
@types || @types = @table.columns.map
|
243
|
+
@types || @types = @table.columns.map do |column|
|
244
|
+
column.data.value_type.nick.to_sym
|
245
|
+
end
|
148
246
|
end
|
149
247
|
|
150
248
|
# Returns an Array of Classes of data type.
|
151
249
|
#
|
152
250
|
# @return [Array]
|
153
|
-
#
|
251
|
+
# an Array of Red Arrow data type Classes.
|
252
|
+
#
|
154
253
|
def type_classes
|
155
254
|
@data_types || @data_types = @table.columns.map { |column| column.data_type.class }
|
156
255
|
end
|
@@ -158,50 +257,83 @@ module RedAmber
|
|
158
257
|
# Returns Vectors in an Array.
|
159
258
|
#
|
160
259
|
# @return [Array]
|
161
|
-
#
|
260
|
+
# an Array of Vector.
|
261
|
+
#
|
162
262
|
def vectors
|
163
263
|
@vectors || @vectors = init_instance_vars(:vectors)
|
164
264
|
end
|
165
265
|
|
166
|
-
# Returns
|
266
|
+
# Returns column-oriented data in a Hash.
|
267
|
+
#
|
268
|
+
# @return [Hash]
|
269
|
+
# a Hash of 'key => column_in_an_array'.
|
167
270
|
#
|
168
|
-
# @param start [Object]
|
169
|
-
# Object which have #succ method.
|
170
|
-
# @return [Array]
|
171
|
-
# An Array of indices of the row.
|
172
|
-
# @example
|
173
|
-
# (when self.size == 5)
|
174
|
-
# - indices #=> [0, 1, 2, 3, 4]
|
175
|
-
# - indices(1) #=> [1, 2, 3, 4, 5]
|
176
|
-
# - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
|
177
|
-
def indices(start = 0)
|
178
|
-
Vector.new((start..).take(size))
|
179
|
-
end
|
180
|
-
alias_method :indexes, :indices
|
181
|
-
|
182
271
|
def to_h
|
183
272
|
variables.transform_values(&:to_a)
|
184
273
|
end
|
185
274
|
|
275
|
+
# Returns a row-oriented array without header.
|
276
|
+
#
|
277
|
+
# @return [Array]
|
278
|
+
# row-oriented data without header.
|
279
|
+
#
|
280
|
+
# @note If you need column-oriented array, use `.to_h.to_a`.
|
281
|
+
#
|
186
282
|
def to_a
|
187
|
-
# output an array of row-oriented data without header
|
188
|
-
# if you need column-oriented array, use `.to_h.to_a`
|
189
283
|
@table.raw_records
|
190
284
|
end
|
191
285
|
alias_method :raw_records, :to_a
|
192
286
|
|
287
|
+
# Returns column name and data type in a Hash.
|
288
|
+
#
|
289
|
+
# @return [Hash]
|
290
|
+
# column name and data type.
|
291
|
+
#
|
292
|
+
# @example
|
293
|
+
# RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
|
294
|
+
# # => {:x=>:uint8, :y=>:string}
|
295
|
+
#
|
193
296
|
def schema
|
194
297
|
keys.zip(types).to_h
|
195
298
|
end
|
196
299
|
|
300
|
+
# Compare DataFrames.
|
301
|
+
#
|
302
|
+
# @return [true, false]
|
303
|
+
# true if other is a DataFrame and table is same.
|
304
|
+
# Otherwise return false.
|
305
|
+
#
|
197
306
|
def ==(other)
|
198
307
|
other.is_a?(DataFrame) && @table == other.table
|
199
308
|
end
|
200
309
|
|
310
|
+
# Check if it is an empty DataFrame.
|
311
|
+
#
|
312
|
+
# @return [true, false]
|
313
|
+
# true if it has no columns.
|
314
|
+
#
|
201
315
|
def empty?
|
202
316
|
variables.empty?
|
203
317
|
end
|
204
318
|
|
319
|
+
# Enumerate for each row.
|
320
|
+
#
|
321
|
+
# @overload each_row
|
322
|
+
# Returns Enumerator when no block given.
|
323
|
+
#
|
324
|
+
# @return [Enumerator]
|
325
|
+
# enumerator over each row.
|
326
|
+
#
|
327
|
+
# @overload each_row(&block)
|
328
|
+
# Yields with key and row pairs.
|
329
|
+
#
|
330
|
+
# @yieldparam key_row_pairs [Hash]
|
331
|
+
# key and row pairs.
|
332
|
+
# @yieldreturn [Integer]
|
333
|
+
# size of the DataFrame.
|
334
|
+
# @return [Integer]
|
335
|
+
# returns size.
|
336
|
+
#
|
205
337
|
def each_row
|
206
338
|
return enum_for(:each_row) unless block_given?
|
207
339
|
|
@@ -214,23 +346,348 @@ module RedAmber
|
|
214
346
|
end
|
215
347
|
end
|
216
348
|
|
349
|
+
# Returns self in a `Rover::DataFrame`.
|
350
|
+
#
|
351
|
+
# @return [Rover::DataFrame]
|
352
|
+
# a `Rover::DataFrame`.
|
353
|
+
#
|
217
354
|
def to_rover
|
218
355
|
require 'rover'
|
219
356
|
Rover::DataFrame.new(to_h)
|
220
357
|
end
|
221
358
|
|
359
|
+
# Create a Group object. Or create a Group and summarize it.
|
360
|
+
#
|
361
|
+
# @overload group(*group_keys)
|
362
|
+
# Create a Group object.
|
363
|
+
#
|
364
|
+
# @param group_keys [Array<Symbol, String>]
|
365
|
+
# keys for grouping.
|
366
|
+
# @return [Group]
|
367
|
+
# Group object.
|
368
|
+
# @example Create a Group
|
369
|
+
# penguins.group(:species)
|
370
|
+
#
|
371
|
+
# # =>
|
372
|
+
# #<RedAmber::Group : 0x000000000000c3c8>
|
373
|
+
# species group_count
|
374
|
+
# <string> <uint8>
|
375
|
+
# 0 Adelie 152
|
376
|
+
# 1 Chinstrap 68
|
377
|
+
# 2 Gentoo 124
|
378
|
+
#
|
379
|
+
# @overload group(*group_keys)
|
380
|
+
# Create a Group and summarize it by aggregation functions from the block.
|
381
|
+
#
|
382
|
+
# @yieldparam group [Group]
|
383
|
+
# passes Group object.
|
384
|
+
# @yieldreturn [DataFrame, Array<DataFrame>]
|
385
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
386
|
+
# @return [DataFrame]
|
387
|
+
# summarized DataFrame.
|
388
|
+
# @example Create a group and summarize it.
|
389
|
+
# penguins.group(:species) { mean(:bill_length_mm) }
|
390
|
+
#
|
391
|
+
# # =>
|
392
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f3fc>
|
393
|
+
# species mean(bill_length_mm)
|
394
|
+
# <string> <double>
|
395
|
+
# 0 Adelie 38.79
|
396
|
+
# 1 Chinstrap 48.83
|
397
|
+
# 2 Gentoo 47.5
|
398
|
+
#
|
222
399
|
def group(*group_keys, &block)
|
223
400
|
g = Group.new(self, group_keys)
|
224
401
|
g = g.summarize(&block) if block
|
225
402
|
g
|
226
403
|
end
|
227
404
|
|
405
|
+
# Create SubFrames by value grouping.
|
406
|
+
#
|
407
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
408
|
+
# @param keys [Symbol, String, Array<Symbol, String>]
|
409
|
+
# grouping keys.
|
410
|
+
# @return [SubFrames]
|
411
|
+
# a created SubFrames grouped by column values on `keys`.
|
412
|
+
# @example
|
413
|
+
# df.sub_by_value(keys: :y)
|
414
|
+
#
|
415
|
+
# # =>
|
416
|
+
# #<RedAmber::SubFrames : 0x000000000000fc08>
|
417
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
418
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
419
|
+
# ---
|
420
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>
|
421
|
+
# x y z
|
422
|
+
# <uint8> <string> <boolean>
|
423
|
+
# 0 1 A false
|
424
|
+
# 1 2 A true
|
425
|
+
# ---
|
426
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fc30>
|
427
|
+
# x y z
|
428
|
+
# <uint8> <string> <boolean>
|
429
|
+
# 0 3 B false
|
430
|
+
# 1 4 B (nil)
|
431
|
+
# 2 5 B true
|
432
|
+
# ---
|
433
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>
|
434
|
+
# x y z
|
435
|
+
# <uint8> <string> <boolean>
|
436
|
+
# 0 6 C false
|
437
|
+
#
|
438
|
+
# @since 0.4.0
|
439
|
+
#
|
440
|
+
def sub_by_value(keys: nil)
|
441
|
+
SubFrames.new(self, group(keys).filters)
|
442
|
+
end
|
443
|
+
alias_method :subframes_by_value, :sub_by_value
|
444
|
+
|
445
|
+
# Create SubFrames by Windowing with `from`, `size` and `step`.
|
446
|
+
#
|
447
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
448
|
+
# @param from [Integer]
|
449
|
+
# start position of window.
|
450
|
+
# @param size [Integer]
|
451
|
+
# window size.
|
452
|
+
# @param step [Integer]
|
453
|
+
# moving step of window.
|
454
|
+
# @return [SubFrames]
|
455
|
+
# a created SubFrames.
|
456
|
+
# @example
|
457
|
+
# df.sub_by_window(size: 4, step: 2)
|
458
|
+
#
|
459
|
+
# # =>
|
460
|
+
# #<RedAmber::SubFrames : 0x000000000000fc58>
|
461
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
462
|
+
# 2 SubFrames: [4, 4] in sizes.
|
463
|
+
# ---
|
464
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc6c>
|
465
|
+
# x y z
|
466
|
+
# <uint8> <string> <boolean>
|
467
|
+
# 0 1 A false
|
468
|
+
# 1 2 A true
|
469
|
+
# 2 3 B false
|
470
|
+
# 3 4 B (nil)
|
471
|
+
# ---
|
472
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fc80>
|
473
|
+
# x y z
|
474
|
+
# <uint8> <string> <boolean>
|
475
|
+
# 0 3 B false
|
476
|
+
# 1 4 B (nil)
|
477
|
+
# 2 5 B true
|
478
|
+
# 3 6 C false
|
479
|
+
#
|
480
|
+
# @since 0.4.0
|
481
|
+
#
|
482
|
+
def sub_by_window(from: 0, size: nil, step: 1)
|
483
|
+
SubFrames.new(self) do
|
484
|
+
from.step(by: step, to: (size() - size)).map do |i| # rubocop:disable Style/MethodCallWithoutArgsParentheses
|
485
|
+
[*i...(i + size)]
|
486
|
+
end
|
487
|
+
end
|
488
|
+
end
|
489
|
+
alias_method :subframes_by_window, :sub_by_window
|
490
|
+
|
491
|
+
# Create SubFrames by Grouping/Windowing by position from an enumerator method.
|
492
|
+
#
|
493
|
+
# This method will process the indices of self by enumerator.
|
494
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
495
|
+
# @param enumerator_method [Symbol]
|
496
|
+
# Enumerator name.
|
497
|
+
# @param args [<Object>]
|
498
|
+
# arguments for the enumerator method.
|
499
|
+
# @return [SubFrames]
|
500
|
+
# a created SubFrames.
|
501
|
+
# @example Create a SubFrames object sliced by 3 rows.
|
502
|
+
# df.sub_by_enum(:each_slice, 3)
|
503
|
+
#
|
504
|
+
# # =>
|
505
|
+
# #<RedAmber::SubFrames : 0x000000000000fd20>
|
506
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
507
|
+
# 2 SubFrames: [3, 3] in sizes.
|
508
|
+
# ---
|
509
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd34>
|
510
|
+
# x y z
|
511
|
+
# <uint8> <string> <boolean>
|
512
|
+
# 0 1 A false
|
513
|
+
# 1 2 A true
|
514
|
+
# 2 3 B false
|
515
|
+
# ---
|
516
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fd48>
|
517
|
+
# x y z
|
518
|
+
# <uint8> <string> <boolean>
|
519
|
+
# 0 4 B (nil)
|
520
|
+
# 1 5 B true
|
521
|
+
# 2 6 C false
|
522
|
+
#
|
523
|
+
# @example Create a SubFrames object for each consecutive 4 rows.
|
524
|
+
# df.sub_by_enum(:each_cons, 4)
|
525
|
+
#
|
526
|
+
# # =>
|
527
|
+
# #<RedAmber::SubFrames : 0x000000000000fd98>
|
528
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
529
|
+
# 3 SubFrames: [4, 4, 4] in sizes.
|
530
|
+
# ---
|
531
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdac>
|
532
|
+
# x y z
|
533
|
+
# <uint8> <string> <boolean>
|
534
|
+
# 0 1 A false
|
535
|
+
# 1 2 A true
|
536
|
+
# 2 3 B false
|
537
|
+
# 3 4 B (nil)
|
538
|
+
# ---
|
539
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdc0>
|
540
|
+
# x y z
|
541
|
+
# <uint8> <string> <boolean>
|
542
|
+
# 0 2 A true
|
543
|
+
# 1 3 B false
|
544
|
+
# 2 4 B (nil)
|
545
|
+
# 3 5 B true
|
546
|
+
# ---
|
547
|
+
# #<RedAmber::DataFrame : 4 x 3 Vectors, 0x000000000000fdd4>
|
548
|
+
# x y z
|
549
|
+
# <uint8> <string> <boolean>
|
550
|
+
# 0 3 B false
|
551
|
+
# 1 4 B (nil)
|
552
|
+
# 2 5 B true
|
553
|
+
# 3 6 C false
|
554
|
+
#
|
555
|
+
# @since 0.4.0
|
556
|
+
#
|
557
|
+
def sub_by_enum(enumerator_method, *args)
|
558
|
+
SubFrames.new(self, indices.send(enumerator_method, *args).to_a)
|
559
|
+
end
|
560
|
+
alias_method :subframes_by_enum, :sub_by_enum
|
561
|
+
|
562
|
+
# Create SubFrames by windowing with a kernel (i.e. masked window) and step.
|
563
|
+
#
|
564
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
565
|
+
# @param kernel [Array<true, false>, Vector]
|
566
|
+
# boolean array-like to pick records in the window.
|
567
|
+
# Kernel is a boolean Array and it behaves like a masked window.
|
568
|
+
# @param step [Integer]
|
569
|
+
# moving step of window.
|
570
|
+
# @return [SubFrames]
|
571
|
+
# a created SubFrames.
|
572
|
+
# @example
|
573
|
+
# kernel = [true, false, false, true]
|
574
|
+
# df.sub_by_kernel(kernel, step: 2)
|
575
|
+
#
|
576
|
+
# # =>
|
577
|
+
# #<RedAmber::SubFrames : 0x000000000000fde8>
|
578
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
579
|
+
# 2 SubFrames: [2, 2] in sizes.
|
580
|
+
# ---
|
581
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fdfc>
|
582
|
+
# x y z
|
583
|
+
# <uint8> <string> <boolean>
|
584
|
+
# 0 1 A false
|
585
|
+
# 1 4 B (nil)
|
586
|
+
# ---
|
587
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fe10>
|
588
|
+
# x y z
|
589
|
+
# <uint8> <string> <boolean>
|
590
|
+
# 0 3 B false
|
591
|
+
# 1 6 C false
|
592
|
+
#
|
593
|
+
# @since 0.4.0
|
594
|
+
#
|
595
|
+
def sub_by_kernel(kernel, step: 1)
|
596
|
+
limit_size = size - kernel.size
|
597
|
+
kernel_vector = Vector.new(kernel.concat([nil] * limit_size))
|
598
|
+
SubFrames.new(self) do
|
599
|
+
0.step(by: step, to: limit_size).map do |i|
|
600
|
+
kernel_vector.shift(i)
|
601
|
+
end
|
602
|
+
end
|
603
|
+
end
|
604
|
+
alias_method :subframes_by_kernel, :sub_by_kernel
|
605
|
+
|
606
|
+
# Generic builder of sub-dataframes from self.
|
607
|
+
#
|
608
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
609
|
+
# @overload build_subframes(subset_specifier)
|
610
|
+
# Create a new SubFrames object.
|
611
|
+
#
|
612
|
+
# @param subset_specifier [Array<Vector>, Array<array-like>]
|
613
|
+
# an Array of numeric indices or boolean filters
|
614
|
+
# to create subsets of DataFrame.
|
615
|
+
# @return [SubFrames]
|
616
|
+
# new SubFrames.
|
617
|
+
# @example
|
618
|
+
# df.build_subframes([[0, 2, 4], [1, 3, 5]])
|
619
|
+
#
|
620
|
+
# # =>
|
621
|
+
# #<RedAmber::SubFrames : 0x000000000000fe9c>
|
622
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
623
|
+
# 2 SubFrames: [3, 3] in sizes.
|
624
|
+
# ---
|
625
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000feb0>
|
626
|
+
# x y z
|
627
|
+
# <uint8> <string> <boolean>
|
628
|
+
# 0 1 A false
|
629
|
+
# 1 3 B false
|
630
|
+
# 2 5 B true
|
631
|
+
# ---
|
632
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fec4>
|
633
|
+
# x y z
|
634
|
+
# <uint8> <string> <boolean>
|
635
|
+
# 0 2 A true
|
636
|
+
# 1 4 B (nil)
|
637
|
+
# 2 6 C false
|
638
|
+
#
|
639
|
+
# @overload build_subframes
|
640
|
+
# Create a new SubFrames object by block.
|
641
|
+
#
|
642
|
+
# @yield [self]
|
643
|
+
# the block is called within the context of self.
|
644
|
+
# (Block is called by instance_eval(&block). )
|
645
|
+
# @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
|
646
|
+
# an Array of index or boolean array-likes to create subsets of DataFrame.
|
647
|
+
# All array-likes must respond to #numeric? or #boolean?.
|
648
|
+
# @example
|
649
|
+
# dataframe.build_subframes do
|
650
|
+
# even = indices.map(&:even?)
|
651
|
+
# [even, !even]
|
652
|
+
# end
|
653
|
+
#
|
654
|
+
# # =>
|
655
|
+
# #<RedAmber::SubFrames : 0x000000000000fe60>
|
656
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
657
|
+
# 2 SubFrames: [3, 3] in sizes.
|
658
|
+
# ---
|
659
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe74>
|
660
|
+
# x y z
|
661
|
+
# <uint8> <string> <boolean>
|
662
|
+
# 0 1 A false
|
663
|
+
# 1 3 B false
|
664
|
+
# 2 5 B true
|
665
|
+
# ---
|
666
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fe88>
|
667
|
+
# x y z
|
668
|
+
# <uint8> <string> <boolean>
|
669
|
+
# 0 2 A true
|
670
|
+
# 1 4 B (nil)
|
671
|
+
# 2 6 C false
|
672
|
+
#
|
673
|
+
# @since 0.4.0
|
674
|
+
#
|
675
|
+
def build_subframes(subset_specifier = nil, &block)
|
676
|
+
if block
|
677
|
+
SubFrames.new(self, instance_eval(&block))
|
678
|
+
else
|
679
|
+
SubFrames.new(self, subset_specifier)
|
680
|
+
end
|
681
|
+
end
|
682
|
+
|
683
|
+
# Catch variable (column) key as method name.
|
228
684
|
def method_missing(name, *args, &block)
|
229
|
-
return v(name) if args.empty?
|
685
|
+
return v(name) if args.empty? && key?(name)
|
230
686
|
|
231
687
|
super
|
232
688
|
end
|
233
689
|
|
690
|
+
# Catch variable (column) key as method name.
|
234
691
|
def respond_to_missing?(name, include_private)
|
235
692
|
return true if key?(name)
|
236
693
|
|
@@ -241,20 +698,32 @@ module RedAmber
|
|
241
698
|
|
242
699
|
# initialize @variables, @keys, @vectors and return one of them
|
243
700
|
def init_instance_vars(var)
|
244
|
-
ary =
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
701
|
+
ary =
|
702
|
+
@table
|
703
|
+
.columns
|
704
|
+
.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
|
705
|
+
v = Vector.create(column.data)
|
706
|
+
k = column.name.to_sym
|
707
|
+
v.key = k
|
708
|
+
variables[k] = v
|
709
|
+
keys << k
|
710
|
+
vectors << v
|
711
|
+
end
|
712
|
+
|
252
713
|
@variables, @keys, @vectors = ary
|
253
714
|
ary[%i[variables keys vectors].index(var)]
|
254
715
|
end
|
255
716
|
|
717
|
+
def check_duplicate_keys(array)
|
718
|
+
org = array.dup
|
719
|
+
return unless array.uniq!
|
720
|
+
|
721
|
+
raise DataFrameArgumentError,
|
722
|
+
"duplicate keys: #{org.tally.select { |_k, v| v > 1 }.keys}"
|
723
|
+
end
|
724
|
+
|
256
725
|
def name_unnamed_keys
|
257
|
-
return unless @table
|
726
|
+
return unless @table.key?('')
|
258
727
|
|
259
728
|
# We can't use #keys because it causes mismatch of @table and @keys
|
260
729
|
keys = @table.schema.fields.map { |f| f.name.to_sym }
|