red_amber 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -0,0 +1,1101 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# class SubFrames treats a set of subsets of a DataFrame
|
5
|
+
# [Experimental feature] Class SubFrames may be removed or be changed in the future.
|
6
|
+
class SubFrames
|
7
|
+
include Enumerable # may change to use Forwardable.
|
8
|
+
include Helper
|
9
|
+
|
10
|
+
using RefineArray
|
11
|
+
using RefineArrayLike
|
12
|
+
|
13
|
+
class << self
|
14
|
+
# Create SubFrames from a Group.
|
15
|
+
#
|
16
|
+
# [Experimental feature] this method may be removed or be changed in the future.
|
17
|
+
# @param group [Group]
|
18
|
+
# a Group to be used to create SubFrames.
|
19
|
+
# @return [SubFrames]
|
20
|
+
# a created SubFrames.
|
21
|
+
# @example
|
22
|
+
# dataframe
|
23
|
+
#
|
24
|
+
# # =>
|
25
|
+
# #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fba4>
|
26
|
+
# x y z
|
27
|
+
# <uint8> <string> <boolean>
|
28
|
+
# 0 1 A false
|
29
|
+
# 1 2 A true
|
30
|
+
# 2 3 B false
|
31
|
+
# 3 4 B (nil)
|
32
|
+
# 4 5 B true
|
33
|
+
# 5 6 C false
|
34
|
+
#
|
35
|
+
# group = Group.new(dataframe, [:y])
|
36
|
+
# sf = SubFrames.by_group(group)
|
37
|
+
#
|
38
|
+
# # =>
|
39
|
+
# #<RedAmber::SubFrames : 0x000000000000fbb8>
|
40
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000fb7c>
|
41
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
42
|
+
# ---
|
43
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fbcc>
|
44
|
+
# x y z
|
45
|
+
# <uint8> <string> <boolean>
|
46
|
+
# 0 1 A false
|
47
|
+
# 1 2 A true
|
48
|
+
# ---
|
49
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fbe0>
|
50
|
+
# x y z
|
51
|
+
# <uint8> <string> <boolean>
|
52
|
+
# 0 3 B false
|
53
|
+
# 1 4 B (nil)
|
54
|
+
# 2 5 B true
|
55
|
+
# ---
|
56
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fbf4>
|
57
|
+
# x y z
|
58
|
+
# <uint8> <string> <boolean>
|
59
|
+
# 0 6 C false
|
60
|
+
#
|
61
|
+
# @since 0.4.0
|
62
|
+
#
|
63
|
+
def by_group(group)
  # Delegate to the regular constructor, handing over the group's
  # source dataframe together with its filters.
  source = group.dataframe
  SubFrames.new(source, group.filters)
end
|
66
|
+
|
67
|
+
# Create a new SubFrames object from a DataFrame and an array of indices.
|
68
|
+
#
|
69
|
+
# @api private
|
70
|
+
# @note this method doesn't check arguments.
|
71
|
+
# @param dataframe [DataFrame]
|
72
|
+
# a source dataframe.
|
73
|
+
# @param subset_indices [Array, Array<Vector>]
|
74
|
+
# an Array of numeric indices to create subsets of DataFrame.
|
75
|
+
# @return [SubFrames]
|
76
|
+
# a new SubFrames object.
|
77
|
+
# @since 0.4.0
|
78
|
+
#
|
79
|
+
def by_indices(dataframe, subset_indices)
  # Sub DataFrames are produced on demand: nothing is taken from
  # `dataframe` until the enumerator is actually iterated.
  frame_enum = Enumerator.new(subset_indices.size) do |yielder|
    subset_indices.each do |indices|
      subset = dataframe.take(indices)
      yielder << DataFrame.new_dataframe_with_schema(dataframe, subset)
    end
  end

  # Bypass #initialize (no argument checks) and wire the state directly.
  allocate.tap do |subframes|
    subframes.instance_variable_set(:@baseframe, dataframe)
    subframes.instance_variable_set(:@enum, frame_enum)
  end
end
|
91
|
+
|
92
|
+
# Create a new SubFrames object from a DataFrame and an array of filters.
|
93
|
+
#
|
94
|
+
# @api private
|
95
|
+
# @note this method doesn't check arguments.
|
96
|
+
# @param dataframe [DataFrame]
|
97
|
+
# a source dataframe.
|
98
|
+
# @param subset_filters [Array, Array<Vector>]
|
99
|
+
# an Array of booleans to specify subsets of DataFrame.
|
100
|
+
# Each filter must have the same length as dataframe.
|
101
|
+
# @return [SubFrames]
|
102
|
+
# a new SubFrames object.
|
103
|
+
# @since 0.4.0
|
104
|
+
#
|
105
|
+
def by_filters(dataframe, subset_filters)
  # Bypass #initialize (no argument checks) and set the state directly.
  allocate.tap do |subframes|
    # Each filter is applied only when the enumerator is iterated.
    frame_enum = Enumerator.new(subset_filters.size) do |yielder|
      subset_filters.each do |mask|
        yielder << DataFrame.new_dataframe_with_schema(dataframe, dataframe.filter(mask))
      end
    end

    subframes.instance_variable_set(:@baseframe, dataframe)
    subframes.instance_variable_set(:@enum, frame_enum)
  end
end
|
117
|
+
|
118
|
+
# Create a new SubFrames from an Array of DataFrames.
|
119
|
+
#
|
120
|
+
# @api private
|
121
|
+
# @note dataframes must have same schema.
|
122
|
+
# @param dataframes [Array<DataFrame>]
|
123
|
+
# an array of DataFrames which have same schema.
|
124
|
+
# @return [SubFrames]
|
125
|
+
# a new SubFrames object.
|
126
|
+
# @since 0.4.0
|
127
|
+
#
|
128
|
+
def by_dataframes(dataframes)
  # Build the instance without running #initialize (no argument checks).
  instance = allocate
  case Array(dataframes)
  when [], [nil]
    # Both "nothing" and "a single nil" mean an empty SubFrames.
    # (`when [] || [nil]` would only ever compare against `[]`,
    # because an empty Array is truthy.)
    instance.instance_variable_set(:@baseframe, DataFrame.new)
    instance.instance_variable_set(:@frames, [])
    enum = [].each
  else
    enum =
      Enumerator.new(dataframes.size) do |y|
        dataframes.each do |df|
          y.yield df
        end
      end
    # Store a lazy enumerator as a placeholder in @baseframe; the
    # concatenated DataFrame is not computed here.
    instance.instance_variable_set(:@baseframe, enum.lazy)
  end
  instance.instance_variable_set(:@enum, enum)
  instance
end
|
147
|
+
|
148
|
+
private
|
149
|
+
|
150
|
+
# This method upgrades an iterating method from Enumerable to return SubFrames.
|
151
|
+
|
152
|
+
# @!macro [attach] define_subframable_method
|
153
|
+
#
|
154
|
+
# [Returns SubFrames] Use `#each.$1` if you want to get DataFrames by Array.
|
155
|
+
# Returns an Enumerator with no block given.
|
156
|
+
# @yieldparam dataframe [DataFrame]
|
157
|
+
# gives each element.
|
158
|
+
# @yieldreturn [Array<DataFrame>]
|
159
|
+
# the block should return DataFrames with same schema.
|
160
|
+
# @return [SubFrames]
|
161
|
+
# a new SubFrames.
|
162
|
+
#
|
163
|
+
# @since 0.4.0
|
164
|
+
#
|
165
|
+
def define_subframable_method(method)
  define_method(method) do |&block|
    if block
      # Run the inherited implementation of `method` with the block and
      # wrap the DataFrames it returns in a new SubFrames.
      SubFrames.by_dataframes(super(&block))
    else
      # Without a block, hand back an Enumerator over #each that knows its size.
      enum_for(:each) { size } # rubocop:disable Lint/ToEnumArguments
    end
  end
end
|
172
|
+
end
|
173
|
+
|
174
|
+
# Create a new SubFrames object from a DataFrame and an array of indices or filters.
|
175
|
+
#
|
176
|
+
# @overload initialize(dataframe, subset_specifier)
|
177
|
+
# Create a new SubFrames object.
|
178
|
+
#
|
179
|
+
# @param dataframe [DataFrame]
|
180
|
+
# a source dataframe.
|
181
|
+
# @param subset_specifier [Array<Vector>, Array<array-like>]
|
182
|
+
# an Array of numeric indices or boolean filters
|
183
|
+
# to create subsets of DataFrame.
|
184
|
+
# @return [SubFrames]
|
185
|
+
# new SubFrames.
|
186
|
+
# @example
|
187
|
+
# dataframe
|
188
|
+
#
|
189
|
+
# # =>
|
190
|
+
# #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000039e4>
|
191
|
+
# x y z
|
192
|
+
# <uint8> <string> <boolean>
|
193
|
+
# 0 1 A false
|
194
|
+
# 1 2 A true
|
195
|
+
# 2 3 B false
|
196
|
+
# 3 4 B (nil)
|
197
|
+
# 4 5 B true
|
198
|
+
# 5 6 C false
|
199
|
+
#
|
200
|
+
# # --- This object is used as common source in this class ---
|
201
|
+
# subframes = SubFrames.new(dataframe, [[0 ,1], [2, 3, 4], [5]])
|
202
|
+
#
|
203
|
+
# # =>
|
204
|
+
# #<RedAmber::SubFrames : 0x000000000000cf6c>
|
205
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000cf80>
|
206
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
207
|
+
# ---
|
208
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000cf94>
|
209
|
+
# x y z
|
210
|
+
# <uint8> <string> <boolean>
|
211
|
+
# 0 1 A false
|
212
|
+
# 1 2 A true
|
213
|
+
# ---
|
214
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000cfa8>
|
215
|
+
# x y z
|
216
|
+
# <uint8> <string> <boolean>
|
217
|
+
# 0 3 B false
|
218
|
+
# 1 4 B (nil)
|
219
|
+
# 2 5 B true
|
220
|
+
# ---
|
221
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000cfbc>
|
222
|
+
# x y z
|
223
|
+
# <uint8> <string> <boolean>
|
224
|
+
# 0 6 C false
|
225
|
+
#
|
226
|
+
# @overload initialize(dataframe)
|
227
|
+
# Create a new SubFrames object by block.
|
228
|
+
#
|
229
|
+
# @param dataframe [DataFrame]
|
230
|
+
# a source dataframe.
|
231
|
+
# @yieldparam dataframe [DataFrame]
|
232
|
+
# the block is called with `dataframe`.
|
233
|
+
# @yieldreturn [Array<numeric_array_like>, Array<boolean_array_like>]
|
234
|
+
# an Array of index or boolean array-likes to create subsets of DataFrame.
|
235
|
+
# All array-likes must respond to #numeric? or #boolean?.
|
236
|
+
# @return [SubFrames]
|
237
|
+
# a new SubFrames object.
|
238
|
+
# @example
|
239
|
+
# SubFrames.new(dataframe) do |df|
|
240
|
+
# booleans = df[:z]
|
241
|
+
# [booleans, !booleans]
|
242
|
+
# end
|
243
|
+
#
|
244
|
+
# # =>
|
245
|
+
# #<RedAmber::SubFrames : 0x0000000000003aac>
|
246
|
+
# @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000003ac0>
|
247
|
+
# 2 SubFrames: [2, 3] in sizes.
|
248
|
+
# ---
|
249
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003ad4>
|
250
|
+
# x y z
|
251
|
+
# <uint8> <string> <boolean>
|
252
|
+
# 0 2 A true
|
253
|
+
# 1 5 B true
|
254
|
+
# ---
|
255
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003ae8>
|
256
|
+
# x y z
|
257
|
+
# <uint8> <string> <boolean>
|
258
|
+
# 0 1 A false
|
259
|
+
# 1 3 B false
|
260
|
+
# 2 6 C false
|
261
|
+
#
|
262
|
+
# @since 0.4.0
|
263
|
+
#
|
264
|
+
def initialize(dataframe, subset_specifier = nil, &block)
  raise SubFramesArgumentError, "not a DataFrame: #{dataframe}" unless dataframe.is_a?(DataFrame)

  if block
    # The specifier comes either from the argument or from the block, never both.
    raise SubFramesArgumentError, 'Must not specify both arguments and block.' unless subset_specifier.nil?

    subset_specifier = block.call(dataframe)
  end

  # Nothing to split: an empty source or a missing/empty specifier
  # produces an empty SubFrames.
  if dataframe.empty? || subset_specifier.nil? || subset_specifier.empty?
    @baseframe = DataFrame.new
    @frames = []
    @enum = @frames.each
    return
  end

  # Left nil here; #baseframe fills it in when first called.
  @baseframe = nil
  # Subsets are built only when the enumerator is iterated, so an
  # illegal specifier raises at iteration time, not here.
  @enum = Enumerator.new(subset_specifier.size) do |yielder|
    subset_specifier.map do |spec|
      subset =
        if spec.numeric?
          dataframe.take(spec)
        elsif spec.boolean?
          dataframe.filter(spec)
        else
          raise SubFramesArgumentError, "illegal type: #{spec}"
        end
      yielder.yield DataFrame.new_dataframe_with_schema(dataframe, subset)
    end
  end
end
|
299
|
+
|
300
|
+
# Return concatenated SubFrames as a DataFrame.
|
301
|
+
#
|
302
|
+
# Once evaluated, memorize it as @baseframe.
|
303
|
+
# @return [DataFrame]
|
304
|
+
# a concatenated DataFrame.
|
305
|
+
# @since 0.4.0
|
306
|
+
#
|
307
|
+
def baseframe
  # Reuse the stored value unless it is missing or still an Enumerator placeholder.
  return @baseframe unless @baseframe.nil? || @baseframe.is_a?(Enumerator)

  # First access: concatenate every sub DataFrame and memoize the result.
  @baseframe = reduce(&:concatenate)
end
|
314
|
+
# Expose #baseframe under the names `concatenate` and `concat` as well.
alias_method :concatenate, :baseframe
|
315
|
+
alias_method :concat, :baseframe
|
316
|
+
|
317
|
+
# Iterates over sub DataFrames or returns an Enumerator.
|
318
|
+
#
|
319
|
+
# This method will memorize sub DataFrames and always returns the same object.
|
320
|
+
# The Class SubFrames is including Enumerable module.
|
321
|
+
# So many methods in Enumerable are available.
|
322
|
+
#
|
323
|
+
# @overload each
|
324
|
+
# Returns a new Enumerator if no block given.
|
325
|
+
#
|
326
|
+
# @return [Enumerator]
|
327
|
+
# Enumerator of each elements.
|
328
|
+
#
|
329
|
+
# @overload each
|
330
|
+
# When a block given, passes each sub DataFrames to the block.
|
331
|
+
#
|
332
|
+
# @yieldparam subframe [DataFrame]
|
333
|
+
# passes sub DataFrame by a block parameter.
|
334
|
+
# @yieldreturn [Object]
|
335
|
+
# evaluated result value from the block.
|
336
|
+
# @return [self]
|
337
|
+
# returns self.
|
338
|
+
#
|
339
|
+
# @example Returns Enumerator
|
340
|
+
# subframes.each
|
341
|
+
#
|
342
|
+
# # =>
|
343
|
+
# #<Enumerator: ...>
|
344
|
+
#
|
345
|
+
# @example `to_a` from Enumerable.
|
346
|
+
# subframes.to_a
|
347
|
+
#
|
348
|
+
# # =>
|
349
|
+
# [#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000002a120>
|
350
|
+
# x y z
|
351
|
+
# <uint8> <string> <boolean>
|
352
|
+
# 0 1 A false
|
353
|
+
# 1 2 A true
|
354
|
+
# ,
|
355
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000002a134>
|
356
|
+
# x y z
|
357
|
+
# <uint8> <string> <boolean>
|
358
|
+
# 0 3 B false
|
359
|
+
# 1 4 B (nil)
|
360
|
+
# 2 5 B true
|
361
|
+
# ,
|
362
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000002a148>
|
363
|
+
# x y z
|
364
|
+
# <uint8> <string> <boolean>
|
365
|
+
# 0 6 C false
|
366
|
+
# ]
|
367
|
+
#
|
368
|
+
# @example Concatenate SubFrames. This example is used in #concatenate.
|
369
|
+
# subframes.reduce(&:concatenate)
|
370
|
+
#
|
371
|
+
# # =>
|
372
|
+
# #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000004883c>
|
373
|
+
# x y z
|
374
|
+
# <uint8> <string> <boolean>
|
375
|
+
# 0 1 A false
|
376
|
+
# 1 2 A true
|
377
|
+
# 2 3 B false
|
378
|
+
# 3 4 B (nil)
|
379
|
+
# 4 5 B true
|
380
|
+
# 5 6 C false
|
381
|
+
#
|
382
|
+
# @since 0.4.0
|
383
|
+
#
|
384
|
+
def each(&block)
  # Without a block, return an Enumerator that reports its size.
  return enum_for(__method__) { size } unless block

  frames.each(&block)
  # Return the receiver, as documented (`@return [self]`) and as
  # Enumerable-style #each implementations conventionally do.
  self
end
|
390
|
+
|
391
|
+
# Aggregate SubFrames to create a DataFrame.
|
392
|
+
#
|
393
|
+
# This method creates a DataFrame with one row corresponding to one sub dataframe.
|
394
|
+
# @note This method does not check if aggregation function is used.
|
395
|
+
#
|
396
|
+
# @overload aggregate(keys)
|
397
|
+
#
|
398
|
+
# Aggregate SubFrames creating DataFrame with label `keys` and
|
399
|
+
# its column values by block.
|
400
|
+
#
|
401
|
+
# @param keys [Symbol, Array<Symbol>]
|
402
|
+
# a key or keys of result. Key names may be renamed to new label.
|
403
|
+
# @yieldparam dataframe [DataFrame]
|
404
|
+
# passes each dataframe in self to the block. Block is called by instance_eval,
|
405
|
+
# so inside of the block is the context of passed dataframe.
|
406
|
+
# @yieldreturn [Array]
|
407
|
+
# aggregated values from the columns of passed dataframe.
|
408
|
+
# @return [DataFrame]
|
409
|
+
# created DataFrame.
|
410
|
+
# @example Aggregate by key labels in arguments and values from block.
|
411
|
+
# subframes.aggregate(:y, :sum_x) { [y.first, x.sum] }
|
412
|
+
#
|
413
|
+
# # =>
|
414
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
415
|
+
# y sum_x
|
416
|
+
# <string> <uint8>
|
417
|
+
# 0 A 3
|
418
|
+
# 1 B 12
|
419
|
+
# 2 C 6
|
420
|
+
#
|
421
|
+
# @example Aggregate by key labels in an Array and values from block.
|
422
|
+
# subframes.aggregate([:y, :sum_x]) { [y.first, x.sum] }
|
423
|
+
#
|
424
|
+
# # =>
|
425
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
426
|
+
# y sum_x
|
427
|
+
# <string> <uint8>
|
428
|
+
# 0 A 3
|
429
|
+
# 1 B 12
|
430
|
+
# 2 C 6
|
431
|
+
#
|
432
|
+
# @overload aggregate
|
433
|
+
#
|
434
|
+
# Aggregate SubFrames creating DataFrame with pairs of key and aggregated value
|
435
|
+
# in Hash from the block.
|
436
|
+
#
|
437
|
+
# @yieldparam dataframe [DataFrame]
|
438
|
+
# passes each dataframe in self to the block. Block is called by instance_eval,
|
439
|
+
# so inside of the block is the context of passed dataframe.
|
440
|
+
# @yieldreturn [Hash<key => aggregated_value>]
|
441
|
+
# pairs of key name and aggregated values from the columns of passed dataframe.
|
442
|
+
# Key names may be renamed to new label in the result.
|
443
|
+
# @return [DataFrame]
|
444
|
+
# created DataFrame.
|
445
|
+
# @example Aggregate by key and value pairs from block.
|
446
|
+
# subframes.aggregate do
|
447
|
+
# { y: y.first, sum_x: x.sum }
|
448
|
+
# end
|
449
|
+
#
|
450
|
+
# # =>
|
451
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
452
|
+
# y sum_x
|
453
|
+
# <string> <uint8>
|
454
|
+
# 0 A 3
|
455
|
+
# 1 B 12
|
456
|
+
# 2 C 6
|
457
|
+
#
|
458
|
+
# @overload aggregate
|
459
|
+
#
|
460
|
+
# Aggregate SubFrames creating DataFrame with an Array of key and aggregated value
|
461
|
+
# from the block.
|
462
|
+
#
|
463
|
+
# @yieldparam dataframe [DataFrame]
|
464
|
+
# passes each dataframe in self to the block. Block is called by instance_eval,
|
465
|
+
# so inside of the block is the context of passed dataframe.
|
466
|
+
# @yieldreturn [Array<key, aggregated_value>]
|
467
|
+
# pairs of key name and aggregated values from the columns of passed dataframe.
|
468
|
+
# Key names may be renamed to new label in the result.
|
469
|
+
# @return [DataFrame]
|
470
|
+
# created DataFrame.
|
471
|
+
# @example Aggregate by key and value arrays from block.
|
472
|
+
# subframes.aggregate do
|
473
|
+
# [[:y, y.first], [:sum_x, x.sum]]
|
474
|
+
# end
|
475
|
+
#
|
476
|
+
# # =>
|
477
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
478
|
+
# y sum_x
|
479
|
+
# <string> <uint8>
|
480
|
+
# 0 A 3
|
481
|
+
# 1 B 12
|
482
|
+
# 2 C 6
|
483
|
+
#
|
484
|
+
# @overload aggregate(group_keys, aggregations)
|
485
|
+
#
|
486
|
+
# Aggregate SubFrames for first values of the columns of
|
487
|
+
# `group_keys` and the aggregated results of key-function pairs.
|
488
|
+
# [Experimental] This API may be changed in the future.
|
489
|
+
#
|
490
|
+
# @param group_keys [Symbol, String, Array<Symbol, String>]
|
491
|
+
# group key name(s) to output values.
|
492
|
+
# @param aggregations [Hash<Array<Symbol, String> => Array<:Symbol>>]
|
493
|
+
# a Hash of variable (column) name and
|
494
|
+
# Vector aggregate function name to apply.
|
495
|
+
# @return [DataFrame]
|
496
|
+
# an aggregated DataFrame.
|
497
|
+
# @example Aggregate with a group key and key function pairs by a Hash.
|
498
|
+
# subframes.aggregate(:y, { x: :sum, z: :count })
|
499
|
+
#
|
500
|
+
# # =>
|
501
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003b24>
|
502
|
+
# y sum_x count_z
|
503
|
+
# <string> <uint8> <uint8>
|
504
|
+
# 0 A 3 2
|
505
|
+
# 1 B 12 2
|
506
|
+
# 2 C 6 1
|
507
|
+
#
|
508
|
+
# @overload aggregate(group_keys, aggregations)
|
509
|
+
#
|
510
|
+
# Aggregate SubFrames for first values of the columns of
|
511
|
+
# `group_keys` and the aggregated results of all combinations
|
512
|
+
# of supplied keys and functions.
|
513
|
+
# [Experimental] This API may be changed in the future.
|
514
|
+
#
|
515
|
+
# @param group_keys [Symbol, String, Array<Symbol, String>]
|
516
|
+
# group key name(s) to output values.
|
517
|
+
# @param aggregations [Array[Array<Symbol, String>, Array<:Symbol>]]
|
518
|
+
# an Array of Array of variable (column) names and
|
519
|
+
# Array of Vector aggregate function names to apply.
|
520
|
+
# @return [DataFrame]
|
521
|
+
# an aggregated DataFrame.
|
522
|
+
# @example Aggregate with group keys and keys and functions by an Array.
|
523
|
+
# sf.aggregate(:y, [[:x, :z], [:count, :sum]])
|
524
|
+
#
|
525
|
+
# # =>
|
526
|
+
# #<RedAmber::DataFrame : 3 x 5 Vectors, 0x000000000000fcbc>
|
527
|
+
# y count_x sum_x count_z sum_z
|
528
|
+
# <string> <uint8> <uint8> <uint8> <uint8>
|
529
|
+
# 0 A 2 3 2 1
|
530
|
+
# 1 B 3 12 2 1
|
531
|
+
# 2 C 1 6 1 0
|
532
|
+
#
|
533
|
+
# @since 0.4.0
|
534
|
+
#
|
535
|
+
def aggregate(*args, &block)
  columns =
    if block && args.empty?
      # aggregate { {key => value} or [[key, value], ...] }
      # Collect one value per sub DataFrame under each key the block returns.
      each_with_object(Hash.new { |hash, label| hash[label] = [] }) do |df, collected|
        df.instance_eval(&block).to_h.each { |label, value| collected[label] << value }
      end
    elsif block
      # aggregate(keys) { values }
      # One row of values per sub DataFrame, transposed into per-key columns.
      rows = each.map { |df| Array(df.instance_eval(&block)) }
      args.flatten.zip(rows.transpose)
    else
      # These functions may be removed in the future.
      group_keys, pairs =
        case args
        in [keys_for_group, Hash => spec]
          # aggregate(group_keys, { key => func })
          [keys_for_group, spec.map { |key, func| [func, key] }]
        in [keys_for_group, [Array => keys, Array => funcs]]
          # aggregate(group_keys, [keys, funcs])
          [keys_for_group, funcs.product(keys)]
        else
          raise SubFramesArgumentError, "invalid argument: #{args}"
        end

      # Group keys are reported through `first`; they keep their own name as label.
      plan = Array(group_keys).map { |key| [:first, key] } + pairs
      plan.map do |func, key|
        label = func == :first ? key : "#{func}_#{key}"
        [label, each.map { |df| df[key].send(func) }]
      end
    end
  DataFrame.new(columns)
end
|
572
|
+
|
573
|
+
# Returns a SubFrames containing DataFrames returned by the block.
|
574
|
+
#
|
575
|
+
# @example Map as it is.
|
576
|
+
# subframes.map { _1 }
|
577
|
+
#
|
578
|
+
# # This will create a new SubFrame and a new baseframe,
|
579
|
+
# # But each element DataFrames are re-used.
|
580
|
+
# # =>
|
581
|
+
# #<RedAmber::SubFrames : 0x000000000001e6cc>
|
582
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000001e6e0>
|
583
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
584
|
+
# ---
|
585
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x00000000000135c4>
|
586
|
+
# x y z
|
587
|
+
# <uint8> <string> <boolean>
|
588
|
+
# 0 1 A false
|
589
|
+
# 1 2 A true
|
590
|
+
# ---
|
591
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000135d8>
|
592
|
+
# x y z
|
593
|
+
# <uint8> <string> <boolean>
|
594
|
+
# 0 3 B false
|
595
|
+
# 1 4 B (nil)
|
596
|
+
# 2 5 B true
|
597
|
+
# ---
|
598
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x00000000000135ec>
|
599
|
+
# x y z
|
600
|
+
# <uint8> <string> <boolean>
|
601
|
+
# 0 6 C false
|
602
|
+
#
|
603
|
+
# @example Assign a new column.
|
604
|
+
# subframes.map { |df| df.assign(x_plus1: df[:x] + 1) }
|
605
|
+
#
|
606
|
+
# # =>
|
607
|
+
# #<RedAmber::SubFrames : 0x0000000000040948>
|
608
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000004095c>
|
609
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
610
|
+
# ---
|
611
|
+
# #<RedAmber::DataFrame : 2 x 4 Vectors, 0x0000000000040970>
|
612
|
+
# x y z x_plus1
|
613
|
+
# <uint8> <string> <boolean> <uint8>
|
614
|
+
# 0 1 A false 2
|
615
|
+
# 1 2 A true 3
|
616
|
+
# ---
|
617
|
+
# #<RedAmber::DataFrame : 3 x 4 Vectors, 0x0000000000040984>
|
618
|
+
# x y z x_plus1
|
619
|
+
# <uint8> <string> <boolean> <uint8>
|
620
|
+
# 0 3 B false 4
|
621
|
+
# 1 4 B (nil) 5
|
622
|
+
# 2 5 B true 6
|
623
|
+
# ---
|
624
|
+
# #<RedAmber::DataFrame : 1 x 4 Vectors, 0x0000000000040998>
|
625
|
+
# x y z x_plus1
|
626
|
+
# <uint8> <string> <boolean> <uint8>
|
627
|
+
# 0 6 C false 7
|
628
|
+
#
|
629
|
+
# @since 0.4.0
|
630
|
+
#
|
631
|
+
# Define #map through the class-level macro, so that calling it with a
# block wraps the resulting DataFrames in a new SubFrames.
define_subframable_method :map
|
632
|
+
# `collect` is an alias of #map defined above.
alias_method :collect, :map
|
633
|
+
|
634
|
+
# Update existing column(s) or create new column(s) for each DataFrame in self.
|
635
|
+
#
|
636
|
+
# Column values are updated by an overloaded common operation.
|
637
|
+
#
|
638
|
+
# @overload assign(key)
|
639
|
+
# Assign a column by argument and block.
|
640
|
+
#
|
641
|
+
# @param key [Symbol, String]
|
642
|
+
# a key of column to assign.
|
643
|
+
# @yieldparam dataframe [DataFrame]
|
644
|
+
# gives overloaded dataframe in self to the block.
|
645
|
+
# @yieldreturn [Vector, Array, Arrow::Array]
|
646
|
+
# an updated column value which are overloaded.
|
647
|
+
# @return [SubFrames]
|
648
|
+
# a new SubFrames object with updated DataFrames.
|
649
|
+
# @example
|
650
|
+
# subframes.assign(:x_plus1) { x + 1 }
|
651
|
+
#
|
652
|
+
# # =>
|
653
|
+
# #<RedAmber::SubFrames : 0x000000000000c3a0>
|
654
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000000c3b4>
|
655
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
656
|
+
# ---
|
657
|
+
# #<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000c3c8>
|
658
|
+
# x y z x_plus1
|
659
|
+
# <uint8> <string> <boolean> <uint8>
|
660
|
+
# 0 1 A false 2
|
661
|
+
# 1 2 A true 3
|
662
|
+
# ---
|
663
|
+
# #<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000c3dc>
|
664
|
+
# x y z x_plus1
|
665
|
+
# <uint8> <string> <boolean> <uint8>
|
666
|
+
# 0 3 B false 4
|
667
|
+
# 1 4 B (nil) 5
|
668
|
+
# 2 5 B true 6
|
669
|
+
# ---
|
670
|
+
# #<RedAmber::DataFrame : 1 x 4 Vectors, 0x000000000000c3f0>
|
671
|
+
# x y z x_plus1
|
672
|
+
# <uint8> <string> <boolean> <uint8>
|
673
|
+
# 0 6 C false 7
|
674
|
+
#
|
675
|
+
# @overload assign(keys)
|
676
|
+
# Assign columns by arguments and block.
|
677
|
+
#
|
678
|
+
# @param keys [Array<Symbol, String>]
|
679
|
+
# keys of columns to assign.
|
680
|
+
# @yieldparam dataframe [DataFrame]
|
681
|
+
# gives overloaded dataframes in self to the block.
|
682
|
+
# @yieldreturn [Array<Vector, Array, Arrow::Array>]
|
683
|
+
# an updated column values which are overloaded.
|
684
|
+
# @return [SubFrames]
|
685
|
+
# a new SubFrames object with updated DataFrames.
|
686
|
+
# @example
|
687
|
+
# subframes.assign(:sum_x, :frac_x) do
|
688
|
+
# group_sum = x.sum
|
689
|
+
# [[group_sum] * size, x / group_sum.to_f]
|
690
|
+
# end
|
691
|
+
#
|
692
|
+
# # =>
|
693
|
+
# #<RedAmber::SubFrames : 0x000000000000fce4>
|
694
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 5 Vectors, 0x000000000000fcf8>
|
695
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
696
|
+
# ---
|
697
|
+
# #<RedAmber::DataFrame : 2 x 5 Vectors, 0x000000000000fd0c>
|
698
|
+
# x y z sum_x frac_x
|
699
|
+
# <uint8> <string> <boolean> <uint8> <double>
|
700
|
+
# 0 1 A false 3 0.33
|
701
|
+
# 1 2 A true 3 0.67
|
702
|
+
# ---
|
703
|
+
# #<RedAmber::DataFrame : 3 x 5 Vectors, 0x000000000000fd20>
|
704
|
+
# x y z sum_x frac_x
|
705
|
+
# <uint8> <string> <boolean> <uint8> <double>
|
706
|
+
# 0 3 B false 12 0.25
|
707
|
+
# 1 4 B (nil) 12 0.33
|
708
|
+
# 2 5 B true 12 0.42
|
709
|
+
# ---
|
710
|
+
# #<RedAmber::DataFrame : 1 x 5 Vectors, 0x000000000000fd34>
|
711
|
+
# x y z sum_x frac_x
|
712
|
+
# <uint8> <string> <boolean> <uint8> <double>
|
713
|
+
# 0 6 C false 6 1.0
|
714
|
+
#
|
715
|
+
# @overload assign
|
716
|
+
# Assign column(s) by block.
|
717
|
+
#
|
718
|
+
# @yieldparam dataframe [DataFrame]
|
719
|
+
# gives overloaded dataframes in self to the block.
|
720
|
+
# @yieldreturn [Hash, Array]
|
721
|
+
# pairs of keys and column values which are overloaded.
|
722
|
+
# @return [SubFrames]
|
723
|
+
# a new SubFrames object with updated DataFrames.
|
724
|
+
# @example Compute 'x * z' when (true, not_true) = (1, 0) in z
|
725
|
+
# subframes.assign do
|
726
|
+
# { 'x*z': x * z.if_else(1, 0) }
|
727
|
+
# end
|
728
|
+
#
|
729
|
+
# # =>
|
730
|
+
# #<RedAmber::SubFrames : 0x000000000000fd98>
|
731
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000000fdac>
|
732
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
733
|
+
# ---
|
734
|
+
# #<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000fdc0>
|
735
|
+
# x y z x*z
|
736
|
+
# <uint8> <string> <boolean> <uint8>
|
737
|
+
# 0 1 A false 0
|
738
|
+
# 1 2 A true 2
|
739
|
+
# ---
|
740
|
+
# #<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fdd4>
|
741
|
+
# x y z x*z
|
742
|
+
# <uint8> <string> <boolean> <uint8>
|
743
|
+
# 0 3 B false 0
|
744
|
+
# 1 4 B (nil) (nil)
|
745
|
+
# 2 5 B true 5
|
746
|
+
# ---
|
747
|
+
# #<RedAmber::DataFrame : 1 x 4 Vectors, 0x000000000000fde8>
|
748
|
+
# x y z x*z
|
749
|
+
# <uint8> <string> <boolean> <uint8>
|
750
|
+
# 0 6 C false 0
|
751
|
+
#
|
752
|
+
# @since 0.4.0
|
753
|
+
#
|
754
|
+
def assign(...)
  # Hand every argument (and the block, if given) to #assign of each element
  # yielded by #map, and return whatever #map builds from the results.
  map do |frame|
    frame.assign(...)
  end
end
|
757
|
+
|
758
|
+
# Returns a SubFrames containing DataFrames selected by the block.
|
759
|
+
#
|
760
|
+
# With a block given, calls the block with successive DataFrames;
|
761
|
+
# returns a SubFrames of those DataFrames for
|
762
|
+
# which the block returns a truthy value.
|
763
|
+
#
|
764
|
+
# @example Select all.
|
765
|
+
# subframes.select { true }
|
766
|
+
#
|
767
|
+
# # =>
|
768
|
+
# #<RedAmber::SubFrames : 0x0000000000003a84>
|
769
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003a98>
|
770
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
771
|
+
# ---
|
772
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
|
773
|
+
# x y z
|
774
|
+
# <uint8> <string> <boolean>
|
775
|
+
# 0 1 A false
|
776
|
+
# 1 2 A true
|
777
|
+
# ---
|
778
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
|
779
|
+
# x y z
|
780
|
+
# <uint8> <string> <boolean>
|
781
|
+
# 0 3 B false
|
782
|
+
# 1 4 B (nil)
|
783
|
+
# 2 5 B true
|
784
|
+
# ---
|
785
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x0000000000003a34>
|
786
|
+
# x y z
|
787
|
+
# <uint8> <string> <boolean>
|
788
|
+
# 0 6 C false
|
789
|
+
#
|
790
|
+
# @example Select nothing.
|
791
|
+
# subframes.select { false }
|
792
|
+
#
|
793
|
+
# # =>
|
794
|
+
# #<RedAmber::SubFrames : 0x00000000000238c0>
|
795
|
+
# @baseframe=#<RedAmber::DataFrame : (empty), 0x00000000000238d4>
|
796
|
+
# 0 SubFrame: [] in size.
|
797
|
+
# ---
|
798
|
+
#
|
799
|
+
# @example Select if Vector `:z` has any true.
|
800
|
+
# subframes.select { |df| df[:z].any? }
|
801
|
+
#
|
802
|
+
# # =>
|
803
|
+
# #<RedAmber::SubFrames : 0x000000000000fba4>
|
804
|
+
# @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000fbb8>
|
805
|
+
# 2 SubFrames: [2, 3] in sizes.
|
806
|
+
# ---
|
807
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
|
808
|
+
# x y z
|
809
|
+
# <uint8> <string> <boolean>
|
810
|
+
# 0 1 A false
|
811
|
+
# 1 2 A true
|
812
|
+
# ---
|
813
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
|
814
|
+
# x y z
|
815
|
+
# <uint8> <string> <boolean>
|
816
|
+
# 0 3 B false
|
817
|
+
# 1 4 B (nil)
|
818
|
+
# 2 5 B true
|
819
|
+
#
|
820
|
+
# @since 0.4.0
|
821
|
+
#
|
822
|
+
define_subframable_method :select # macro is defined outside this excerpt; presumably it generates #select returning a SubFrames -- TODO confirm
|
823
|
+
alias_method :filter, :select # #filter is registered as an alias of #select
|
824
|
+
alias_method :find_all, :select # #find_all is registered as an alias of #select
|
825
|
+
|
826
|
+
# Returns a SubFrames containing DataFrames rejected by the block.
|
827
|
+
#
|
828
|
+
# With a block given, calls the block with successive DataFrames;
|
829
|
+
# returns a SubFrames of those DataFrames for
|
830
|
+
# which the block returns nil or false.
|
831
|
+
# @example Reject all.
|
832
|
+
# subframes.reject { true }
|
833
|
+
#
|
834
|
+
# # =>
|
835
|
+
# #<RedAmber::SubFrames : 0x00000000000238c0>
|
836
|
+
# @baseframe=#<RedAmber::DataFrame : (empty), 0x00000000000238d4>
|
837
|
+
# 0 SubFrame: [] in size.
|
838
|
+
# ---
|
839
|
+
#
|
840
|
+
# @example Reject nothing.
|
841
|
+
# subframes.reject { false }
|
842
|
+
#
|
843
|
+
# # =>
|
844
|
+
# #<RedAmber::SubFrames : 0x0000000000003a84>
|
845
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003a98>
|
846
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
847
|
+
# ---
|
848
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x0000000000003a0c>
|
849
|
+
# x y z
|
850
|
+
# <uint8> <string> <boolean>
|
851
|
+
# 0 1 A false
|
852
|
+
# 1 2 A true
|
853
|
+
# ---
|
854
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000003a20>
|
855
|
+
# x y z
|
856
|
+
# <uint8> <string> <boolean>
|
857
|
+
# 0 3 B false
|
858
|
+
# 1 4 B (nil)
|
859
|
+
# 2 5 B true
|
860
|
+
# ---
|
861
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x0000000000003a34>
|
862
|
+
# x y z
|
863
|
+
# <uint8> <string> <boolean>
|
864
|
+
# 0 6 C false
|
865
|
+
#
|
866
|
+
# @example Reject if Vector `:z` has any true.
|
867
|
+
# subframes.reject { |df| df[:z].any? }
|
868
|
+
#
|
869
|
+
# # =>
|
870
|
+
# #<RedAmber::SubFrames : 0x0000000000038d74>
|
871
|
+
# @baseframe=#<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000001ad10>
|
872
|
+
# 1 SubFrame: [1] in size.
|
873
|
+
# ---
|
874
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000001ad10>
|
875
|
+
# x y z
|
876
|
+
# <uint8> <string> <boolean>
|
877
|
+
# 0 6 C false
|
878
|
+
#
|
879
|
+
# @since 0.4.0
|
880
|
+
#
|
881
|
+
define_subframable_method :reject # macro is defined outside this excerpt; presumably it generates #reject returning a SubFrames -- TODO confirm
|
882
|
+
|
883
|
+
# Returns a SubFrames containing truthy DataFrames returned by the block.
|
884
|
+
#
|
885
|
+
# With a block given, calls the block with successive DataFrames;
|
886
|
+
# returns a SubFrames of the truthy DataFrames
|
887
|
+
# returned by the block; nil and false results are dropped.
|
888
|
+
# @example Filter for size is larger than 1 and append number to column 'y'.
|
889
|
+
# subframes.filter_map do |df|
|
890
|
+
# if df.size > 1
|
891
|
+
# df.assign(:y) do
|
892
|
+
# y.merge(indices('1'), sep: '')
|
893
|
+
# end
|
894
|
+
# end
|
895
|
+
# end
|
896
|
+
#
|
897
|
+
# # =>
|
898
|
+
# #<RedAmber::SubFrames : 0x000000000001da88>
|
899
|
+
# @baseframe=#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000001da9c>
|
900
|
+
# 2 SubFrames: [2, 3] in sizes.
|
901
|
+
# ---
|
902
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000001dab0>
|
903
|
+
# x y z
|
904
|
+
# <uint8> <string> <boolean>
|
905
|
+
# 0 1 A1 false
|
906
|
+
# 1 2 A2 true
|
907
|
+
# ---
|
908
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000001dac4>
|
909
|
+
# x y z
|
910
|
+
# <uint8> <string> <boolean>
|
911
|
+
# 0 3 B1 false
|
912
|
+
# 1 4 B2 (nil)
|
913
|
+
# 2 5 B3 true
|
914
|
+
#
|
915
|
+
# @since 0.4.0
|
916
|
+
#
|
917
|
+
define_subframable_method :filter_map # macro is defined outside this excerpt; presumably it generates #filter_map returning a SubFrames -- TODO confirm
|
918
|
+
|
919
|
+
# Number of subsets.
|
920
|
+
#
|
921
|
+
# @return [Integer]
|
922
|
+
# number of subsets in self.
|
923
|
+
# @since 0.4.0
|
924
|
+
#
|
925
|
+
def size
  # Reuse the cached count when one has already been computed.
  return @size if @size

  @size = @enum.size
end
|
928
|
+
|
929
|
+
# Size list of subsets.
|
930
|
+
#
|
931
|
+
# @return [Array<Integer>]
|
932
|
+
# sizes of sub DataFrames.
|
933
|
+
# @since 0.4.0
|
934
|
+
#
|
935
|
+
def sizes
  # Reuse the cached list when one has already been computed.
  return @sizes if @sizes

  @sizes = @enum.map { |frame| frame.size }
end
|
938
|
+
|
939
|
+
# Index of the first row of each sub DataFrame.
|
940
|
+
#
|
941
|
+
# @return [Array<Integer>]
|
942
|
+
# offset index of each sub DataFrame.
|
943
|
+
# @example When `sizes` is [2, 3, 1].
|
944
|
+
# subframes.offset_indices # => [0, 2, 5]
|
945
|
+
# @since 0.4.0
|
946
|
+
#
|
947
|
+
def offset_indices
  # Each offset is the total of all sizes that come before it.
  running_total = 0
  sizes.each_with_object([]) do |count, offsets|
    offsets << running_total
    running_total += count
  end
end
|
954
|
+
|
955
|
+
# Test if self has no subsets.
|
956
|
+
#
|
957
|
+
# @return [true, false]
|
958
|
+
# true if self is an empty subset.
|
959
|
+
# @since 0.4.0
|
960
|
+
#
|
961
|
+
def empty?
  # Empty means the number of subsets is zero.
  subset_count = size
  subset_count.zero?
end
|
964
|
+
|
965
|
+
# Test if self has only one subset and it is comprehensive.
|
966
|
+
#
|
967
|
+
# @return [true, false]
|
968
|
+
# true if only member of self is equal to universal DataFrame.
|
969
|
+
# @since 0.4.0
|
970
|
+
#
|
971
|
+
def universal?
  # Anything other than exactly one subset cannot be universal.
  return false unless size == 1

  # The single subset must compare equal to the base DataFrame.
  @enum.first == baseframe
end
|
974
|
+
|
975
|
+
# Return string representation of self.
|
976
|
+
#
|
977
|
+
# @param limit [Integer]
|
978
|
+
# maximum number of DataFrames to show.
|
979
|
+
# @return [String]
|
980
|
+
# return string representation of each sub DataFrame.
|
981
|
+
# @example
|
982
|
+
# df
|
983
|
+
#
|
984
|
+
# # =>
|
985
|
+
# #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000caa8>
|
986
|
+
# x y z
|
987
|
+
# <uint8> <string> <boolean>
|
988
|
+
# 0 1 A false
|
989
|
+
# 1 2 A true
|
990
|
+
# 2 3 B false
|
991
|
+
# 3 4 B (nil)
|
992
|
+
# 4 5 B true
|
993
|
+
# 5 6 C false
|
994
|
+
#
|
995
|
+
# puts SubFrames.new(df, [[0, 1], [2, 3, 4], [5]])
|
996
|
+
#
|
997
|
+
# # =>
|
998
|
+
# x y z
|
999
|
+
# <uint8> <string> <boolean>
|
1000
|
+
# 0 1 A false
|
1001
|
+
# 1 2 A true
|
1002
|
+
# ---
|
1003
|
+
# x y z
|
1004
|
+
# <uint8> <string> <boolean>
|
1005
|
+
# 0 3 B false
|
1006
|
+
# 1 4 B (nil)
|
1007
|
+
# 2 5 B true
|
1008
|
+
# ---
|
1009
|
+
# x y z
|
1010
|
+
# <uint8> <string> <boolean>
|
1011
|
+
# 0 6 C false
|
1012
|
+
#
|
1013
|
+
# @since 0.4.0
|
1014
|
+
#
|
1015
|
+
def to_s(limit: 16)
|
1016
|
+
_to_s(limit: limit)
|
1017
|
+
end
|
1018
|
+
|
1019
|
+
# Return summary information of self.
|
1020
|
+
#
|
1021
|
+
# @param limit [Integer]
|
1022
|
+
# maximum number of DataFrames to show.
|
1023
|
+
# @return [String]
|
1024
|
+
# return class name, object id, universal DataFrame,
|
1025
|
+
# size and subset sizes in a String.
|
1026
|
+
# @example
|
1027
|
+
# df
|
1028
|
+
#
|
1029
|
+
# # =>
|
1030
|
+
# #<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000caa8>
|
1031
|
+
# x y z
|
1032
|
+
# <uint8> <string> <boolean>
|
1033
|
+
# 0 1 A false
|
1034
|
+
# 1 2 A true
|
1035
|
+
# 2 3 B false
|
1036
|
+
# 3 4 B (nil)
|
1037
|
+
# 4 5 B true
|
1038
|
+
# 5 6 C false
|
1039
|
+
#
|
1040
|
+
# SubFrames.new(df, [[0, 1], [2, 3, 4], [5]])
|
1041
|
+
#
|
1042
|
+
# # =>
|
1043
|
+
# #<RedAmber::SubFrames : 0x000000000000c1fc>
|
1044
|
+
# @baseframe=#<RedAmber::DataFrame : 6 x 3 Vectors, 0x000000000000c170>
|
1045
|
+
# 3 SubFrames: [2, 3, 1] in sizes.
|
1046
|
+
# ---
|
1047
|
+
# #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000002a120>
|
1048
|
+
# x y z
|
1049
|
+
# <uint8> <string> <boolean>
|
1050
|
+
# 0 1 A false
|
1051
|
+
# 1 2 A true
|
1052
|
+
# ---
|
1053
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000002a134>
|
1054
|
+
# x y z
|
1055
|
+
# <uint8> <string> <boolean>
|
1056
|
+
# 0 3 B false
|
1057
|
+
# 1 4 B (nil)
|
1058
|
+
# 2 5 B true
|
1059
|
+
# ---
|
1060
|
+
# #<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000002a148>
|
1061
|
+
# x y z
|
1062
|
+
# <uint8> <string> <boolean>
|
1063
|
+
# 0 6 C false
|
1064
|
+
#
|
1065
|
+
# @since 0.4.0
|
1066
|
+
#
|
1067
|
+
def inspect(limit: 16)
|
1068
|
+
shape =
|
1069
|
+
if @baseframe.is_a?(Enumerator)
|
1070
|
+
"Enumerator::Lazy:size=#{@baseframe.size}"
|
1071
|
+
else
|
1072
|
+
baseframe.shape_str(with_id: true)
|
1073
|
+
end
|
1074
|
+
sizes_truncated = (size > limit ? sizes.take(limit) << '...' : sizes).join(', ')
|
1075
|
+
"#<#{self.class} : #{format('0x%016x', object_id)}>\n" \
|
1076
|
+
"@baseframe=#<#{shape}>\n" \
|
1077
|
+
"#{size} SubFrame#{pl(size)}: " \
|
1078
|
+
"[#{sizes_truncated}] in size#{pl(size)}.\n" \
|
1079
|
+
"---\n#{_to_s(limit: limit, with_id: true)}"
|
1080
|
+
end
|
1081
|
+
|
1082
|
+
private
|
1083
|
+
|
1084
|
+
def frames
  # Materialize the enumerator once and keep the resulting Array.
  return @frames if @frames

  @frames = @enum.to_a
end
|
1087
|
+
|
1088
|
+
def _to_s(limit: 16, with_id: false)
  # Render at most `limit` members, each abbreviated to 2 head and 2 tail rows.
  rendered = take(limit).map do |frame|
    next frame.to_s(head: 2, tail: 2) unless with_id

    # With ids requested, put the shape line above the table.
    "#<#{frame.shape_str(with_id: with_id)}>\n#{frame.to_s(head: 2, tail: 2)}"
  end

  # Mention how many members were left out, if any.
  if size > limit
    surplus = size - limit
    rendered << "+ #{surplus} more DataFrame#{pl(surplus)}.\n"
  end

  rendered.join("---\n")
end
|
1100
|
+
end
|
1101
|
+
end
|