red_amber 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +114 -39
- data/CHANGELOG.md +203 -31
- data/Gemfile +5 -2
- data/README.md +62 -29
- data/benchmark/basic.yml +86 -0
- data/benchmark/combine.yml +62 -0
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/drop_nil.yml +15 -3
- data/benchmark/group.yml +39 -0
- data/benchmark/reshape.yml +31 -0
- data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
- data/benchmark/rover/flights.yml +23 -0
- data/benchmark/rover/penguins.yml +23 -0
- data/benchmark/rover/planes.yml +23 -0
- data/benchmark/rover/weather.yml +23 -0
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +335 -53
- data/doc/Vector.md +91 -0
- data/doc/image/dataframe/join.png +0 -0
- data/doc/image/dataframe/set_and_bind.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/lib/red_amber/data_frame.rb +167 -51
- data/lib/red_amber/data_frame_combinable.rb +486 -0
- data/lib/red_amber/data_frame_displayable.rb +6 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +108 -18
- data/lib/red_amber/helper.rb +53 -43
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +56 -46
- data/lib/red_amber/vector_functions.rb +23 -83
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +189 -65
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +3 -0
- data/red_amber.gemspec +4 -3
- metadata +24 -10
@@ -0,0 +1,486 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# mix-in for the class DataFrame
|
5
|
+
module DataFrameCombinable
|
6
|
+
# Refinements for Arrow::Table
|
7
|
+
using RefineArrowTable
|
8
|
+
|
9
|
+
# Append the rows of other DataFrames/Tables below the rows of self.
#
# @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
#   DataFrame/Table (or a single Array of them) to append to self.
# @return [DataFrame]
#   Concatenated dataframe. Returns self when nothing is given
#   (no argument, `nil` or an empty Array).
# @raise [DataFrameArgumentError]
#   If any element is neither an Arrow::Table nor a DataFrame.
def concatenate(*other)
  case other
  in [] | [nil] | [[]]
    return self
  in [Array => sources]
    # A single Array argument supplies the list of objects to append.
  else
    sources = other
  end

  # Every source is reduced to its underlying Arrow::Table.
  tables = sources.map do |item|
    next item if item.is_a?(Arrow::Table)
    next item.table if item.is_a?(DataFrame)

    raise DataFrameArgumentError, "#{item} is not a Table or a DataFrame"
  end

  combined = table.concatenate(tables)
  DataFrame.create(combined)
end
|
38
|
+
|
39
|
+
alias_method :concat, :concatenate
|
40
|
+
alias_method :bind_rows, :concatenate
|
41
|
+
|
42
|
+
# Merge the columns of other DataFrames or Tables into self.
# - Self and other must have same size.
# - Self and other must not share any key.
# - If they share any keys, raise Error.
# @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
#   DataFrame/Table (or a single Array of them) to merge.
# @return [DataFrame]
#   Merged dataframe. Returns self when nothing is given
#   (no argument, `nil` or an empty Array).
# @raise [DataFrameArgumentError]
#   If an element is neither an Arrow::Table nor a DataFrame,
#   if its size differs from self, or if it shares keys with self.
def merge(*other)
  case other
  in [] | [nil] | [[]]
    return self
  in [Array => array]
    # A single Array argument supplies the list of objects to merge.
  else
    array = other
  end

  hash = array.each_with_object({}) do |e, h|
    df =
      case e
      when Arrow::Table
        DataFrame.create(e)
      when DataFrame
        e
      else
        raise DataFrameArgumentError, "#{e} is not a Table or a DataFrame"
      end

    if size != df.size
      raise DataFrameArgumentError, "#{e} do not have same size as self"
    end

    # Keep the intersection itself (not just a boolean) so that the error
    # message names the offending keys.
    shared_keys = keys.intersection(df.keys)
    if shared_keys.any?
      raise DataFrameArgumentError, "There are some shared keys: #{shared_keys}"
    end

    # NOTE(review): keys shared between two *other* objects are not checked
    # here; the later one overwrites the earlier one in the hash.
    h.merge!(df.to_h)
  end

  assign(hash)
end
|
83
|
+
|
84
|
+
alias_method :bind_cols, :merge
|
85
|
+
|
86
|
+
# Mutating joins (#inner_join, #full_join, #left_join, #right_join)
|
87
|
+
|
88
|
+
# Join another DataFrame or Table, leaving only the matching records.
|
89
|
+
# - Same as `#join` with `type: :inner`
|
90
|
+
# - A kind of mutating join.
|
91
|
+
#
|
92
|
+
# @!macro join_before
|
93
|
+
# @param other [DataFrame, Arrow::Table]
|
94
|
+
# A DataFrame or a Table to be joined with self.
|
95
|
+
#
|
96
|
+
# @!macro join_after
|
97
|
+
# @param suffix [#succ]
|
98
|
+
# a suffix to rename keys when key names conflict as a result of join.
|
99
|
+
# `suffix` must respond to `#succ`.
|
100
|
+
# @return [DataFrame]
|
101
|
+
# Joined dataframe.
|
102
|
+
#
|
103
|
+
# @!macro join_key_in_array
|
104
|
+
# @param join_keys [String, Symbol, Array<String, Symbol>]
|
105
|
+
# A key or keys to match.
|
106
|
+
#
|
107
|
+
# @!macro join_key_in_hash
|
108
|
+
# @param join_key_pairs [Hash]
|
109
|
+
# Pairs of a key name or key names to match in left and right.
|
110
|
+
# @option join_key_pairs [String, Symbol, Array<String, Symbol>] :left
|
111
|
+
# Join keys in `self`.
|
112
|
+
# @option join_key_pairs [String, Symbol, Array<String, Symbol>] :right
|
113
|
+
# Join keys in `other`.
|
114
|
+
#
|
115
|
+
# @overload inner_join(other, suffix: '.1')
|
116
|
+
# If `join_key` is not specified, common keys in self and other are used
|
117
|
+
# (natural keys). Returns joined dataframe.
|
118
|
+
#
|
119
|
+
# @macro join_before
|
120
|
+
# @macro join_after
|
121
|
+
#
|
122
|
+
# @overload inner_join(other, join_keys, suffix: '.1')
|
123
|
+
#
|
124
|
+
# @macro join_before
|
125
|
+
# @macro join_key_in_array
|
126
|
+
# @macro join_after
|
127
|
+
#
|
128
|
+
# @overload inner_join(other, join_key_pairs, suffix: '.1')
|
129
|
+
#
|
130
|
+
# @macro join_before
|
131
|
+
# @macro join_key_in_hash
|
132
|
+
# @macro join_after
|
133
|
+
#
|
134
|
+
def inner_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: forwards everything to #join with the type fixed to :inner.
  join(other, join_keys, suffix: suffix, type: :inner)
end
|
137
|
+
|
138
|
+
# Join another DataFrame or Table, leaving all records.
|
139
|
+
# - Same as `#join` with `type: :full_outer`
|
140
|
+
# - A kind of mutating join.
|
141
|
+
#
|
142
|
+
# @overload full_join(other, suffix: '.1')
|
143
|
+
# If `join_key` is not specified, common keys in self and other are used
|
144
|
+
# (natural keys). Returns joined dataframe.
|
145
|
+
#
|
146
|
+
# @macro join_before
|
147
|
+
# @macro join_after
|
148
|
+
#
|
149
|
+
# @overload full_join(other, join_keys, suffix: '.1')
|
150
|
+
#
|
151
|
+
# @macro join_before
|
152
|
+
# @macro join_key_in_array
|
153
|
+
# @macro join_after
|
154
|
+
#
|
155
|
+
# @overload full_join(other, join_key_pairs, suffix: '.1')
|
156
|
+
#
|
157
|
+
# @macro join_before
|
158
|
+
# @macro join_key_in_hash
|
159
|
+
# @macro join_after
|
160
|
+
#
|
161
|
+
def full_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: forwards everything to #join with the type fixed to :full_outer.
  join(other, join_keys, suffix: suffix, type: :full_outer)
end
|
164
|
+
|
165
|
+
alias_method :outer_join, :full_join
|
166
|
+
|
167
|
+
# Join matching values to self from other.
|
168
|
+
# - Same as `#join` with `type: :left_outer`
|
169
|
+
# - A kind of mutating join.
|
170
|
+
#
|
171
|
+
# @overload left_join(other, suffix: '.1')
|
172
|
+
# If `join_key` is not specified, common keys in self and other are used
|
173
|
+
# (natural keys). Returns joined dataframe.
|
174
|
+
#
|
175
|
+
# @macro join_before
|
176
|
+
# @macro join_after
|
177
|
+
#
|
178
|
+
# @overload left_join(other, join_keys, suffix: '.1')
|
179
|
+
#
|
180
|
+
# @macro join_before
|
181
|
+
# @macro join_key_in_array
|
182
|
+
# @macro join_after
|
183
|
+
#
|
184
|
+
# @overload left_join(other, join_key_pairs, suffix: '.1')
|
185
|
+
#
|
186
|
+
# @macro join_before
|
187
|
+
# @macro join_key_in_hash
|
188
|
+
# @macro join_after
|
189
|
+
#
|
190
|
+
def left_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: forwards everything to #join with the type fixed to :left_outer.
  join(other, join_keys, suffix: suffix, type: :left_outer)
end
|
193
|
+
|
194
|
+
# Join matching values from self to other.
|
195
|
+
# - Same as `#join` with `type: :right_outer`
|
196
|
+
# - A kind of mutating join.
|
197
|
+
#
|
198
|
+
# @overload right_join(other, suffix: '.1')
|
199
|
+
# If `join_key` is not specified, common keys in self and other are used
|
200
|
+
# (natural keys). Returns joined dataframe.
|
201
|
+
#
|
202
|
+
# @macro join_before
|
203
|
+
# @macro join_after
|
204
|
+
#
|
205
|
+
# @overload right_join(other, join_keys, suffix: '.1')
|
206
|
+
#
|
207
|
+
# @macro join_before
|
208
|
+
# @macro join_key_in_array
|
209
|
+
# @macro join_after
|
210
|
+
#
|
211
|
+
# @overload right_join(other, join_key_pairs, suffix: '.1')
|
212
|
+
#
|
213
|
+
# @macro join_before
|
214
|
+
# @macro join_key_in_hash
|
215
|
+
# @macro join_after
|
216
|
+
#
|
217
|
+
def right_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: forwards everything to #join with the type fixed to :right_outer.
  join(other, join_keys, suffix: suffix, type: :right_outer)
end
|
220
|
+
|
221
|
+
# Filtering joins (#semi_join, #anti_join)
|
222
|
+
|
223
|
+
# Return records of self that have a match in other.
|
224
|
+
# - Same as `#join` with `type: :left_semi`
|
225
|
+
# - A kind of filtering join.
|
226
|
+
#
|
227
|
+
# @overload semi_join(other, suffix: '.1')
|
228
|
+
# If `join_key` is not specified, common keys in self and other are used
|
229
|
+
# (natural keys). Returns joined dataframe.
|
230
|
+
#
|
231
|
+
# @macro join_before
|
232
|
+
# @macro join_after
|
233
|
+
#
|
234
|
+
# @overload semi_join(other, join_keys, suffix: '.1')
|
235
|
+
#
|
236
|
+
# @macro join_before
|
237
|
+
# @macro join_key_in_array
|
238
|
+
# @macro join_after
|
239
|
+
#
|
240
|
+
# @overload semi_join(other, join_key_pairs, suffix: '.1')
|
241
|
+
#
|
242
|
+
# @macro join_before
|
243
|
+
# @macro join_key_in_hash
|
244
|
+
# @macro join_after
|
245
|
+
#
|
246
|
+
def semi_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: forwards everything to #join with the type fixed to :left_semi.
  join(other, join_keys, suffix: suffix, type: :left_semi)
end
|
249
|
+
|
250
|
+
# Return records of self that do not have a match in other.
|
251
|
+
# - Same as `#join` with `type: :left_anti`
|
252
|
+
# - A kind of filtering join.
|
253
|
+
#
|
254
|
+
# @overload anti_join(other, suffix: '.1')
|
255
|
+
# If `join_key` is not specified, common keys in self and other are used
|
256
|
+
# (natural keys). Returns joined dataframe.
|
257
|
+
#
|
258
|
+
# @macro join_before
|
259
|
+
# @macro join_after
|
260
|
+
#
|
261
|
+
# @overload anti_join(other, join_keys, suffix: '.1')
|
262
|
+
#
|
263
|
+
# @macro join_before
|
264
|
+
# @macro join_key_in_array
|
265
|
+
# @macro join_after
|
266
|
+
#
|
267
|
+
# @overload anti_join(other, join_key_pairs, suffix: '.1')
|
268
|
+
#
|
269
|
+
# @macro join_before
|
270
|
+
# @macro join_key_in_hash
|
271
|
+
# @macro join_after
|
272
|
+
#
|
273
|
+
def anti_join(other, join_keys = nil, suffix: '.1')
  # Thin wrapper: forwards everything to #join with the type fixed to :left_anti.
  join(other, join_keys, suffix: suffix, type: :left_anti)
end
|
276
|
+
|
277
|
+
# Set operations (#intersect, #union, #difference, #set_operable?)
|
278
|
+
|
279
|
+
# Check if set operation with self and other is possible.
|
280
|
+
#
|
281
|
+
# @macro join_before
|
282
|
+
#
|
283
|
+
# @return [Boolean] true if set operation is possible.
|
284
|
+
#
|
285
|
+
def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
  # Keys of other are converted to Symbols before the comparison;
  # both the key names and their order must match self.
  other_keys = other.keys.map(&:to_sym)
  keys == other_keys
end
|
288
|
+
|
289
|
+
# Select records appearing in both self and other.
|
290
|
+
# - Same as `#join` with `type: :inner` when keys in self are same with other.
|
291
|
+
# - A kind of set operations.
|
292
|
+
#
|
293
|
+
# @macro join_before
|
294
|
+
#
|
295
|
+
# @return [DataFrame] Joined dataframe.
|
296
|
+
#
|
297
|
+
def intersect(other)
  # With identical keys on both sides, an inner join on every key
  # keeps only the records found in both.
  return join(other, keys, type: :inner) if set_operable?(other)

  raise DataFrameArgumentError, 'keys are not same with self and other'
end
|
304
|
+
|
305
|
+
# Select records appearing in self or other.
|
306
|
+
# - Same as `#join` with `type: :full_outer` when keys in self are same with other.
|
307
|
+
# - A kind of set operations.
|
308
|
+
#
|
309
|
+
# @macro join_before
|
310
|
+
#
|
311
|
+
# @return [DataFrame] Joined dataframe.
|
312
|
+
#
|
313
|
+
def union(other)
  # With identical keys on both sides, a full outer join on every key
  # keeps the records found in either one.
  return join(other, keys, type: :full_outer) if set_operable?(other)

  raise DataFrameArgumentError, 'keys are not same with self and other'
end
|
320
|
+
|
321
|
+
# Select records appearing in self but not in other.
|
322
|
+
# - Same as `#join` with `type: :left_anti` when keys in self are same with other.
|
323
|
+
# - A kind of set operations.
|
324
|
+
#
|
325
|
+
# @macro join_before
|
326
|
+
#
|
327
|
+
# @return [DataFrame] Joined dataframe.
|
328
|
+
#
|
329
|
+
def difference(other)
  # With identical keys on both sides, a left anti join on every key
  # keeps the records of self that have no match in other.
  return join(other, keys, type: :left_anti) if set_operable?(other)

  raise DataFrameArgumentError, 'keys are not same with self and other'
end
|
336
|
+
|
337
|
+
alias_method :setdiff, :difference
|
338
|
+
|
339
|
+
# Join another DataFrame or Table to self.
|
340
|
+
#
|
341
|
+
# @overload join(other, type: :inner, suffix: '.1')
|
342
|
+
#
|
343
|
+
# If `join_key` is not specified, common keys in self and other are used
|
344
|
+
# (natural keys). Returns joined dataframe.
|
345
|
+
#
|
346
|
+
# @!macro join_common_type
|
347
|
+
# @param type [:left_semi, :right_semi, :left_anti, :right_anti, :inner,
|
348
|
+
# :left_outer, :right_outer, :full_outer] type of join.
|
349
|
+
#
|
350
|
+
# @macro join_before
|
351
|
+
# @macro join_common_type
|
352
|
+
# @macro join_after
|
353
|
+
#
|
354
|
+
# @overload join(other, join_keys, type: :inner, suffix: '.1')
|
355
|
+
#
|
356
|
+
# @macro join_before
|
357
|
+
# @macro join_key_in_array
|
358
|
+
# @macro join_common_type
|
359
|
+
# @macro join_after
|
360
|
+
#
|
361
|
+
# @overload join(other, join_key_pairs, type: :inner, suffix: '.1')
|
362
|
+
#
|
363
|
+
# @macro join_before
|
364
|
+
# @macro join_key_in_hash
|
365
|
+
# @macro join_common_type
|
366
|
+
# @macro join_after
|
367
|
+
#
|
368
|
+
def join(other, join_keys = nil, type: :inner, suffix: '.1')
  # Reduce `other` to an Arrow::Table; any other class is rejected.
  other =
    case other
    when DataFrame then other.table
    when Arrow::Table then other
    else
      raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
    end

  table_keys = table.keys
  other_keys = other.keys
  type = type.to_sym

  # natural keys (implicit common keys)
  join_keys = table_keys.intersection(other_keys) unless join_keys

  # This is not necessary if additional procedure is contributed to Red Arrow.
  left_keys, right_keys =
    if join_keys.is_a?(Hash)
      [join_keys[:left], join_keys[:right]]
    else
      [join_keys, join_keys]
    end
  left_keys = Array(left_keys).map(&:to_s)
  right_keys = Array(right_keys).map(&:to_s)

  # Output columns requested from each side. Every other join type
  # leaves both as nil.
  left_outputs, right_outputs =
    case type
    when :inner, :left_outer
      [table_keys, other_keys - right_keys]
    when :right_outer
      [table_keys - left_keys, other_keys]
    end

  # Should we rescue errors in Arrow::Table#join for usability ?
  joined_table =
    table.join(other, join_keys,
               type: type,
               left_outputs: left_outputs,
               right_outputs: right_outputs)

  case type
  when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
    # Array#uniq! returns nil when there was no duplicate, so it doubles
    # as a "has duplicated keys?" test here.
    duplicated = joined_table.keys.uniq!
    DataFrame.create(duplicated ? rename_table(joined_table, n_keys, suffix) : joined_table)
  when :full_outer
    renamed_table = rename_table(joined_table, n_keys, suffix)
    renamed_keys = renamed_table.keys
    right_side_keys = []
    coalesced =
      DataFrame.create(renamed_table).assign do |df|
        left_keys.map do |left_key|
          left_index = renamed_keys.index(left_key)
          # The matching right-side column is looked up table_keys.size
          # positions after the left one.
          right_key = renamed_keys[left_index + table_keys.size]
          right_side_keys << right_key
          [left_key.to_sym, merge_array(df[left_key].data, df[right_key].data)]
        end
      end
    coalesced.drop(right_side_keys)
  when :right_outer
    joined =
      if joined_table.keys.uniq!
        DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
      else
        DataFrame.create(joined_table)
      end
    # Reorder the columns: join keys first, then the remaining keys.
    joined.pick do
      [right_keys, keys.map(&:to_s) - right_keys]
    end
  end
end
|
444
|
+
|
445
|
+
private
|
446
|
+
|
447
|
+
# Rename duplicated keys on the right side of a joined table by a suffix.
#
# Keys from index `n_keys` onward are treated as the right side. A right-side
# key that occurs more than once in the joined table gets `suffix` appended;
# while the new name still collides with an existing key, `suffix.succ` is
# tried. The advanced suffix is carried over to the following keys.
#
# Raises DataFrameArgumentError when `suffix.succ` returns a value equal
# to `suffix`.
def rename_table(joined_table, n_keys, suffix)
  joined_keys = joined_table.keys
  right_keys = joined_keys[n_keys..]

  duplicated = joined_keys.tally.reject { |_, count| count == 1 }.keys
  renamed_right_keys =
    right_keys.map do |key|
      next key unless duplicated.include?(key)

      candidate = "#{key}#{suffix}"
      while joined_keys.include?(candidate)
        following = suffix.succ
        raise DataFrameArgumentError, "suffix #{suffix} is invalid" if following == suffix

        suffix = following
        candidate = "#{key}#{suffix}"
      end
      candidate
    end
  new_keys = joined_keys.take(n_keys) + renamed_right_keys

  # Rebuild the schema with the new names; column data is reused as is.
  fields =
    new_keys.each_with_index.map do |name, index|
      Arrow::Field.new(name, joined_table[index].data_type)
    end
  Arrow::Table.new(Arrow::Schema.new(fields), joined_table.columns)
end
|
479
|
+
|
480
|
+
# Merge two Arrow::Arrays.
# Takes the element of array2 wherever array1 is null, otherwise the
# element of array1.
def merge_array(array1, array2)
  null_mask = Arrow::Function.find(:is_null).execute([array1])
  selector = Arrow::Function.find(:if_else)
  selector.execute([null_mask, array2, array1]).value
end
|
485
|
+
end
|
486
|
+
end
|
@@ -93,7 +93,8 @@ module RedAmber
|
|
93
93
|
levels = tallys.map(&:size)
|
94
94
|
type_groups = @table.columns.map { |column| type_group(column.data_type) }
|
95
95
|
quoted_keys = keys.map(&:inspect)
|
96
|
-
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
96
|
+
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
97
|
+
data: 'data_preview' }
|
97
98
|
header_format = make_header_format(levels, headers, quoted_keys)
|
98
99
|
|
99
100
|
sio = StringIO.new # output string buffer
|
@@ -174,6 +175,8 @@ module RedAmber
|
|
174
175
|
end
|
175
176
|
|
176
177
|
def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
|
178
|
+
return " #{keys.join(' ')}\n (Empty Vectors)\n" if size.zero?
|
179
|
+
|
177
180
|
original = self
|
178
181
|
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
179
182
|
df = slice(indices).assign do
|
@@ -199,7 +202,8 @@ module RedAmber
|
|
199
202
|
vectors.each_with_object({}) do |v, assigner|
|
200
203
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
201
204
|
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
202
|
-
assigner[v.key] =
|
205
|
+
assigner[v.key] =
|
206
|
+
original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
203
207
|
end
|
204
208
|
end
|
205
209
|
|
@@ -263,8 +267,6 @@ module RedAmber
|
|
263
267
|
format('%g', element)
|
264
268
|
in Integer
|
265
269
|
format('%d', element)
|
266
|
-
else
|
267
|
-
element
|
268
270
|
end
|
269
271
|
end
|
270
272
|
end
|
@@ -18,7 +18,7 @@ module RedAmber
|
|
18
18
|
# @return [RedAmber::Vector] Sorted indices in Vector
|
19
19
|
def sort_indices(*sort_keys)
|
20
20
|
indices = @table.sort_indices(sort_keys.flatten)
|
21
|
-
Vector.
|
21
|
+
Vector.create(indices)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @return [RedAmber::DataFrame] Sorted DataFrame
|
@@ -32,7 +32,7 @@ module RedAmber
|
|
32
32
|
|
33
33
|
def new_dataframe_by(index_array)
|
34
34
|
t = Arrow::Function.find(:take).execute([@table, index_array]).value
|
35
|
-
|
35
|
+
DataFrame.create(t)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
end
|
@@ -17,14 +17,17 @@ module RedAmber
|
|
17
17
|
end
|
18
18
|
|
19
19
|
# Save DataFrame
|
20
|
+
#
|
21
|
+
# @return [DataFrame] self.
|
20
22
|
def save(output, options = {})
|
21
23
|
@table.save(output, options)
|
24
|
+
self
|
22
25
|
end
|
23
26
|
|
24
27
|
# Save and reload to cast automatically
|
25
28
|
# Via a temporary tsv format file by default
|
26
29
|
#
|
27
|
-
#
|
30
|
+
# @note experimental feature
|
28
31
|
def auto_cast(format: :tsv)
|
29
32
|
return self if empty?
|
30
33
|
|
@@ -8,11 +8,14 @@ module RedAmber
|
|
8
8
|
# @param key [Symbol] key of the index column
|
9
9
|
# to transpose into keys.
|
10
10
|
# If it is not specified, keys[0] is used.
|
11
|
-
# @param
|
12
|
-
# If it is not specified, :NAME is used.
|
11
|
+
# @param name [Symbol] key name of transposed index column.
|
12
|
+
# If it is not specified, :NAME is used.
|
13
|
+
# If it already exists, :NAME1 or :NAME1.succ is used.
|
13
14
|
# @return [DataFrame] transposed DataFrame
|
14
15
|
def transpose(key: keys.first, name: :NAME)
|
15
|
-
|
16
|
+
unless keys.include?(key)
|
17
|
+
raise DataFrameArgumentError, "Self does not include: #{key}"
|
18
|
+
end
|
16
19
|
|
17
20
|
# Find unused name
|
18
21
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
@@ -35,14 +38,24 @@ module RedAmber
|
|
35
38
|
# @param value [Symbol, String] key of the column which is come **from values**.
|
36
39
|
# @return [DataFrame] long DataFrame.
|
37
40
|
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
41
|
+
warn('[Info] No key to keep is specified.') if keep_keys.empty?
|
42
|
+
|
38
43
|
not_included = keep_keys - keys
|
39
|
-
|
44
|
+
unless not_included.empty?
|
45
|
+
raise DataFrameArgumentError, "Not have keys #{not_included}"
|
46
|
+
end
|
40
47
|
|
41
48
|
name = name.to_sym
|
42
|
-
|
49
|
+
if keep_keys.include?(name)
|
50
|
+
raise DataFrameArgumentError,
|
51
|
+
"Can't specify the key: #{name} for the column from keys."
|
52
|
+
end
|
43
53
|
|
44
54
|
value = value.to_sym
|
45
|
-
|
55
|
+
if keep_keys.include?(value)
|
56
|
+
raise DataFrameArgumentError,
|
57
|
+
"Can't specify the key: #{value} for the column from values."
|
58
|
+
end
|
46
59
|
|
47
60
|
hash = Hash.new { |h, k| h[k] = [] }
|
48
61
|
l = keys.size - keep_keys.size
|
@@ -62,15 +75,27 @@ module RedAmber
|
|
62
75
|
|
63
76
|
# Reshape long DataFrame to a wide DataFrame.
|
64
77
|
#
|
65
|
-
# @param name [Symbol, String]
|
66
|
-
#
|
78
|
+
# @param name [Symbol, String]
|
79
|
+
# key of the column which will be expanded **to key names**.
|
80
|
+
# @param value [Symbol, String]
|
81
|
+
# key of the column which will be expanded **to values**.
|
67
82
|
# @return [DataFrame] wide DataFrame.
|
68
83
|
def to_wide(name: :NAME, value: :VALUE)
|
69
84
|
name = name.to_sym
|
70
|
-
|
85
|
+
unless keys.include?(name)
|
86
|
+
raise DataFrameArgumentError,
|
87
|
+
"You are going to keep the key: #{name}. " \
|
88
|
+
'You may need to specify the column name ' \
|
89
|
+
'that gives the new keys by `:name` option.'
|
90
|
+
end
|
71
91
|
|
72
92
|
value = value.to_sym
|
73
|
-
|
93
|
+
unless keys.include?(value)
|
94
|
+
raise DataFrameArgumentError,
|
95
|
+
"You are going to keep the key: #{value}. " \
|
96
|
+
'You may need to specify the column name ' \
|
97
|
+
'that gives the new values by `:value` option.'
|
98
|
+
end
|
74
99
|
|
75
100
|
hash = Hash.new { |h, k| h[k] = {} }
|
76
101
|
keep_keys = keys - [name, value]
|