red_amber 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +114 -39
- data/CHANGELOG.md +203 -31
- data/Gemfile +5 -2
- data/README.md +62 -29
- data/benchmark/basic.yml +86 -0
- data/benchmark/combine.yml +62 -0
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/drop_nil.yml +15 -3
- data/benchmark/group.yml +39 -0
- data/benchmark/reshape.yml +31 -0
- data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
- data/benchmark/rover/flights.yml +23 -0
- data/benchmark/rover/penguins.yml +23 -0
- data/benchmark/rover/planes.yml +23 -0
- data/benchmark/rover/weather.yml +23 -0
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +335 -53
- data/doc/Vector.md +91 -0
- data/doc/image/dataframe/join.png +0 -0
- data/doc/image/dataframe/set_and_bind.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/lib/red_amber/data_frame.rb +167 -51
- data/lib/red_amber/data_frame_combinable.rb +486 -0
- data/lib/red_amber/data_frame_displayable.rb +6 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +108 -18
- data/lib/red_amber/helper.rb +53 -43
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +56 -46
- data/lib/red_amber/vector_functions.rb +23 -83
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +189 -65
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +3 -0
- data/red_amber.gemspec +4 -3
- metadata +24 -10
@@ -0,0 +1,486 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# mix-in for the class DataFrame
|
5
|
+
module DataFrameCombinable
|
6
|
+
# Refinements for Arrow::Table
|
7
|
+
using RefineArrowTable
|
8
|
+
|
9
|
+
# Concatenate other dataframe onto the bottom.
|
10
|
+
#
|
11
|
+
# @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
|
12
|
+
# DataFrame/Table to concatenate onto the bottom of self.
|
13
|
+
# @return [DataFrame]
|
14
|
+
# Concatenated dataframe.
|
15
|
+
def concatenate(*other)
  # Normalize the arguments: nothing to append returns self unchanged,
  # a single Array argument is unwrapped, anything else is used as given.
  sources =
    case other
    in [] | [nil] | [[]] then return self
    in [Array => list] then list
    else other
    end

  # Every source must end up as an Arrow::Table.
  tables =
    sources.map do |source|
      case source
      when Arrow::Table then source
      when DataFrame then source.table
      else raise DataFrameArgumentError, "#{source} is not a Table or a DataFrame"
      end
    end

  DataFrame.create(table.concatenate(tables))
end
|
38
|
+
|
39
|
+
alias_method :concat, :concatenate
|
40
|
+
alias_method :bind_rows, :concatenate
|
41
|
+
|
42
|
+
# Merge the columns of other DataFrames or Tables into self.
#   - Self and other must have same size.
#   - Self and other must not share any key.
#   - If they share any keys, DataFrameArgumentError is raised and
#     the message lists the shared keys.
# @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
#   DataFrame/Table to merge.
# @return [DataFrame]
#   Merged dataframe. Returns self if nothing is given.
# @raise [DataFrameArgumentError]
#   If an element is neither a DataFrame nor a Table, if sizes differ,
#   or if any key is shared with self.
def merge(*other)
  case other
  in [] | [nil] | [[]]
    return self
  in [Array => array]
    # Nop
  else
    array = other
  end

  hash = array.each_with_object({}) do |e, h|
    df =
      case e
      when Arrow::Table
        DataFrame.create(e)
      when DataFrame
        e
      else
        raise DataFrameArgumentError, "#{e} is not a Table or a DataFrame"
      end

    if size != df.size
      raise DataFrameArgumentError, "#{e} do not have same size as self"
    end

    # Keep the intersection itself (not a boolean) so that the error message
    # can tell the caller which keys collide.
    shared_keys = keys.intersection(df.keys)
    unless shared_keys.empty?
      raise DataFrameArgumentError, "There are some shared keys: #{shared_keys}"
    end

    h.merge!(df.to_h)
  end

  assign(hash)
end
|
83
|
+
|
84
|
+
alias_method :bind_cols, :merge
|
85
|
+
|
86
|
+
# Mutating joins (#inner_join, #full_join, #left_join, #right_join)
|
87
|
+
|
88
|
+
# Join another DataFrame or Table, leaving only the matching records.
|
89
|
+
# - Same as `#join` with `type: :inner`
|
90
|
+
# - A kind of mutating join.
|
91
|
+
#
|
92
|
+
# @!macro join_before
|
93
|
+
# @param other [DataFrame, Arrow::Table]
|
94
|
+
# A DataFrame or a Table to be joined with self.
|
95
|
+
#
|
96
|
+
# @!macro join_after
|
97
|
+
# @param suffix [#succ]
|
98
|
+
# a suffix to rename keys when key names conflict as a result of join.
|
99
|
+
# `suffix` must respond to `#succ`.
|
100
|
+
# @return [DataFrame]
|
101
|
+
# Joined dataframe.
|
102
|
+
#
|
103
|
+
# @!macro join_key_in_array
|
104
|
+
# @param join_keys [String, Symbol, Array<String, Symbol>]
|
105
|
+
# A key or keys to match.
|
106
|
+
#
|
107
|
+
# @!macro join_key_in_hash
|
108
|
+
# @param join_key_pairs [Hash]
|
109
|
+
# Pairs of a key name or key names to match in left and right.
|
110
|
+
# @option join_key_pairs [String, Symbol, Array<String, Symbol>] :left
|
111
|
+
# Join keys in `self`.
|
112
|
+
# @option join_key_pairs [String, Symbol, Array<String, Symbol>] :right
|
113
|
+
# Join keys in `other`.
|
114
|
+
#
|
115
|
+
# @overload inner_join(other, suffix: '.1')
|
116
|
+
# If `join_key` is not specified, common keys in self and other are used
|
117
|
+
# (natural keys). Returns joined dataframe.
|
118
|
+
#
|
119
|
+
# @macro join_before
|
120
|
+
# @macro join_after
|
121
|
+
#
|
122
|
+
# @overload inner_join(other, join_keys, suffix: '.1')
|
123
|
+
#
|
124
|
+
# @macro join_before
|
125
|
+
# @macro join_key_in_array
|
126
|
+
# @macro join_after
|
127
|
+
#
|
128
|
+
# @overload inner_join(other, join_key_pairs, suffix: '.1')
|
129
|
+
#
|
130
|
+
# @macro join_before
|
131
|
+
# @macro join_key_in_hash
|
132
|
+
# @macro join_after
|
133
|
+
#
|
134
|
+
def inner_join(other, join_keys = nil, suffix: '.1')
  # Mutating join that delegates to #join with the :inner type.
  options = { type: :inner, suffix: suffix }
  join(other, join_keys, **options)
end
|
137
|
+
|
138
|
+
# Join another DataFrame or Table, leaving all records.
|
139
|
+
# - Same as `#join` with `type: :full_outer`
|
140
|
+
# - A kind of mutating join.
|
141
|
+
#
|
142
|
+
# @overload full_join(other, suffix: '.1')
|
143
|
+
# If `join_key` is not specified, common keys in self and other are used
|
144
|
+
# (natural keys). Returns joined dataframe.
|
145
|
+
#
|
146
|
+
# @macro join_before
|
147
|
+
# @macro join_after
|
148
|
+
#
|
149
|
+
# @overload full_join(other, join_keys, suffix: '.1')
|
150
|
+
#
|
151
|
+
# @macro join_before
|
152
|
+
# @macro join_key_in_array
|
153
|
+
# @macro join_after
|
154
|
+
#
|
155
|
+
# @overload full_join(other, join_key_pairs, suffix: '.1')
|
156
|
+
#
|
157
|
+
# @macro join_before
|
158
|
+
# @macro join_key_in_hash
|
159
|
+
# @macro join_after
|
160
|
+
#
|
161
|
+
def full_join(other, join_keys = nil, suffix: '.1')
  # Mutating join that delegates to #join with the :full_outer type.
  options = { type: :full_outer, suffix: suffix }
  join(other, join_keys, **options)
end
|
164
|
+
|
165
|
+
alias_method :outer_join, :full_join
|
166
|
+
|
167
|
+
# Join matching values to self from other.
|
168
|
+
# - Same as `#join` with `type: :left_outer`
|
169
|
+
# - A kind of mutating join.
|
170
|
+
#
|
171
|
+
# @overload left_join(other, suffix: '.1')
|
172
|
+
# If `join_key` is not specified, common keys in self and other are used
|
173
|
+
# (natural keys). Returns joined dataframe.
|
174
|
+
#
|
175
|
+
# @macro join_before
|
176
|
+
# @macro join_after
|
177
|
+
#
|
178
|
+
# @overload left_join(other, join_keys, suffix: '.1')
|
179
|
+
#
|
180
|
+
# @macro join_before
|
181
|
+
# @macro join_key_in_array
|
182
|
+
# @macro join_after
|
183
|
+
#
|
184
|
+
# @overload left_join(other, join_key_pairs, suffix: '.1')
|
185
|
+
#
|
186
|
+
# @macro join_before
|
187
|
+
# @macro join_key_in_hash
|
188
|
+
# @macro join_after
|
189
|
+
#
|
190
|
+
def left_join(other, join_keys = nil, suffix: '.1')
  # Mutating join that delegates to #join with the :left_outer type.
  options = { type: :left_outer, suffix: suffix }
  join(other, join_keys, **options)
end
|
193
|
+
|
194
|
+
# Join matching values from self to other.
|
195
|
+
# - Same as `#join` with `type: :right_outer`
|
196
|
+
# - A kind of mutating join.
|
197
|
+
#
|
198
|
+
# @overload right_join(other, suffix: '.1')
|
199
|
+
# If `join_key` is not specified, common keys in self and other are used
|
200
|
+
# (natural keys). Returns joined dataframe.
|
201
|
+
#
|
202
|
+
# @macro join_before
|
203
|
+
# @macro join_after
|
204
|
+
#
|
205
|
+
# @overload right_join(other, join_keys, suffix: '.1')
|
206
|
+
#
|
207
|
+
# @macro join_before
|
208
|
+
# @macro join_key_in_array
|
209
|
+
# @macro join_after
|
210
|
+
#
|
211
|
+
# @overload right_join(other, join_key_pairs, suffix: '.1')
|
212
|
+
#
|
213
|
+
# @macro join_before
|
214
|
+
# @macro join_key_in_hash
|
215
|
+
# @macro join_after
|
216
|
+
#
|
217
|
+
def right_join(other, join_keys = nil, suffix: '.1')
  # Mutating join that delegates to #join with the :right_outer type.
  options = { type: :right_outer, suffix: suffix }
  join(other, join_keys, **options)
end
|
220
|
+
|
221
|
+
# Filtering joins (#semi_join, #anti_join)
|
222
|
+
|
223
|
+
# Return records of self that have a match in other.
|
224
|
+
# - Same as `#join` with `type: :left_semi`
|
225
|
+
# - A kind of filtering join.
|
226
|
+
#
|
227
|
+
# @overload semi_join(other, suffix: '.1')
|
228
|
+
# If `join_key` is not specified, common keys in self and other are used
|
229
|
+
# (natural keys). Returns joined dataframe.
|
230
|
+
#
|
231
|
+
# @macro join_before
|
232
|
+
# @macro join_after
|
233
|
+
#
|
234
|
+
# @overload semi_join(other, join_keys, suffix: '.1')
|
235
|
+
#
|
236
|
+
# @macro join_before
|
237
|
+
# @macro join_key_in_array
|
238
|
+
# @macro join_after
|
239
|
+
#
|
240
|
+
# @overload semi_join(other, join_key_pairs, suffix: '.1')
|
241
|
+
#
|
242
|
+
# @macro join_before
|
243
|
+
# @macro join_key_in_hash
|
244
|
+
# @macro join_after
|
245
|
+
#
|
246
|
+
def semi_join(other, join_keys = nil, suffix: '.1')
  # Filtering join that delegates to #join with the :left_semi type.
  options = { type: :left_semi, suffix: suffix }
  join(other, join_keys, **options)
end
|
249
|
+
|
250
|
+
# Return records of self that do not have a match in other.
|
251
|
+
# - Same as `#join` with `type: :left_anti`
|
252
|
+
# - A kind of filtering join.
|
253
|
+
#
|
254
|
+
# @overload anti_join(other, suffix: '.1')
|
255
|
+
# If `join_key` is not specified, common keys in self and other are used
|
256
|
+
# (natural keys). Returns joined dataframe.
|
257
|
+
#
|
258
|
+
# @macro join_before
|
259
|
+
# @macro join_after
|
260
|
+
#
|
261
|
+
# @overload anti_join(other, join_keys, suffix: '.1')
|
262
|
+
#
|
263
|
+
# @macro join_before
|
264
|
+
# @macro join_key_in_array
|
265
|
+
# @macro join_after
|
266
|
+
#
|
267
|
+
# @overload anti_join(other, join_key_pairs, suffix: '.1')
|
268
|
+
#
|
269
|
+
# @macro join_before
|
270
|
+
# @macro join_key_in_hash
|
271
|
+
# @macro join_after
|
272
|
+
#
|
273
|
+
def anti_join(other, join_keys = nil, suffix: '.1')
  # Filtering join that delegates to #join with the :left_anti type.
  options = { type: :left_anti, suffix: suffix }
  join(other, join_keys, **options)
end
|
276
|
+
|
277
|
+
# Set operations (#intersect, #union, #difference, #set_operable?)
|
278
|
+
|
279
|
+
# Check if set operation with self and other is possible.
|
280
|
+
#
|
281
|
+
# @macro join_before
|
282
|
+
#
|
283
|
+
# @return [Boolean] true if set operation is possible.
|
284
|
+
#
|
285
|
+
def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
  # Normalize other's keys to Symbols, then require the same keys
  # in the same order as self.
  other_keys = other.keys.map(&:to_sym)
  keys == other_keys
end
|
288
|
+
|
289
|
+
# Select records appearing in both self and other.
|
290
|
+
# - Same as `#join` with `type: :inner` when keys in self are same with other.
|
291
|
+
# - A kind of set operations.
|
292
|
+
#
|
293
|
+
# @macro join_before
|
294
|
+
#
|
295
|
+
# @return [DataFrame] Joined dataframe.
|
296
|
+
#
|
297
|
+
def intersect(other)
  # Use the shared predicate so every set operation applies the same
  # definition of "same keys".
  unless set_operable?(other)
    raise DataFrameArgumentError, 'keys are not same with self and other'
  end

  # All keys are join keys, so only records present in both sides remain.
  join(other, keys, type: :inner)
end
|
304
|
+
|
305
|
+
# Select records appearing in self or other.
|
306
|
+
# - Same as `#join` with `type: :full_outer` when keys in self are same with other.
|
307
|
+
# - A kind of set operations.
|
308
|
+
#
|
309
|
+
# @macro join_before
|
310
|
+
#
|
311
|
+
# @return [DataFrame] Joined dataframe.
|
312
|
+
#
|
313
|
+
def union(other)
  # Use the shared predicate so every set operation applies the same
  # definition of "same keys".
  unless set_operable?(other)
    raise DataFrameArgumentError, 'keys are not same with self and other'
  end

  # All keys are join keys, so records from either side remain.
  join(other, keys, type: :full_outer)
end
|
320
|
+
|
321
|
+
# Select records appearing in self but not in other.
|
322
|
+
# - Same as `#join` with `type: :left_anti` when keys in self are same with other.
|
323
|
+
# - A kind of set operations.
|
324
|
+
#
|
325
|
+
# @macro join_before
|
326
|
+
#
|
327
|
+
# @return [DataFrame] Joined dataframe.
|
328
|
+
#
|
329
|
+
def difference(other)
  # Use the shared predicate so every set operation applies the same
  # definition of "same keys".
  unless set_operable?(other)
    raise DataFrameArgumentError, 'keys are not same with self and other'
  end

  # All keys are join keys, so only records of self without a match remain.
  join(other, keys, type: :left_anti)
end
|
336
|
+
|
337
|
+
alias_method :setdiff, :difference
|
338
|
+
|
339
|
+
# Join another DataFrame or Table to self.
|
340
|
+
#
|
341
|
+
# @overload join(other, type: :inner, suffix: '.1')
|
342
|
+
#
|
343
|
+
# If `join_key` is not specified, common keys in self and other are used
|
344
|
+
# (natural keys). Returns joined dataframe.
|
345
|
+
#
|
346
|
+
# @!macro join_common_type
|
347
|
+
# @param type [:left_semi, :right_semi, :left_anti, :right_anti, :inner,
|
348
|
+
# :left_outer, :right_outer, :full_outer] type of join.
|
349
|
+
#
|
350
|
+
# @macro join_before
|
351
|
+
# @macro join_common_type
|
352
|
+
# @macro join_after
|
353
|
+
#
|
354
|
+
# @overload join(other, join_keys, type: :inner, suffix: '.1')
|
355
|
+
#
|
356
|
+
# @macro join_before
|
357
|
+
# @macro join_key_in_array
|
358
|
+
# @macro join_common_type
|
359
|
+
# @macro join_after
|
360
|
+
#
|
361
|
+
# @overload join(other, join_key_pairs, type: :inner, suffix: '.1')
|
362
|
+
#
|
363
|
+
# @macro join_before
|
364
|
+
# @macro join_key_in_hash
|
365
|
+
# @macro join_common_type
|
366
|
+
# @macro join_after
|
367
|
+
#
|
368
|
+
def join(other, join_keys = nil, type: :inner, suffix: '.1')
  # Normalize `other` to an Arrow::Table; any other class is rejected.
  case other
  when DataFrame
    other = other.table
  when Arrow::Table
    # Nop
  else
    raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
  end

  table_keys = table.keys
  other_keys = other.keys
  type = type.to_sym

  # natural keys (implicit common keys)
  join_keys ||= table_keys.intersection(other_keys)

  # This is not necessary if additional procedure is contributed to Red Arrow.
  # A Hash supplies separate key names per side under :left and :right;
  # any other value is used for both sides. Both are normalized to
  # Arrays of Strings below.
  if join_keys.is_a?(Hash)
    left_keys = join_keys[:left]
    right_keys = join_keys[:right]
  else
    left_keys = join_keys
    right_keys = join_keys
  end
  left_keys = Array(left_keys).map(&:to_s)
  right_keys = Array(right_keys).map(&:to_s)

  # Choose the columns passed as left_outputs / right_outputs to the table join.
  #   - nil: nothing is specified (presumably the join's default output
  #     is used -- TODO confirm against Arrow::Table#join).
  #   - :inner / :left_outer: all columns of self, other without its join keys.
  #   - :right_outer: self without its join keys, all columns of other.
  # NOTE(review): a type not listed here leaves both outputs nil and the
  # final `case` below falls through, returning nil -- confirm that
  # Arrow::Table#join rejects unknown types before that happens.
  case type
  when :full_outer, :left_semi, :left_anti, :right_semi, :right_anti
    left_outputs = nil
    right_outputs = nil
  when :inner, :left_outer
    left_outputs = table_keys
    right_outputs = other_keys - right_keys
  when :right_outer
    left_outputs = table_keys - left_keys
    right_outputs = other_keys
  end

  # Should we rescue errors in Arrow::Table#join for usability ?
  joined_table =
    table.join(other, join_keys,
               type: type,
               left_outputs: left_outputs,
               right_outputs: right_outputs)

  # Post-process the joined table into a DataFrame.
  # NOTE(review): `n_keys` is not defined in this method; presumably it is
  # a DataFrame method giving the number of keys in self -- verify.
  case type
  when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
    # Array#uniq! returns nil when nothing was removed, so the rename
    # happens only when the joined table has duplicate key names.
    if joined_table.keys.uniq!
      DataFrame.create(rename_table(joined_table, n_keys, suffix))
    else
      DataFrame.create(joined_table)
    end
  when :full_outer
    # Always rename, then fold each right join-key column into its left
    # counterpart (left value, or right value where left is null) and
    # drop the right join-key columns.
    renamed_table = rename_table(joined_table, n_keys, suffix)
    renamed_keys = renamed_table.keys
    dropper = []
    DataFrame.create(renamed_table).assign do |df|
      left_keys.map do |left_key|
        i_left_key = renamed_keys.index(left_key)
        # NOTE(review): assumes the right join key sits exactly
        # table_keys.size columns after the left one -- confirm this
        # also holds when join keys are given as a Hash.
        right_key = renamed_keys[i_left_key + table_keys.size]
        dropper << right_key
        [left_key.to_sym, merge_array(df[left_key].data, df[right_key].data)]
      end
    end.drop(dropper)
  when :right_outer
    # left_outputs.size is the number of columns contributed by self.
    if joined_table.keys.uniq!
      DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
    else
      DataFrame.create(joined_table)
    end.pick do
      # Right join keys first, then the remaining keys.
      # NOTE(review): whether `keys` here means self or the joined
      # DataFrame depends on how #pick evaluates its block -- verify.
      [right_keys, keys.map(&:to_s) - right_keys]
    end
  end
end
|
444
|
+
|
445
|
+
private
|
446
|
+
|
447
|
+
# Rename duplicate keys by suffix
|
448
|
+
def rename_table(joined_table, n_keys, suffix)
  all_keys = joined_table.keys
  duplicated = all_keys.tally.select { |_, count| count > 1 }.keys

  # Only keys from position n_keys onward are candidates for renaming.
  # The right-hand side is fully evaluated before the slice assignment,
  # so name lookups inside the block see the original key list.
  all_keys[n_keys..] =
    all_keys[n_keys..].map do |key|
      next key unless duplicated.include?(key)

      candidate = "#{key}#{suffix}"
      # Advance the suffix until the new name is unused. The advanced
      # suffix stays in effect for the keys processed afterwards.
      while all_keys.include?(candidate)
        following = suffix.succ
        raise DataFrameArgumentError, "suffix #{suffix} is invalid" if following == suffix

        suffix = following
        candidate = "#{key}#{suffix}"
      end
      candidate
    end

  # Rebuild the schema with the new names, keeping each column's data type.
  fields =
    all_keys.each_with_index.map do |name, position|
      Arrow::Field.new(name, joined_table[position].data_type)
    end
  schema = Arrow::Schema.new(fields)
  Arrow::Table.new(schema, joined_table.columns)
end
|
479
|
+
|
480
|
+
# Merge two Arrow::Arrays
|
481
|
+
def merge_array(array1, array2)
  # Mask computed by Arrow's `is_null` function on array1.
  null_mask = Arrow::Function.find(:is_null).execute([array1])
  # `if_else` selects from array2 where the mask holds, array1 elsewhere.
  selector = Arrow::Function.find(:if_else)
  selector.execute([null_mask, array2, array1]).value
end
|
485
|
+
end
|
486
|
+
end
|
@@ -93,7 +93,8 @@ module RedAmber
|
|
93
93
|
levels = tallys.map(&:size)
|
94
94
|
type_groups = @table.columns.map { |column| type_group(column.data_type) }
|
95
95
|
quoted_keys = keys.map(&:inspect)
|
96
|
-
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
96
|
+
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
97
|
+
data: 'data_preview' }
|
97
98
|
header_format = make_header_format(levels, headers, quoted_keys)
|
98
99
|
|
99
100
|
sio = StringIO.new # output string buffer
|
@@ -174,6 +175,8 @@ module RedAmber
|
|
174
175
|
end
|
175
176
|
|
176
177
|
def format_table(width: 80, head: 5, tail: 3, n_digit: 2)
|
178
|
+
return " #{keys.join(' ')}\n (Empty Vectors)\n" if size.zero?
|
179
|
+
|
177
180
|
original = self
|
178
181
|
indices = size > head + tail ? [*0..head, *(size - tail)...size] : [*0...size]
|
179
182
|
df = slice(indices).assign do
|
@@ -199,7 +202,8 @@ module RedAmber
|
|
199
202
|
vectors.each_with_object({}) do |v, assigner|
|
200
203
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
201
204
|
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
202
|
-
assigner[v.key] =
|
205
|
+
assigner[v.key] =
|
206
|
+
original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
203
207
|
end
|
204
208
|
end
|
205
209
|
|
@@ -263,8 +267,6 @@ module RedAmber
|
|
263
267
|
format('%g', element)
|
264
268
|
in Integer
|
265
269
|
format('%d', element)
|
266
|
-
else
|
267
|
-
element
|
268
270
|
end
|
269
271
|
end
|
270
272
|
end
|
@@ -18,7 +18,7 @@ module RedAmber
|
|
18
18
|
# @return [RedAmber::Vector] Sorted indices in Vector
|
19
19
|
def sort_indices(*sort_keys)
|
20
20
|
indices = @table.sort_indices(sort_keys.flatten)
|
21
|
-
Vector.
|
21
|
+
Vector.create(indices)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @return [RedAmber::DataFrame] Sorted DataFrame
|
@@ -32,7 +32,7 @@ module RedAmber
|
|
32
32
|
|
33
33
|
def new_dataframe_by(index_array)
|
34
34
|
t = Arrow::Function.find(:take).execute([@table, index_array]).value
|
35
|
-
|
35
|
+
DataFrame.create(t)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
end
|
@@ -17,14 +17,17 @@ module RedAmber
|
|
17
17
|
end
|
18
18
|
|
19
19
|
# Save DataFrame
|
20
|
+
#
|
21
|
+
# @return [DataFrame] self.
|
20
22
|
def save(output, options = {})
|
21
23
|
@table.save(output, options)
|
24
|
+
self
|
22
25
|
end
|
23
26
|
|
24
27
|
# Save and reload to cast automatically
|
25
28
|
# Via tsv format file temporarily as default
|
26
29
|
#
|
27
|
-
#
|
30
|
+
# @note experimental feature
|
28
31
|
def auto_cast(format: :tsv)
|
29
32
|
return self if empty?
|
30
33
|
|
@@ -8,11 +8,14 @@ module RedAmber
|
|
8
8
|
# @param key [Symbol] key of the index column
|
9
9
|
# to transpose into keys.
|
10
10
|
# If it is not specified, keys[0] is used.
|
11
|
-
# @param
|
12
|
-
# If it is not specified, :NAME is used.
|
11
|
+
# @param name [Symbol] key name of transposed index column.
|
12
|
+
# If it is not specified, :NAME is used.
|
13
|
+
# If it already exists, :NAME1 or :NAME1.succ is used.
|
13
14
|
# @return [DataFrame] transposed DataFrame
|
14
15
|
def transpose(key: keys.first, name: :NAME)
|
15
|
-
|
16
|
+
unless keys.include?(key)
|
17
|
+
raise DataFrameArgumentError, "Self does not include: #{key}"
|
18
|
+
end
|
16
19
|
|
17
20
|
# Find unused name
|
18
21
|
new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
|
@@ -35,14 +38,24 @@ module RedAmber
|
|
35
38
|
# @param value [Symbol, String] key of the column which comes **from values**.
|
36
39
|
# @return [DataFrame] long DataFrame.
|
37
40
|
def to_long(*keep_keys, name: :NAME, value: :VALUE)
|
41
|
+
warn('[Info] No key to keep is specified.') if keep_keys.empty?
|
42
|
+
|
38
43
|
not_included = keep_keys - keys
|
39
|
-
|
44
|
+
unless not_included.empty?
|
45
|
+
raise DataFrameArgumentError, "Not have keys #{not_included}"
|
46
|
+
end
|
40
47
|
|
41
48
|
name = name.to_sym
|
42
|
-
|
49
|
+
if keep_keys.include?(name)
|
50
|
+
raise DataFrameArgumentError,
|
51
|
+
"Can't specify the key: #{name} for the column from keys."
|
52
|
+
end
|
43
53
|
|
44
54
|
value = value.to_sym
|
45
|
-
|
55
|
+
if keep_keys.include?(value)
|
56
|
+
raise DataFrameArgumentError,
|
57
|
+
"Can't specify the key: #{value} for the column from values."
|
58
|
+
end
|
46
59
|
|
47
60
|
hash = Hash.new { |h, k| h[k] = [] }
|
48
61
|
l = keys.size - keep_keys.size
|
@@ -62,15 +75,27 @@ module RedAmber
|
|
62
75
|
|
63
76
|
# Reshape long DataFrame to a wide DataFrame.
|
64
77
|
#
|
65
|
-
# @param name [Symbol, String]
|
66
|
-
#
|
78
|
+
# @param name [Symbol, String]
|
79
|
+
# key of the column which will be expanded **to key names**.
|
80
|
+
# @param value [Symbol, String]
|
81
|
+
# key of the column which will be expanded **to values**.
|
67
82
|
# @return [DataFrame] wide DataFrame.
|
68
83
|
def to_wide(name: :NAME, value: :VALUE)
|
69
84
|
name = name.to_sym
|
70
|
-
|
85
|
+
unless keys.include?(name)
|
86
|
+
raise DataFrameArgumentError,
|
87
|
+
"You are going to keep the key: #{name}. " \
|
88
|
+
'You may need to specify the column name ' \
|
89
|
+
'that gives the new keys by `:name` option.'
|
90
|
+
end
|
71
91
|
|
72
92
|
value = value.to_sym
|
73
|
-
|
93
|
+
unless keys.include?(value)
|
94
|
+
raise DataFrameArgumentError,
|
95
|
+
"You are going to keep the key: #{value}. " \
|
96
|
+
'You may need to specify the column name ' \
|
97
|
+
'that gives the new values by `:value` option.'
|
98
|
+
end
|
74
99
|
|
75
100
|
hash = Hash.new { |h, k| h[k] = {} }
|
76
101
|
keep_keys = keys - [name, value]
|