red_amber 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
@@ -3,6 +3,9 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-in for the class DataFrame
|
5
5
|
module DataFrameCombinable
|
6
|
+
# Refinements for Arrow::Table
|
7
|
+
using RefineArrowTable
|
8
|
+
|
6
9
|
# Concatenate other dataframe onto the bottom.
|
7
10
|
#
|
8
11
|
# @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
|
@@ -30,7 +33,7 @@ module RedAmber
|
|
30
33
|
end
|
31
34
|
end
|
32
35
|
|
33
|
-
DataFrame.
|
36
|
+
DataFrame.create(table.concatenate(table_array))
|
34
37
|
end
|
35
38
|
|
36
39
|
alias_method :concat, :concatenate
|
@@ -58,14 +61,16 @@ module RedAmber
|
|
58
61
|
df =
|
59
62
|
case e
|
60
63
|
when Arrow::Table
|
61
|
-
DataFrame.
|
64
|
+
DataFrame.create(e)
|
62
65
|
when DataFrame
|
63
66
|
e
|
64
67
|
else
|
65
68
|
raise DataFrameArgumentError, "#{e} is not a Table or a DataFrame"
|
66
69
|
end
|
67
70
|
|
68
|
-
|
71
|
+
if size != df.size
|
72
|
+
raise DataFrameArgumentError, "#{e} do not have same size as self"
|
73
|
+
end
|
69
74
|
|
70
75
|
k = keys.intersection(df.keys).any?
|
71
76
|
raise DataFrameArgumentError, "There are some shared keys: #{k}" if k
|
@@ -78,23 +83,80 @@ module RedAmber
|
|
78
83
|
|
79
84
|
alias_method :bind_cols, :merge
|
80
85
|
|
81
|
-
# Mutating joins
|
86
|
+
# Mutating joins (#inner_join, #full_join, #left_join, #right_join)
|
82
87
|
|
83
|
-
# Join
|
88
|
+
# Join another DataFrame or Table, leaving only the matching records.
|
89
|
+
# - Same as `#join` with `type: :inner`
|
90
|
+
# - A kind of mutating join.
|
84
91
|
#
|
85
|
-
#
|
86
|
-
#
|
87
|
-
#
|
92
|
+
# @!macro join_before
|
93
|
+
# @param other [DataFrame, Arrow::Table]
|
94
|
+
# A DataFrame or a Table to be joined with self.
|
95
|
+
#
|
96
|
+
# @!macro join_after
|
97
|
+
# @param suffix [#succ]
|
98
|
+
# a suffix to rename keys when key names conflict as a result of join.
|
99
|
+
# `suffix` must respond to `#succ`.
|
100
|
+
# @return [DataFrame]
|
101
|
+
# Joined dataframe.
|
102
|
+
#
|
103
|
+
# @!macro join_key_in_array
|
104
|
+
# @param join_keys [String, Symbol, Array<String, Symbol>]
|
105
|
+
# A key or keys to match.
|
106
|
+
#
|
107
|
+
# @!macro join_key_in_hash
|
108
|
+
# @param join_key_pairs [Hash]
|
109
|
+
# Pairs of a key name or key names to match in left and right.
|
110
|
+
# @option join_key_pairs [String, Symbol, Array<String, Symbol>] :left
|
111
|
+
# Join keys in `self`.
|
112
|
+
# @option join_key_pairs [String, Symbol, Array<String, Symbol>] :right
|
113
|
+
# Join keys in `other`.
|
114
|
+
#
|
115
|
+
# @overload inner_join(other, suffix: '.1')
|
116
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
117
|
+
# (natural keys). Returns joined dataframe.
|
118
|
+
#
|
119
|
+
# @macro join_before
|
120
|
+
# @macro join_after
|
121
|
+
#
|
122
|
+
# @overload inner_join(other, join_keys, suffix: '.1')
|
123
|
+
#
|
124
|
+
# @macro join_before
|
125
|
+
# @macro join_key_in_array
|
126
|
+
# @macro join_after
|
127
|
+
#
|
128
|
+
# @overload inner_join(other, join_key_pairs, suffix: '.1')
|
129
|
+
#
|
130
|
+
# @macro join_before
|
131
|
+
# @macro join_key_in_hash
|
132
|
+
# @macro join_after
|
88
133
|
#
|
89
134
|
def inner_join(other, join_keys = nil, suffix: '.1')
|
90
135
|
join(other, join_keys, type: :inner, suffix: suffix)
|
91
136
|
end
|
92
137
|
|
93
|
-
# Join
|
138
|
+
# Join another DataFrame or Table, leaving all records.
|
139
|
+
# - Same as `#join` with `type: :full_outer`
|
140
|
+
# - A kind of mutating join.
|
94
141
|
#
|
95
|
-
# @
|
96
|
-
#
|
97
|
-
#
|
142
|
+
# @overload full_join(other, suffix: '.1')
|
143
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
144
|
+
# (natural keys). Returns joined dataframe.
|
145
|
+
#
|
146
|
+
# @macro join_before
|
147
|
+
# @macro join_after
|
148
|
+
#
|
149
|
+
# @overload full_join(other, join_keys, suffix: '.1')
|
150
|
+
#
|
151
|
+
# @macro join_before
|
152
|
+
# @macro join_key_in_array
|
153
|
+
# @macro join_after
|
154
|
+
#
|
155
|
+
# @overload full_join(other, join_key_pairs, suffix: '.1')
|
156
|
+
#
|
157
|
+
# @macro join_before
|
158
|
+
# @macro join_key_in_hash
|
159
|
+
# @macro join_after
|
98
160
|
#
|
99
161
|
def full_join(other, join_keys = nil, suffix: '.1')
|
100
162
|
join(other, join_keys, type: :full_outer, suffix: suffix)
|
@@ -103,181 +165,322 @@ module RedAmber
|
|
103
165
|
alias_method :outer_join, :full_join
|
104
166
|
|
105
167
|
# Join matching values to self from other.
|
168
|
+
# - Same as `#join` with `type: :left_outer`
|
169
|
+
# - A kind of mutating join.
|
106
170
|
#
|
107
|
-
# @
|
108
|
-
#
|
109
|
-
#
|
171
|
+
# @overload left_join(other, suffix: '.1')
|
172
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
173
|
+
# (natural keys). Returns joined dataframe.
|
174
|
+
#
|
175
|
+
# @macro join_before
|
176
|
+
# @macro join_after
|
177
|
+
#
|
178
|
+
# @overload left_join(other, join_keys, suffix: '.1')
|
179
|
+
#
|
180
|
+
# @macro join_before
|
181
|
+
# @macro join_key_in_array
|
182
|
+
# @macro join_after
|
183
|
+
#
|
184
|
+
# @overload left_join(other, join_key_pairs, suffix: '.1')
|
185
|
+
#
|
186
|
+
# @macro join_before
|
187
|
+
# @macro join_key_in_hash
|
188
|
+
# @macro join_after
|
110
189
|
#
|
111
190
|
def left_join(other, join_keys = nil, suffix: '.1')
|
112
191
|
join(other, join_keys, type: :left_outer, suffix: suffix)
|
113
192
|
end
|
114
193
|
|
115
194
|
# Join matching values from self to other.
|
195
|
+
# - Same as `#join` with `type: :right_outer`
|
196
|
+
# - A kind of mutating join.
|
116
197
|
#
|
117
|
-
# @
|
118
|
-
#
|
119
|
-
#
|
198
|
+
# @overload right_join(other, suffix: '.1')
|
199
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
200
|
+
# (natural keys). Returns joined dataframe.
|
201
|
+
#
|
202
|
+
# @macro join_before
|
203
|
+
# @macro join_after
|
204
|
+
#
|
205
|
+
# @overload right_join(other, join_keys, suffix: '.1')
|
206
|
+
#
|
207
|
+
# @macro join_before
|
208
|
+
# @macro join_key_in_array
|
209
|
+
# @macro join_after
|
210
|
+
#
|
211
|
+
# @overload right_join(other, join_key_pairs, suffix: '.1')
|
212
|
+
#
|
213
|
+
# @macro join_before
|
214
|
+
# @macro join_key_in_hash
|
215
|
+
# @macro join_after
|
120
216
|
#
|
121
217
|
def right_join(other, join_keys = nil, suffix: '.1')
|
122
218
|
join(other, join_keys, type: :right_outer, suffix: suffix)
|
123
219
|
end
|
124
220
|
|
125
|
-
# Filtering joins
|
221
|
+
# Filtering joins (#semi_join, #anti_join)
|
126
222
|
|
127
223
|
# Return records of self that have a match in other.
|
224
|
+
# - Same as `#join` with `type: :left_semi`
|
225
|
+
# - A kind of filtering join.
|
128
226
|
#
|
129
|
-
# @
|
130
|
-
#
|
131
|
-
#
|
227
|
+
# @overload semi_join(other, suffix: '.1')
|
228
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
229
|
+
# (natural keys). Returns joined dataframe.
|
230
|
+
#
|
231
|
+
# @macro join_before
|
232
|
+
# @macro join_after
|
233
|
+
#
|
234
|
+
# @overload semi_join(other, join_keys, suffix: '.1')
|
235
|
+
#
|
236
|
+
# @macro join_before
|
237
|
+
# @macro join_key_in_array
|
238
|
+
# @macro join_after
|
239
|
+
#
|
240
|
+
# @overload semi_join(other, join_key_pairs, suffix: '.1')
|
241
|
+
#
|
242
|
+
# @macro join_before
|
243
|
+
# @macro join_key_in_hash
|
244
|
+
# @macro join_after
|
132
245
|
#
|
133
246
|
def semi_join(other, join_keys = nil, suffix: '.1')
|
134
247
|
join(other, join_keys, type: :left_semi, suffix: suffix)
|
135
248
|
end
|
136
249
|
|
137
250
|
# Return records of self that do not have a match in other.
|
251
|
+
# - Same as `#join` with `type: :left_anti`
|
252
|
+
# - A kind of filtering join.
|
138
253
|
#
|
139
|
-
# @
|
140
|
-
#
|
141
|
-
#
|
254
|
+
# @overload anti_join(other, suffix: '.1')
|
255
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
256
|
+
# (natural keys). Returns joined dataframe.
|
257
|
+
#
|
258
|
+
# @macro join_before
|
259
|
+
# @macro join_after
|
260
|
+
#
|
261
|
+
# @overload anti_join(other, join_keys, suffix: '.1')
|
262
|
+
#
|
263
|
+
# @macro join_before
|
264
|
+
# @macro join_key_in_array
|
265
|
+
# @macro join_after
|
266
|
+
#
|
267
|
+
# @overload anti_join(other, join_key_pairs, suffix: '.1')
|
268
|
+
#
|
269
|
+
# @macro join_before
|
270
|
+
# @macro join_key_in_hash
|
271
|
+
# @macro join_after
|
142
272
|
#
|
143
273
|
def anti_join(other, join_keys = nil, suffix: '.1')
|
144
274
|
join(other, join_keys, type: :left_anti, suffix: suffix)
|
145
275
|
end
|
146
276
|
|
147
|
-
# Set operations
|
277
|
+
# Set operations (#intersect, #union, #difference, #set_operable?)
|
148
278
|
|
149
279
|
# Check if set operation with self and other is possible.
|
150
280
|
#
|
151
|
-
# @
|
281
|
+
# @macro join_before
|
282
|
+
#
|
152
283
|
# @return [Boolean] true if set operation is possible.
|
153
284
|
#
|
154
285
|
def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
|
155
|
-
|
156
|
-
keys == other.keys
|
286
|
+
keys == other.keys.map(&:to_sym)
|
157
287
|
end
|
158
288
|
|
159
289
|
# Select records appearing in both self and other.
|
290
|
+
# - Same as `#join` with `type: :inner` when the keys in self are the same as those in other.
|
291
|
+
# - A kind of set operations.
|
292
|
+
#
|
293
|
+
# @macro join_before
|
160
294
|
#
|
161
|
-
# @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
|
162
295
|
# @return [DataFrame] Joined dataframe.
|
163
296
|
#
|
164
297
|
def intersect(other)
|
165
|
-
|
166
|
-
|
298
|
+
unless keys == other.keys.map(&:to_sym)
|
299
|
+
raise DataFrameArgumentError, 'keys are not same with self and other'
|
300
|
+
end
|
167
301
|
|
168
302
|
join(other, keys, type: :inner)
|
169
303
|
end
|
170
304
|
|
171
305
|
# Select records appearing in self or other.
|
306
|
+
# - Same as `#join` with `type: :full_outer` when the keys in self are the same as those in other.
|
307
|
+
# - A kind of set operations.
|
308
|
+
#
|
309
|
+
# @macro join_before
|
172
310
|
#
|
173
|
-
# @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
|
174
311
|
# @return [DataFrame] Joined dataframe.
|
175
312
|
#
|
176
313
|
def union(other)
|
177
|
-
|
178
|
-
|
314
|
+
unless keys == other.keys.map(&:to_sym)
|
315
|
+
raise DataFrameArgumentError, 'keys are not same with self and other'
|
316
|
+
end
|
179
317
|
|
180
318
|
join(other, keys, type: :full_outer)
|
181
319
|
end
|
182
320
|
|
183
321
|
# Select records appearing in self but not in other.
|
322
|
+
# - Same as `#join` with `type: :left_anti` when the keys in self are the same as those in other.
|
323
|
+
# - A kind of set operations.
|
324
|
+
#
|
325
|
+
# @macro join_before
|
184
326
|
#
|
185
|
-
# @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
|
186
327
|
# @return [DataFrame] Joined dataframe.
|
187
328
|
#
|
188
329
|
def difference(other)
|
189
|
-
|
190
|
-
|
330
|
+
unless keys == other.keys.map(&:to_sym)
|
331
|
+
raise DataFrameArgumentError, 'keys are not same with self and other'
|
332
|
+
end
|
191
333
|
|
192
334
|
join(other, keys, type: :left_anti)
|
193
335
|
end
|
194
336
|
|
195
337
|
alias_method :setdiff, :difference
|
196
338
|
|
197
|
-
#
|
198
|
-
|
199
|
-
# Join other dataframe
|
339
|
+
# Join another DataFrame or Table to self.
|
200
340
|
#
|
201
|
-
# @
|
202
|
-
#
|
203
|
-
#
|
341
|
+
# @overload join(other, type: :inner, suffix: '.1')
|
342
|
+
#
|
343
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
344
|
+
# (natural keys). Returns joined dataframe.
|
345
|
+
#
|
346
|
+
# @!macro join_common_type
|
347
|
+
# @param type [:left_semi, :right_semi, :left_anti, :right_anti, :inner,
|
348
|
+
# :left_outer, :right_outer, :full_outer] type of join.
|
204
349
|
#
|
205
|
-
#
|
206
|
-
#
|
207
|
-
|
350
|
+
# @macro join_before
|
351
|
+
# @macro join_common_type
|
352
|
+
# @macro join_after
|
353
|
+
#
|
354
|
+
# @overload join(other, join_keys, type: :inner, suffix: '.1')
|
355
|
+
#
|
356
|
+
# @macro join_before
|
357
|
+
# @macro join_key_in_array
|
358
|
+
# @macro join_common_type
|
359
|
+
# @macro join_after
|
360
|
+
#
|
361
|
+
# @overload join(other, join_key_pairs, type: :inner, suffix: '.1')
|
362
|
+
#
|
363
|
+
# @macro join_before
|
364
|
+
# @macro join_key_in_hash
|
365
|
+
# @macro join_common_type
|
366
|
+
# @macro join_after
|
367
|
+
#
|
368
|
+
def join(other, join_keys = nil, type: :inner, suffix: '.1')
|
208
369
|
case other
|
209
370
|
when DataFrame
|
210
|
-
|
371
|
+
other = other.table
|
211
372
|
when Arrow::Table
|
212
|
-
|
373
|
+
# Nop
|
213
374
|
else
|
214
375
|
raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
|
215
376
|
end
|
216
377
|
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
join_keys =
|
222
|
-
if join_keys
|
223
|
-
Array(join_keys).map(&:to_sym)
|
224
|
-
else
|
225
|
-
natural_keys
|
226
|
-
end
|
227
|
-
return self if join_keys.empty?
|
378
|
+
table_keys = table.keys
|
379
|
+
other_keys = other.keys
|
380
|
+
type = type.to_sym
|
228
381
|
|
229
|
-
#
|
230
|
-
|
231
|
-
unless remainer_keys.empty?
|
232
|
-
renamer = remainer_keys.each_with_object({}) do |key, hash|
|
233
|
-
new_key = nil
|
234
|
-
loop do
|
235
|
-
new_key = "#{key}#{suffix}".to_sym
|
236
|
-
break unless keys.include?(new_key)
|
382
|
+
# natural keys (implicit common keys)
|
383
|
+
join_keys ||= table_keys.intersection(other_keys)
|
237
384
|
|
238
|
-
|
239
|
-
|
385
|
+
# This is not necessary if additional procedure is contributed to Red Arrow.
|
386
|
+
if join_keys.is_a?(Hash)
|
387
|
+
left_keys = join_keys[:left]
|
388
|
+
right_keys = join_keys[:right]
|
389
|
+
else
|
390
|
+
left_keys = join_keys
|
391
|
+
right_keys = join_keys
|
392
|
+
end
|
393
|
+
left_keys = Array(left_keys).map(&:to_s)
|
394
|
+
right_keys = Array(right_keys).map(&:to_s)
|
240
395
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
396
|
+
case type
|
397
|
+
when :full_outer, :left_semi, :left_anti, :right_semi, :right_anti
|
398
|
+
left_outputs = nil
|
399
|
+
right_outputs = nil
|
400
|
+
when :inner, :left_outer
|
401
|
+
left_outputs = table_keys
|
402
|
+
right_outputs = other_keys - right_keys
|
403
|
+
when :right_outer
|
404
|
+
left_outputs = table_keys - left_keys
|
405
|
+
right_outputs = other_keys
|
246
406
|
end
|
247
407
|
|
248
|
-
#
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
408
|
+
# Should we rescue errors in Arrow::Table#join for usability ?
|
409
|
+
joined_table =
|
410
|
+
table.join(other, join_keys,
|
411
|
+
type: type,
|
412
|
+
left_outputs: left_outputs,
|
413
|
+
right_outputs: right_outputs)
|
254
414
|
|
255
415
|
case type
|
256
|
-
when :left_semi, :left_anti, :right_semi, :right_anti
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
416
|
+
when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
|
417
|
+
if joined_table.keys.uniq!
|
418
|
+
DataFrame.create(rename_table(joined_table, n_keys, suffix))
|
419
|
+
else
|
420
|
+
DataFrame.create(joined_table)
|
421
|
+
end
|
422
|
+
when :full_outer
|
423
|
+
renamed_table = rename_table(joined_table, n_keys, suffix)
|
424
|
+
renamed_keys = renamed_table.keys
|
425
|
+
dropper = []
|
426
|
+
DataFrame.create(renamed_table).assign do |df|
|
427
|
+
left_keys.map do |left_key|
|
428
|
+
i_left_key = renamed_keys.index(left_key)
|
429
|
+
right_key = renamed_keys[i_left_key + table_keys.size]
|
430
|
+
dropper << right_key
|
431
|
+
[left_key.to_sym, merge_array(df[left_key].data, df[right_key].data)]
|
432
|
+
end
|
433
|
+
end.drop(dropper)
|
434
|
+
when :right_outer
|
435
|
+
if joined_table.keys.uniq!
|
436
|
+
DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
|
437
|
+
else
|
438
|
+
DataFrame.create(joined_table)
|
439
|
+
end.pick do
|
440
|
+
[right_keys, keys.map(&:to_s) - right_keys]
|
441
|
+
end
|
264
442
|
end
|
265
|
-
DataFrame.new(table_output[selected_indexes])
|
266
|
-
.assign(*join_keys) { merged_columns }
|
267
443
|
end
|
268
444
|
|
269
445
|
private
|
270
446
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
447
|
+
# Rename duplicate keys by suffix
|
448
|
+
def rename_table(joined_table, n_keys, suffix)
|
449
|
+
joined_keys = joined_table.keys
|
450
|
+
other_keys = joined_keys[n_keys..]
|
451
|
+
|
452
|
+
dup_keys = joined_keys.tally.select { |_, v| v > 1 }.keys
|
453
|
+
renamed_right_keys =
|
454
|
+
other_keys.map do |key|
|
455
|
+
if dup_keys.include?(key)
|
456
|
+
new_key = nil
|
457
|
+
loop do
|
458
|
+
new_key = "#{key}#{suffix}"
|
459
|
+
break unless joined_keys.include?(new_key)
|
460
|
+
|
461
|
+
s = suffix.succ
|
462
|
+
raise DataFrameArgumentError, "suffix #{suffix} is invalid" if s == suffix
|
463
|
+
|
464
|
+
suffix = s
|
465
|
+
end
|
466
|
+
new_key
|
467
|
+
else
|
468
|
+
key
|
469
|
+
end
|
470
|
+
end
|
471
|
+
joined_keys[n_keys..] = renamed_right_keys
|
472
|
+
|
473
|
+
fields =
|
474
|
+
joined_keys.map.with_index do |k, i|
|
475
|
+
Arrow::Field.new(k, joined_table[i].data_type)
|
476
|
+
end
|
477
|
+
Arrow::Table.new(Arrow::Schema.new(fields), joined_table.columns)
|
478
|
+
end
|
479
|
+
|
480
|
+
# Merge two Arrow::Arrays
|
481
|
+
def merge_array(array1, array2)
|
482
|
+
t = Arrow::Function.find(:is_null).execute([array1])
|
483
|
+
Arrow::Function.find(:if_else).execute([t, array2, array1]).value
|
281
484
|
end
|
282
485
|
end
|
283
486
|
end
|
@@ -93,7 +93,8 @@ module RedAmber
|
|
93
93
|
levels = tallys.map(&:size)
|
94
94
|
type_groups = @table.columns.map { |column| type_group(column.data_type) }
|
95
95
|
quoted_keys = keys.map(&:inspect)
|
96
|
-
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
96
|
+
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
97
|
+
data: 'data_preview' }
|
97
98
|
header_format = make_header_format(levels, headers, quoted_keys)
|
98
99
|
|
99
100
|
sio = StringIO.new # output string buffer
|
@@ -201,7 +202,8 @@ module RedAmber
|
|
201
202
|
vectors.each_with_object({}) do |v, assigner|
|
202
203
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
203
204
|
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
204
|
-
assigner[v.key] =
|
205
|
+
assigner[v.key] =
|
206
|
+
original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
205
207
|
end
|
206
208
|
end
|
207
209
|
|
@@ -265,8 +267,6 @@ module RedAmber
|
|
265
267
|
format('%g', element)
|
266
268
|
in Integer
|
267
269
|
format('%d', element)
|
268
|
-
else
|
269
|
-
element
|
270
270
|
end
|
271
271
|
end
|
272
272
|
end
|
@@ -18,7 +18,7 @@ module RedAmber
|
|
18
18
|
# @return [RedAmber::Vector] Sorted indices in Vector
|
19
19
|
def sort_indices(*sort_keys)
|
20
20
|
indices = @table.sort_indices(sort_keys.flatten)
|
21
|
-
Vector.
|
21
|
+
Vector.create(indices)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @return [RedAmber::DataFrame] Sorted DataFrame
|
@@ -32,7 +32,7 @@ module RedAmber
|
|
32
32
|
|
33
33
|
def new_dataframe_by(index_array)
|
34
34
|
t = Arrow::Function.find(:take).execute([@table, index_array]).value
|
35
|
-
|
35
|
+
DataFrame.create(t)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
end
|
@@ -17,14 +17,17 @@ module RedAmber
|
|
17
17
|
end
|
18
18
|
|
19
19
|
# Save DataFrame
|
20
|
+
#
|
21
|
+
# @return [DataFrame] self.
|
20
22
|
def save(output, options = {})
|
21
23
|
@table.save(output, options)
|
24
|
+
self
|
22
25
|
end
|
23
26
|
|
24
27
|
# Save and reload to cast automatically
|
25
28
|
# Goes through a temporary file, in TSV format by default.
|
26
29
|
#
|
27
|
-
#
|
30
|
+
# @note experimental feature
|
28
31
|
def auto_cast(format: :tsv)
|
29
32
|
return self if empty?
|
30
33
|
|