red_amber 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
@@ -3,6 +3,9 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-in for the class DataFrame
|
5
5
|
module DataFrameCombinable
|
6
|
+
# Refinements for Arrow::Table
|
7
|
+
using RefineArrowTable
|
8
|
+
|
6
9
|
# Concatenate other dataframe onto the bottom.
|
7
10
|
#
|
8
11
|
# @param other [DataFrame, Arrow::Table, Array<DataFrame, Arrow::Table>]
|
@@ -30,7 +33,7 @@ module RedAmber
|
|
30
33
|
end
|
31
34
|
end
|
32
35
|
|
33
|
-
DataFrame.
|
36
|
+
DataFrame.create(table.concatenate(table_array))
|
34
37
|
end
|
35
38
|
|
36
39
|
alias_method :concat, :concatenate
|
@@ -58,14 +61,16 @@ module RedAmber
|
|
58
61
|
df =
|
59
62
|
case e
|
60
63
|
when Arrow::Table
|
61
|
-
DataFrame.
|
64
|
+
DataFrame.create(e)
|
62
65
|
when DataFrame
|
63
66
|
e
|
64
67
|
else
|
65
68
|
raise DataFrameArgumentError, "#{e} is not a Table or a DataFrame"
|
66
69
|
end
|
67
70
|
|
68
|
-
|
71
|
+
if size != df.size
|
72
|
+
raise DataFrameArgumentError, "#{e} do not have same size as self"
|
73
|
+
end
|
69
74
|
|
70
75
|
k = keys.intersection(df.keys).any?
|
71
76
|
raise DataFrameArgumentError, "There are some shared keys: #{k}" if k
|
@@ -78,23 +83,80 @@ module RedAmber
|
|
78
83
|
|
79
84
|
alias_method :bind_cols, :merge
|
80
85
|
|
81
|
-
# Mutating joins
|
86
|
+
# Mutating joins (#inner_join, #full_join, #left_join, #right_join)
|
82
87
|
|
83
|
-
# Join
|
88
|
+
# Join another DataFrame or Table, leaving only the matching records.
|
89
|
+
# - Same as `#join` with `type: :inner`
|
90
|
+
# - A kind of mutating join.
|
84
91
|
#
|
85
|
-
#
|
86
|
-
#
|
87
|
-
#
|
92
|
+
# @!macro join_before
|
93
|
+
# @param other [DataFrame, Arrow::Table]
|
94
|
+
# A DataFrame or a Table to be joined with self.
|
95
|
+
#
|
96
|
+
# @!macro join_after
|
97
|
+
# @param suffix [#succ]
|
98
|
+
# a suffix to rename keys when key names conflict as a result of join.
|
99
|
+
# `suffix` must respond to `#succ`.
|
100
|
+
# @return [DataFrame]
|
101
|
+
# Joined dataframe.
|
102
|
+
#
|
103
|
+
# @!macro join_key_in_array
|
104
|
+
# @param join_keys [String, Symbol, Array<String, Symbol>]
|
105
|
+
# A key or keys to match.
|
106
|
+
#
|
107
|
+
# @!macro join_key_in_hash
|
108
|
+
# @param join_key_pairs [Hash]
|
109
|
+
# Pairs of a key name or key names to match in left and right.
|
110
|
+
# @option join_key_pairs [String, Symbol, Array<String, Symbol>] :left
|
111
|
+
# Join keys in `self`.
|
112
|
+
# @option join_key_pairs [String, Symbol, Array<String, Symbol>] :right
|
113
|
+
# Join keys in `other`.
|
114
|
+
#
|
115
|
+
# @overload inner_join(other, suffix: '.1')
|
116
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
117
|
+
# (natural keys). Returns joined dataframe.
|
118
|
+
#
|
119
|
+
# @macro join_before
|
120
|
+
# @macro join_after
|
121
|
+
#
|
122
|
+
# @overload inner_join(other, join_keys, suffix: '.1')
|
123
|
+
#
|
124
|
+
# @macro join_before
|
125
|
+
# @macro join_key_in_array
|
126
|
+
# @macro join_after
|
127
|
+
#
|
128
|
+
# @overload inner_join(other, join_key_pairs, suffix: '.1')
|
129
|
+
#
|
130
|
+
# @macro join_before
|
131
|
+
# @macro join_key_in_hash
|
132
|
+
# @macro join_after
|
88
133
|
#
|
89
134
|
def inner_join(other, join_keys = nil, suffix: '.1')
|
90
135
|
join(other, join_keys, type: :inner, suffix: suffix)
|
91
136
|
end
|
92
137
|
|
93
|
-
# Join
|
138
|
+
# Join another DataFrame or Table, leaving all records.
|
139
|
+
# - Same as `#join` with `type: :full_outer`
|
140
|
+
# - A kind of mutating join.
|
94
141
|
#
|
95
|
-
# @
|
96
|
-
#
|
97
|
-
#
|
142
|
+
# @overload full_join(other, suffix: '.1')
|
143
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
144
|
+
# (natural keys). Returns joined dataframe.
|
145
|
+
#
|
146
|
+
# @macro join_before
|
147
|
+
# @macro join_after
|
148
|
+
#
|
149
|
+
# @overload full_join(other, join_keys, suffix: '.1')
|
150
|
+
#
|
151
|
+
# @macro join_before
|
152
|
+
# @macro join_key_in_array
|
153
|
+
# @macro join_after
|
154
|
+
#
|
155
|
+
# @overload full_join(other, join_key_pairs, suffix: '.1')
|
156
|
+
#
|
157
|
+
# @macro join_before
|
158
|
+
# @macro join_key_in_hash
|
159
|
+
# @macro join_after
|
98
160
|
#
|
99
161
|
def full_join(other, join_keys = nil, suffix: '.1')
|
100
162
|
join(other, join_keys, type: :full_outer, suffix: suffix)
|
@@ -103,181 +165,322 @@ module RedAmber
|
|
103
165
|
alias_method :outer_join, :full_join
|
104
166
|
|
105
167
|
# Join matching values to self from other.
|
168
|
+
# - Same as `#join` with `type: :left_outer`
|
169
|
+
# - A kind of mutating join.
|
106
170
|
#
|
107
|
-
# @
|
108
|
-
#
|
109
|
-
#
|
171
|
+
# @overload left_join(other, suffix: '.1')
|
172
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
173
|
+
# (natural keys). Returns joined dataframe.
|
174
|
+
#
|
175
|
+
# @macro join_before
|
176
|
+
# @macro join_after
|
177
|
+
#
|
178
|
+
# @overload left_join(other, join_keys, suffix: '.1')
|
179
|
+
#
|
180
|
+
# @macro join_before
|
181
|
+
# @macro join_key_in_array
|
182
|
+
# @macro join_after
|
183
|
+
#
|
184
|
+
# @overload left_join(other, join_key_pairs, suffix: '.1')
|
185
|
+
#
|
186
|
+
# @macro join_before
|
187
|
+
# @macro join_key_in_hash
|
188
|
+
# @macro join_after
|
110
189
|
#
|
111
190
|
def left_join(other, join_keys = nil, suffix: '.1')
|
112
191
|
join(other, join_keys, type: :left_outer, suffix: suffix)
|
113
192
|
end
|
114
193
|
|
115
194
|
# Join matching values from self to other.
|
195
|
+
# - Same as `#join` with `type: :right_outer`
|
196
|
+
# - A kind of mutating join.
|
116
197
|
#
|
117
|
-
# @
|
118
|
-
#
|
119
|
-
#
|
198
|
+
# @overload right_join(other, suffix: '.1')
|
199
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
200
|
+
# (natural keys). Returns joined dataframe.
|
201
|
+
#
|
202
|
+
# @macro join_before
|
203
|
+
# @macro join_after
|
204
|
+
#
|
205
|
+
# @overload right_join(other, join_keys, suffix: '.1')
|
206
|
+
#
|
207
|
+
# @macro join_before
|
208
|
+
# @macro join_key_in_array
|
209
|
+
# @macro join_after
|
210
|
+
#
|
211
|
+
# @overload right_join(other, join_key_pairs, suffix: '.1')
|
212
|
+
#
|
213
|
+
# @macro join_before
|
214
|
+
# @macro join_key_in_hash
|
215
|
+
# @macro join_after
|
120
216
|
#
|
121
217
|
def right_join(other, join_keys = nil, suffix: '.1')
|
122
218
|
join(other, join_keys, type: :right_outer, suffix: suffix)
|
123
219
|
end
|
124
220
|
|
125
|
-
# Filtering joins
|
221
|
+
# Filtering joins (#semi_join, #anti_join)
|
126
222
|
|
127
223
|
# Return records of self that have a match in other.
|
224
|
+
# - Same as `#join` with `type: :left_semi`
|
225
|
+
# - A kind of filtering join.
|
128
226
|
#
|
129
|
-
# @
|
130
|
-
#
|
131
|
-
#
|
227
|
+
# @overload semi_join(other, suffix: '.1')
|
228
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
229
|
+
# (natural keys). Returns joined dataframe.
|
230
|
+
#
|
231
|
+
# @macro join_before
|
232
|
+
# @macro join_after
|
233
|
+
#
|
234
|
+
# @overload semi_join(other, join_keys, suffix: '.1')
|
235
|
+
#
|
236
|
+
# @macro join_before
|
237
|
+
# @macro join_key_in_array
|
238
|
+
# @macro join_after
|
239
|
+
#
|
240
|
+
# @overload semi_join(other, join_key_pairs, suffix: '.1')
|
241
|
+
#
|
242
|
+
# @macro join_before
|
243
|
+
# @macro join_key_in_hash
|
244
|
+
# @macro join_after
|
132
245
|
#
|
133
246
|
def semi_join(other, join_keys = nil, suffix: '.1')
|
134
247
|
join(other, join_keys, type: :left_semi, suffix: suffix)
|
135
248
|
end
|
136
249
|
|
137
250
|
# Return records of self that do not have a match in other.
|
251
|
+
# - Same as `#join` with `type: :left_anti`
|
252
|
+
# - A kind of filtering join.
|
138
253
|
#
|
139
|
-
# @
|
140
|
-
#
|
141
|
-
#
|
254
|
+
# @overload anti_join(other, suffix: '.1')
|
255
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
256
|
+
# (natural keys). Returns joined dataframe.
|
257
|
+
#
|
258
|
+
# @macro join_before
|
259
|
+
# @macro join_after
|
260
|
+
#
|
261
|
+
# @overload anti_join(other, join_keys, suffix: '.1')
|
262
|
+
#
|
263
|
+
# @macro join_before
|
264
|
+
# @macro join_key_in_array
|
265
|
+
# @macro join_after
|
266
|
+
#
|
267
|
+
# @overload anti_join(other, join_key_pairs, suffix: '.1')
|
268
|
+
#
|
269
|
+
# @macro join_before
|
270
|
+
# @macro join_key_in_hash
|
271
|
+
# @macro join_after
|
142
272
|
#
|
143
273
|
def anti_join(other, join_keys = nil, suffix: '.1')
|
144
274
|
join(other, join_keys, type: :left_anti, suffix: suffix)
|
145
275
|
end
|
146
276
|
|
147
|
-
# Set operations
|
277
|
+
# Set operations (#intersect, #union, #difference, #set_operable?)
|
148
278
|
|
149
279
|
# Check if set operation with self and other is possible.
|
150
280
|
#
|
151
|
-
# @
|
281
|
+
# @macro join_before
|
282
|
+
#
|
152
283
|
# @return [Boolean] true if set operation is possible.
|
153
284
|
#
|
154
285
|
def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
|
155
|
-
|
156
|
-
keys == other.keys
|
286
|
+
keys == other.keys.map(&:to_sym)
|
157
287
|
end
|
158
288
|
|
159
289
|
# Select records appearing in both self and other.
|
290
|
+
# - Same as `#join` with `type: :inner` when keys in self are the same as in other.
|
291
|
+
# - A kind of set operations.
|
292
|
+
#
|
293
|
+
# @macro join_before
|
160
294
|
#
|
161
|
-
# @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
|
162
295
|
# @return [DataFrame] Joined dataframe.
|
163
296
|
#
|
164
297
|
def intersect(other)
|
165
|
-
|
166
|
-
|
298
|
+
unless keys == other.keys.map(&:to_sym)
|
299
|
+
raise DataFrameArgumentError, 'keys are not same with self and other'
|
300
|
+
end
|
167
301
|
|
168
302
|
join(other, keys, type: :inner)
|
169
303
|
end
|
170
304
|
|
171
305
|
# Select records appearing in self or other.
|
306
|
+
# - Same as `#join` with `type: :full_outer` when keys in self are the same as in other.
|
307
|
+
# - A kind of set operations.
|
308
|
+
#
|
309
|
+
# @macro join_before
|
172
310
|
#
|
173
|
-
# @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
|
174
311
|
# @return [DataFrame] Joined dataframe.
|
175
312
|
#
|
176
313
|
def union(other)
|
177
|
-
|
178
|
-
|
314
|
+
unless keys == other.keys.map(&:to_sym)
|
315
|
+
raise DataFrameArgumentError, 'keys are not same with self and other'
|
316
|
+
end
|
179
317
|
|
180
318
|
join(other, keys, type: :full_outer)
|
181
319
|
end
|
182
320
|
|
183
321
|
# Select records appearing in self but not in other.
|
322
|
+
# - Same as `#join` with `type: :left_anti` when keys in self are the same as in other.
|
323
|
+
# - A kind of set operations.
|
324
|
+
#
|
325
|
+
# @macro join_before
|
184
326
|
#
|
185
|
-
# @param other [DataFrame, Arrow::Table] DataFrame/Table to be joined with self.
|
186
327
|
# @return [DataFrame] Joined dataframe.
|
187
328
|
#
|
188
329
|
def difference(other)
|
189
|
-
|
190
|
-
|
330
|
+
unless keys == other.keys.map(&:to_sym)
|
331
|
+
raise DataFrameArgumentError, 'keys are not same with self and other'
|
332
|
+
end
|
191
333
|
|
192
334
|
join(other, keys, type: :left_anti)
|
193
335
|
end
|
194
336
|
|
195
337
|
alias_method :setdiff, :difference
|
196
338
|
|
197
|
-
#
|
198
|
-
|
199
|
-
# Join other dataframe
|
339
|
+
# Join another DataFrame or Table to self.
|
200
340
|
#
|
201
|
-
# @
|
202
|
-
#
|
203
|
-
#
|
341
|
+
# @overload join(other, type: :inner, suffix: '.1')
|
342
|
+
#
|
343
|
+
# If `join_keys` is not specified, common keys in self and other are used
|
344
|
+
# (natural keys). Returns joined dataframe.
|
345
|
+
#
|
346
|
+
# @!macro join_common_type
|
347
|
+
# @param type [:left_semi, :right_semi, :left_anti, :right_anti, :inner,
|
348
|
+
# :left_outer, :right_outer, :full_outer] type of join.
|
204
349
|
#
|
205
|
-
#
|
206
|
-
#
|
207
|
-
|
350
|
+
# @macro join_before
|
351
|
+
# @macro join_common_type
|
352
|
+
# @macro join_after
|
353
|
+
#
|
354
|
+
# @overload join(other, join_keys, type: :inner, suffix: '.1')
|
355
|
+
#
|
356
|
+
# @macro join_before
|
357
|
+
# @macro join_key_in_array
|
358
|
+
# @macro join_common_type
|
359
|
+
# @macro join_after
|
360
|
+
#
|
361
|
+
# @overload join(other, join_key_pairs, type: :inner, suffix: '.1')
|
362
|
+
#
|
363
|
+
# @macro join_before
|
364
|
+
# @macro join_key_in_hash
|
365
|
+
# @macro join_common_type
|
366
|
+
# @macro join_after
|
367
|
+
#
|
368
|
+
def join(other, join_keys = nil, type: :inner, suffix: '.1')
|
208
369
|
case other
|
209
370
|
when DataFrame
|
210
|
-
|
371
|
+
other = other.table
|
211
372
|
when Arrow::Table
|
212
|
-
|
373
|
+
# Nop
|
213
374
|
else
|
214
375
|
raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
|
215
376
|
end
|
216
377
|
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
join_keys =
|
222
|
-
if join_keys
|
223
|
-
Array(join_keys).map(&:to_sym)
|
224
|
-
else
|
225
|
-
natural_keys
|
226
|
-
end
|
227
|
-
return self if join_keys.empty?
|
378
|
+
table_keys = table.keys
|
379
|
+
other_keys = other.keys
|
380
|
+
type = type.to_sym
|
228
381
|
|
229
|
-
#
|
230
|
-
|
231
|
-
unless remainer_keys.empty?
|
232
|
-
renamer = remainer_keys.each_with_object({}) do |key, hash|
|
233
|
-
new_key = nil
|
234
|
-
loop do
|
235
|
-
new_key = "#{key}#{suffix}".to_sym
|
236
|
-
break unless keys.include?(new_key)
|
382
|
+
# natural keys (implicit common keys)
|
383
|
+
join_keys ||= table_keys.intersection(other_keys)
|
237
384
|
|
238
|
-
|
239
|
-
|
385
|
+
# This would not be necessary if an additional procedure were contributed to Red Arrow.
|
386
|
+
if join_keys.is_a?(Hash)
|
387
|
+
left_keys = join_keys[:left]
|
388
|
+
right_keys = join_keys[:right]
|
389
|
+
else
|
390
|
+
left_keys = join_keys
|
391
|
+
right_keys = join_keys
|
392
|
+
end
|
393
|
+
left_keys = Array(left_keys).map(&:to_s)
|
394
|
+
right_keys = Array(right_keys).map(&:to_s)
|
240
395
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
396
|
+
case type
|
397
|
+
when :full_outer, :left_semi, :left_anti, :right_semi, :right_anti
|
398
|
+
left_outputs = nil
|
399
|
+
right_outputs = nil
|
400
|
+
when :inner, :left_outer
|
401
|
+
left_outputs = table_keys
|
402
|
+
right_outputs = other_keys - right_keys
|
403
|
+
when :right_outer
|
404
|
+
left_outputs = table_keys - left_keys
|
405
|
+
right_outputs = other_keys
|
246
406
|
end
|
247
407
|
|
248
|
-
#
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
408
|
+
# Should we rescue errors in Arrow::Table#join for usability ?
|
409
|
+
joined_table =
|
410
|
+
table.join(other, join_keys,
|
411
|
+
type: type,
|
412
|
+
left_outputs: left_outputs,
|
413
|
+
right_outputs: right_outputs)
|
254
414
|
|
255
415
|
case type
|
256
|
-
when :left_semi, :left_anti, :right_semi, :right_anti
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
416
|
+
when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
|
417
|
+
if joined_table.keys.uniq!
|
418
|
+
DataFrame.create(rename_table(joined_table, n_keys, suffix))
|
419
|
+
else
|
420
|
+
DataFrame.create(joined_table)
|
421
|
+
end
|
422
|
+
when :full_outer
|
423
|
+
renamed_table = rename_table(joined_table, n_keys, suffix)
|
424
|
+
renamed_keys = renamed_table.keys
|
425
|
+
dropper = []
|
426
|
+
DataFrame.create(renamed_table).assign do |df|
|
427
|
+
left_keys.map do |left_key|
|
428
|
+
i_left_key = renamed_keys.index(left_key)
|
429
|
+
right_key = renamed_keys[i_left_key + table_keys.size]
|
430
|
+
dropper << right_key
|
431
|
+
[left_key.to_sym, merge_array(df[left_key].data, df[right_key].data)]
|
432
|
+
end
|
433
|
+
end.drop(dropper)
|
434
|
+
when :right_outer
|
435
|
+
if joined_table.keys.uniq!
|
436
|
+
DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
|
437
|
+
else
|
438
|
+
DataFrame.create(joined_table)
|
439
|
+
end.pick do
|
440
|
+
[right_keys, keys.map(&:to_s) - right_keys]
|
441
|
+
end
|
264
442
|
end
|
265
|
-
DataFrame.new(table_output[selected_indexes])
|
266
|
-
.assign(*join_keys) { merged_columns }
|
267
443
|
end
|
268
444
|
|
269
445
|
private
|
270
446
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
447
|
+
# Rename duplicate keys by suffix
|
448
|
+
def rename_table(joined_table, n_keys, suffix)
|
449
|
+
joined_keys = joined_table.keys
|
450
|
+
other_keys = joined_keys[n_keys..]
|
451
|
+
|
452
|
+
dup_keys = joined_keys.tally.select { |_, v| v > 1 }.keys
|
453
|
+
renamed_right_keys =
|
454
|
+
other_keys.map do |key|
|
455
|
+
if dup_keys.include?(key)
|
456
|
+
new_key = nil
|
457
|
+
loop do
|
458
|
+
new_key = "#{key}#{suffix}"
|
459
|
+
break unless joined_keys.include?(new_key)
|
460
|
+
|
461
|
+
s = suffix.succ
|
462
|
+
raise DataFrameArgumentError, "suffix #{suffix} is invalid" if s == suffix
|
463
|
+
|
464
|
+
suffix = s
|
465
|
+
end
|
466
|
+
new_key
|
467
|
+
else
|
468
|
+
key
|
469
|
+
end
|
470
|
+
end
|
471
|
+
joined_keys[n_keys..] = renamed_right_keys
|
472
|
+
|
473
|
+
fields =
|
474
|
+
joined_keys.map.with_index do |k, i|
|
475
|
+
Arrow::Field.new(k, joined_table[i].data_type)
|
476
|
+
end
|
477
|
+
Arrow::Table.new(Arrow::Schema.new(fields), joined_table.columns)
|
478
|
+
end
|
479
|
+
|
480
|
+
# Merge two Arrow::Arrays
|
481
|
+
def merge_array(array1, array2)
|
482
|
+
t = Arrow::Function.find(:is_null).execute([array1])
|
483
|
+
Arrow::Function.find(:if_else).execute([t, array2, array1]).value
|
281
484
|
end
|
282
485
|
end
|
283
486
|
end
|
@@ -93,7 +93,8 @@ module RedAmber
|
|
93
93
|
levels = tallys.map(&:size)
|
94
94
|
type_groups = @table.columns.map { |column| type_group(column.data_type) }
|
95
95
|
quoted_keys = keys.map(&:inspect)
|
96
|
-
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
96
|
+
headers = { idx: '#', key: 'key', type: 'type', levels: 'level',
|
97
|
+
data: 'data_preview' }
|
97
98
|
header_format = make_header_format(levels, headers, quoted_keys)
|
98
99
|
|
99
100
|
sio = StringIO.new # output string buffer
|
@@ -201,7 +202,8 @@ module RedAmber
|
|
201
202
|
vectors.each_with_object({}) do |v, assigner|
|
202
203
|
vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
|
203
204
|
.replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
|
204
|
-
assigner[v.key] =
|
205
|
+
assigner[v.key] =
|
206
|
+
original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
|
205
207
|
end
|
206
208
|
end
|
207
209
|
|
@@ -265,8 +267,6 @@ module RedAmber
|
|
265
267
|
format('%g', element)
|
266
268
|
in Integer
|
267
269
|
format('%d', element)
|
268
|
-
else
|
269
|
-
element
|
270
270
|
end
|
271
271
|
end
|
272
272
|
end
|
@@ -18,7 +18,7 @@ module RedAmber
|
|
18
18
|
# @return [RedAmber::Vector] Sorted indices in Vector
|
19
19
|
def sort_indices(*sort_keys)
|
20
20
|
indices = @table.sort_indices(sort_keys.flatten)
|
21
|
-
Vector.
|
21
|
+
Vector.create(indices)
|
22
22
|
end
|
23
23
|
|
24
24
|
# @return [RedAmber::DataFrame] Sorted DataFrame
|
@@ -32,7 +32,7 @@ module RedAmber
|
|
32
32
|
|
33
33
|
def new_dataframe_by(index_array)
|
34
34
|
t = Arrow::Function.find(:take).execute([@table, index_array]).value
|
35
|
-
|
35
|
+
DataFrame.create(t)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
end
|
@@ -17,14 +17,17 @@ module RedAmber
|
|
17
17
|
end
|
18
18
|
|
19
19
|
# Save DataFrame
|
20
|
+
#
|
21
|
+
# @return [DataFrame] self.
|
20
22
|
def save(output, options = {})
|
21
23
|
@table.save(output, options)
|
24
|
+
self
|
22
25
|
end
|
23
26
|
|
24
27
|
# Save and reload to cast automatically
|
25
28
|
# Uses a temporary file in TSV format by default.
|
26
29
|
#
|
27
|
-
#
|
30
|
+
# @note experimental feature
|
28
31
|
def auto_cast(format: :tsv)
|
29
32
|
return self if empty?
|
30
33
|
|