red_amber 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
@@ -3,71 +3,145 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-ins for the class DataFrame
|
5
5
|
module DataFrameVariableOperation
|
6
|
-
#
|
6
|
+
# Array is refined
|
7
|
+
using RefineArray
|
8
|
+
|
9
|
+
# Pick up variables (columns) to create a new DataFrame
|
10
|
+
#
|
11
|
+
# @note DataFrame#pick creates a DataFrame with single key.
|
12
|
+
# DataFrame#[] creates a Vector if single key is specified.
|
13
|
+
#
|
14
|
+
# @overload pick(keys)
|
15
|
+
# Pick variables by Symbols or Strings.
|
16
|
+
#
|
17
|
+
# @param keys [Symbol, String, <Symbol, String>]
|
18
|
+
# key name(s) of variables to pick.
|
19
|
+
# @return [DataFrame]
|
20
|
+
# Picked DataFrame.
|
21
|
+
#
|
22
|
+
# @overload pick(booleans)
|
23
|
+
# Pick variables by booleans.
|
24
|
+
#
|
25
|
+
# @param booleans [<true, false, nil>]
|
26
|
+
# boolean array to pick variables at true.
|
27
|
+
# @return [DataFrame]
|
28
|
+
# Picked DataFrame.
|
29
|
+
#
|
30
|
+
# @overload pick(indices)
|
31
|
+
# Pick variables by column indices.
|
32
|
+
#
|
33
|
+
# @param indices [Integer, Float, Range<Integer>, Vector, Arrow::Array]
|
34
|
+
# numeric array to pick variables by column index.
|
35
|
+
# @return [DataFrame]
|
36
|
+
# Picked DataFrame.
|
37
|
+
#
|
7
38
|
def pick(*args, &block)
|
8
|
-
picker = args
|
9
39
|
if block
|
10
|
-
|
40
|
+
unless args.empty?
|
41
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and block.'
|
42
|
+
end
|
11
43
|
|
12
|
-
|
44
|
+
args = [instance_eval(&block)]
|
13
45
|
end
|
14
|
-
picker.flatten!
|
15
|
-
return DataFrame.new if picker.empty? || picker == [nil]
|
16
|
-
|
17
|
-
key_vector = Vector.new(keys)
|
18
|
-
vec = parse_to_vector(picker, vsize: n_keys)
|
19
|
-
|
20
|
-
ary =
|
21
|
-
if vec.boolean?
|
22
|
-
key_vector.filter(*vec).to_a
|
23
|
-
elsif vec.numeric?
|
24
|
-
key_vector.take(*vec).to_a
|
25
|
-
elsif vec.string? || vec.dictionary?
|
26
|
-
vec.to_a
|
27
|
-
else
|
28
|
-
raise DataFrameArgumentError, "Invalid argument #{args}"
|
29
|
-
end
|
30
46
|
|
31
|
-
|
32
|
-
|
33
|
-
|
47
|
+
case args
|
48
|
+
in [] | [nil]
|
49
|
+
return DataFrame.new
|
50
|
+
in [*] if args.symbols?
|
51
|
+
return DataFrame.create(@table.select_columns(*args))
|
52
|
+
in [*] if args.booleans?
|
53
|
+
picker = keys.select_by_booleans(args)
|
54
|
+
return DataFrame.create(@table.select_columns(*picker))
|
55
|
+
in [(Vector | Arrow::Array | Arrow::ChunkedArray) => a]
|
56
|
+
picker = a.to_a
|
57
|
+
else
|
58
|
+
picker = parse_args(args, n_keys)
|
59
|
+
end
|
60
|
+
|
61
|
+
return DataFrame.new if picker.compact.empty?
|
62
|
+
|
63
|
+
if picker.booleans?
|
64
|
+
picker = keys.select_by_booleans(picker)
|
65
|
+
return DataFrame.create(@table.select_columns(*picker))
|
66
|
+
end
|
67
|
+
picker.compact!
|
68
|
+
raise DataFrameArgumentError, "some keys are duplicated: #{args}" if picker.uniq!
|
69
|
+
|
70
|
+
DataFrame.create(@table.select_columns(*picker))
|
34
71
|
end
|
35
72
|
|
36
|
-
#
|
73
|
+
# Drop some variables (columns) to create a remainder DataFrame
|
74
|
+
#
|
75
|
+
# @note DataFrame#drop creates a DataFrame even if it is a single column.
|
76
|
+
#
|
77
|
+
# @overload drop(keys)
|
78
|
+
# Drop variables by Symbols or Strings.
|
79
|
+
#
|
80
|
+
# @param keys [Symbol, String, <Symbol, String>]
|
81
|
+
# key name(s) of variables to drop.
|
82
|
+
# @return [DataFrame]
|
83
|
+
# Remainder DataFrame.
|
84
|
+
#
|
85
|
+
# @overload drop(booleans)
|
86
|
+
# Drop variables by booleans.
|
87
|
+
#
|
88
|
+
# @param booleans [<true, false, nil>]
|
89
|
+
# boolean array of variables to drop at true.
|
90
|
+
# @return [DataFrame]
|
91
|
+
# Remainder DataFrame.
|
92
|
+
#
|
93
|
+
# @overload drop(indices)
|
94
|
+
# Drop variables by column indices.
|
95
|
+
#
|
96
|
+
# @param indices [Integer, Float, Range<Integer>, Vector, Arrow::Array]
|
97
|
+
# numeric array of variables to drop by column index.
|
98
|
+
# @return [DataFrame]
|
99
|
+
# Remainder DataFrame.
|
100
|
+
#
|
37
101
|
def drop(*args, &block)
|
38
|
-
dropper = args
|
39
102
|
if block
|
40
|
-
|
103
|
+
unless args.empty?
|
104
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and block.'
|
105
|
+
end
|
41
106
|
|
42
|
-
|
107
|
+
args = [instance_eval(&block)]
|
43
108
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
keys - key_vector.take(*vec).each.map(&:to_sym) # Array
|
54
|
-
elsif vec.string? || vec.dictionary?
|
55
|
-
keys - vec.to_a.map { _1&.to_sym } # Array
|
109
|
+
return self if args.empty? || empty?
|
110
|
+
|
111
|
+
picker =
|
112
|
+
if args.symbols?
|
113
|
+
keys - args
|
114
|
+
elsif args.booleans?
|
115
|
+
keys.reject_by_booleans(args)
|
116
|
+
elsif args.integers?
|
117
|
+
keys.reject_by_indices(args)
|
56
118
|
else
|
57
|
-
|
119
|
+
dropper = parse_args(args, n_keys)
|
120
|
+
if dropper.booleans?
|
121
|
+
keys.reject_by_booleans(dropper)
|
122
|
+
elsif dropper.symbols?
|
123
|
+
keys - dropper
|
124
|
+
else
|
125
|
+
dropper.compact!
|
126
|
+
unless dropper.integers?
|
127
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
128
|
+
end
|
129
|
+
|
130
|
+
keys.reject_by_indices(dropper)
|
131
|
+
end
|
58
132
|
end
|
59
133
|
|
60
|
-
return DataFrame.new if
|
134
|
+
return DataFrame.new if picker.empty?
|
61
135
|
|
62
|
-
|
63
|
-
# DataFrame#drop creates a DataFrame with single key.
|
64
|
-
DataFrame.new(@table[ary])
|
136
|
+
DataFrame.create(@table.select_columns(*picker))
|
65
137
|
end
|
66
138
|
|
67
139
|
# rename variables to create a new DataFrame
|
68
140
|
def rename(*renamer, &block)
|
69
141
|
if block
|
70
|
-
|
142
|
+
unless renamer.empty?
|
143
|
+
raise DataFrameArgumentError, 'Must not specify both arguments and a block'
|
144
|
+
end
|
71
145
|
|
72
146
|
renamer = [instance_eval(&block)]
|
73
147
|
end
|
@@ -90,35 +164,23 @@ module RedAmber
|
|
90
164
|
|
91
165
|
# assign variables to create a new DataFrame
|
92
166
|
def assign(*assigner, &block)
|
93
|
-
|
94
|
-
return self if appender.is_a?(DataFrame)
|
95
|
-
|
96
|
-
append_to_fields_and_arrays(appender, fields, arrays, append_to_left: false) unless appender.empty?
|
97
|
-
|
98
|
-
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
167
|
+
assign_update(*assigner, append_to_left: false, &block)
|
99
168
|
end
|
100
169
|
|
101
170
|
def assign_left(*assigner, &block)
|
102
|
-
|
103
|
-
return self if appender.is_a?(DataFrame)
|
104
|
-
|
105
|
-
append_to_fields_and_arrays(appender, fields, arrays, append_to_left: true) unless appender.empty?
|
106
|
-
|
107
|
-
DataFrame.new(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
171
|
+
assign_update(*assigner, append_to_left: true, &block)
|
108
172
|
end
|
109
173
|
|
110
174
|
private
|
111
175
|
|
112
|
-
def assign_update(*assigner, &block)
|
176
|
+
def assign_update(*assigner, append_to_left: false, &block)
|
113
177
|
if block
|
114
178
|
assigner_from_block = instance_eval(&block)
|
115
179
|
assigner =
|
116
|
-
|
117
|
-
|
180
|
+
case assigner_from_block
|
181
|
+
in _ if assigner.empty? # block only
|
118
182
|
[assigner_from_block]
|
119
|
-
|
120
|
-
# assigner_from_block in [Array, *]
|
121
|
-
elsif multiple_assigner?(assigner_from_block)
|
183
|
+
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
122
184
|
assigner.zip(assigner_from_block)
|
123
185
|
else
|
124
186
|
assigner.zip([assigner_from_block])
|
@@ -128,10 +190,10 @@ module RedAmber
|
|
128
190
|
case assigner
|
129
191
|
in [] | [nil] | [{}] | [[]]
|
130
192
|
return self
|
131
|
-
in [Hash => key_array_pairs]
|
132
|
-
# noop
|
133
193
|
in [(Symbol | String) => key, (Vector | Array | Arrow::Array) => array]
|
134
194
|
key_array_pairs = { key => array }
|
195
|
+
in [Hash => key_array_pairs]
|
196
|
+
# noop
|
135
197
|
in [Array => array_in_array]
|
136
198
|
key_array_pairs = try_convert_to_hash(array_in_array)
|
137
199
|
in [Array, *] => array_in_array1
|
@@ -151,20 +213,27 @@ module RedAmber
|
|
151
213
|
appender[key] = array
|
152
214
|
end
|
153
215
|
end
|
154
|
-
|
216
|
+
fields, arrays = *update_fields_and_arrays(updater)
|
217
|
+
return self if appender.is_a?(DataFrame)
|
218
|
+
|
219
|
+
unless appender.empty?
|
220
|
+
append_to_fields_and_arrays(appender, fields, arrays, append_to_left)
|
221
|
+
end
|
222
|
+
|
223
|
+
DataFrame.create(Arrow::Table.new(Arrow::Schema.new(fields), arrays))
|
155
224
|
end
|
156
225
|
|
157
226
|
def try_convert_to_hash(array)
|
158
227
|
array.to_h
|
159
228
|
rescue TypeError
|
160
229
|
[array].to_h
|
161
|
-
rescue TypeError # rubocop:disable Lint/DuplicateRescueException
|
162
|
-
raise DataFrameArgumentError, "Invalid argument in Array #{array}"
|
163
230
|
end
|
164
231
|
|
165
232
|
def rename_by_hash(key_pairs)
|
166
233
|
not_existing_keys = key_pairs.keys - keys
|
167
|
-
|
234
|
+
unless not_existing_keys.empty?
|
235
|
+
raise DataFrameArgumentError, "Not existing: #{not_existing_keys}"
|
236
|
+
end
|
168
237
|
|
169
238
|
fields =
|
170
239
|
keys.map do |key|
|
@@ -175,7 +244,7 @@ module RedAmber
|
|
175
244
|
@table.schema[key]
|
176
245
|
end
|
177
246
|
end
|
178
|
-
DataFrame.
|
247
|
+
DataFrame.create(Arrow::Table.new(Arrow::Schema.new(fields), @table.columns))
|
179
248
|
end
|
180
249
|
|
181
250
|
def update_fields_and_arrays(updater)
|
@@ -185,7 +254,9 @@ module RedAmber
|
|
185
254
|
data = updater[key]
|
186
255
|
next unless data
|
187
256
|
|
188
|
-
|
257
|
+
if data.size != size
|
258
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})"
|
259
|
+
end
|
189
260
|
|
190
261
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
191
262
|
fields[i] = Arrow::Field.new(key, a.value_data_type)
|
@@ -194,10 +265,12 @@ module RedAmber
|
|
194
265
|
[fields, arrays]
|
195
266
|
end
|
196
267
|
|
197
|
-
def append_to_fields_and_arrays(appender, fields, arrays, append_to_left
|
268
|
+
def append_to_fields_and_arrays(appender, fields, arrays, append_to_left)
|
198
269
|
enum = append_to_left ? appender.reverse_each : appender.each
|
199
270
|
enum.each do |key, data|
|
200
|
-
|
271
|
+
if data.size != size
|
272
|
+
raise DataFrameArgumentError, "Data size mismatch (#{data.size} != #{size})"
|
273
|
+
end
|
201
274
|
|
202
275
|
a = Arrow::Array.new(data.is_a?(Vector) ? data.to_a : data)
|
203
276
|
|
@@ -210,14 +283,5 @@ module RedAmber
|
|
210
283
|
end
|
211
284
|
end
|
212
285
|
end
|
213
|
-
|
214
|
-
def multiple_assigner?(assigner)
|
215
|
-
case assigner
|
216
|
-
in [Vector, *] | [Array, *] | [Arrow::Array, *]
|
217
|
-
true
|
218
|
-
else
|
219
|
-
false
|
220
|
-
end
|
221
|
-
end
|
222
286
|
end
|
223
287
|
end
|
data/lib/red_amber/group.rb
CHANGED
@@ -5,6 +5,8 @@ module RedAmber
|
|
5
5
|
class Group
|
6
6
|
include Enumerable # This feature is experimental
|
7
7
|
|
8
|
+
using RefineArrowTable
|
9
|
+
|
8
10
|
# Creates a new Group object.
|
9
11
|
#
|
10
12
|
# @param dataframe [DataFrame] dataframe to be grouped.
|
@@ -18,7 +20,6 @@ module RedAmber
|
|
18
20
|
d = @group_keys - @dataframe.keys
|
19
21
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?
|
20
22
|
|
21
|
-
@filters = @group_counts = @base_table = nil
|
22
23
|
@group = @dataframe.table.group(*@group_keys)
|
23
24
|
end
|
24
25
|
|
@@ -29,11 +30,14 @@ module RedAmber
|
|
29
30
|
define_method(function) do |*summary_keys|
|
30
31
|
summary_keys = Array(summary_keys).flatten
|
31
32
|
d = summary_keys - @dataframe.keys
|
32
|
-
|
33
|
+
unless summary_keys.empty? || d.empty?
|
34
|
+
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}."
|
35
|
+
end
|
33
36
|
|
34
|
-
table = @group.aggregate(*build_aggregation_keys("hash_#{function}",
|
35
|
-
|
36
|
-
|
37
|
+
table = @group.aggregate(*build_aggregation_keys("hash_#{function}",
|
38
|
+
summary_keys))
|
39
|
+
g = @group_keys.map(&:to_s)
|
40
|
+
DataFrame.new(table[g + (table.keys - g)])
|
37
41
|
end
|
38
42
|
end
|
39
43
|
|
@@ -76,7 +80,7 @@ module RedAmber
|
|
76
80
|
end
|
77
81
|
|
78
82
|
def group_count
|
79
|
-
DataFrame.
|
83
|
+
DataFrame.create(add_columns_to_table(base_table, [:group_count], [group_counts]))
|
80
84
|
end
|
81
85
|
|
82
86
|
def inspect
|
@@ -95,6 +99,11 @@ module RedAmber
|
|
95
99
|
end
|
96
100
|
end
|
97
101
|
|
102
|
+
# experimental
|
103
|
+
def agg_sum(*summary_keys)
|
104
|
+
call_aggregating_function(:sum, summary_keys, _options = nil)
|
105
|
+
end
|
106
|
+
|
98
107
|
private
|
99
108
|
|
100
109
|
def build_aggregation_keys(function_name, summary_keys)
|
@@ -105,7 +114,7 @@ module RedAmber
|
|
105
114
|
end
|
106
115
|
end
|
107
116
|
|
108
|
-
# @group_counts.sum == @dataframe.size
|
117
|
+
# @note `@group_counts.sum == @dataframe.size`
|
109
118
|
def group_counts
|
110
119
|
@group_counts ||= filters.map(&:sum)
|
111
120
|
end
|
data/lib/red_amber/helper.rb
CHANGED
@@ -5,46 +5,68 @@ module RedAmber
|
|
5
5
|
module Helper
|
6
6
|
private
|
7
7
|
|
8
|
+
# If num is larger than 1 return 's' to be plural.
|
9
|
+
#
|
10
|
+
# @param num [Numeric] some number.
|
11
|
+
# @return ['s', ''] return 's' if num is larger than 1.
|
12
|
+
# Otherwise return ''.
|
8
13
|
def pl(num)
|
9
14
|
num > 1 ? 's' : ''
|
10
15
|
end
|
11
16
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
17
|
+
# Parse the arguments in an Array
|
18
|
+
# and returns a parsed Array.
|
19
|
+
#
|
20
|
+
# @param args
|
21
|
+
# [<Integer, Symbol, true, false, nil, Array, Range, Enumerator, String, Float>]
|
22
|
+
# arguments.
|
23
|
+
# @param array_size [Integer] size of target Array to use in an endless Range.
|
24
|
+
# @return [<Integer, Symbol, true, false, nil>] parsed flat Array.
|
25
|
+
# @note This method is recursively called to parse.
|
26
|
+
def parse_args(args, array_size)
|
27
|
+
args.flat_map do |elem|
|
28
|
+
case elem
|
29
|
+
when Integer, Symbol, NilClass, TrueClass, FalseClass
|
30
|
+
elem
|
31
|
+
when Array
|
32
|
+
parse_args(elem, array_size)
|
33
|
+
when Range
|
34
|
+
parse_range(elem, array_size)
|
35
|
+
when Enumerator
|
36
|
+
parse_args(Array(elem), array_size)
|
37
|
+
when String
|
38
|
+
elem.to_sym
|
39
|
+
when Float
|
40
|
+
elem.floor.to_i
|
41
|
+
else
|
42
|
+
Array(elem)
|
43
|
+
end
|
19
44
|
end
|
20
|
-
Vector.new(a)
|
21
45
|
end
|
22
46
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
Array(0...vsize)[elem]
|
39
|
-
elsif bg.nil? && en.nil?
|
40
|
-
Array(0...vsize)
|
41
|
-
else
|
42
|
-
Array(elem)
|
47
|
+
# Parse a Range to an Array
|
48
|
+
#
|
49
|
+
# @param range [Range] Range to parse.
|
50
|
+
# @param array_size [Integer] size of target Array to use in an endless Range.
|
51
|
+
# @return [Array<Integer, Symbol, String>] parsed Array.
|
52
|
+
def parse_range(range, array_size)
|
53
|
+
bg = range.begin
|
54
|
+
en = range.end
|
55
|
+
if [bg, en].any?(Integer)
|
56
|
+
bg += array_size if bg&.negative?
|
57
|
+
en += array_size if en&.negative?
|
58
|
+
en -= 1 if en.is_a?(Integer) && range.exclude_end?
|
59
|
+
if bg&.negative? || (en && en >= array_size)
|
60
|
+
raise IndexError, "Index out of range: #{range} for 0..#{array_size - 1}"
|
43
61
|
end
|
44
|
-
|
45
|
-
|
62
|
+
|
63
|
+
Array(0...array_size)[range]
|
64
|
+
elsif bg.nil?
|
65
|
+
raise DataFrameArgumentError, "Cannot use beginless Range: #{range}"
|
66
|
+
elsif en.nil?
|
67
|
+
raise DataFrameArgumentError, "Cannot use endless Range: #{range}"
|
46
68
|
else
|
47
|
-
Array
|
69
|
+
Array(range)
|
48
70
|
end
|
49
71
|
end
|
50
72
|
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# Add additional capabilities to Hash
|
5
|
+
module RefineHash
|
6
|
+
refine Hash do
|
7
|
+
# Convert self to an Arrow::Table
|
8
|
+
def to_arrow
|
9
|
+
Arrow::Table.new(self)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
# Add additional capabilities to Array-like classes
|
15
|
+
module RefineArrayLike
|
16
|
+
refine Array do
|
17
|
+
def to_arrow_array
|
18
|
+
Arrow::Array.new(self)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
refine Range do
|
23
|
+
def to_arrow_array
|
24
|
+
Arrow::Array.new(Array(self))
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# common methods for Arrow::Array and Arrow::ChunkedArray
|
29
|
+
# Refinement#include is deprecated and will be removed in Ruby 3.2
|
30
|
+
refine Arrow::Array do
|
31
|
+
def to_arrow_array
|
32
|
+
self
|
33
|
+
end
|
34
|
+
|
35
|
+
def type_class
|
36
|
+
value_data_type.class
|
37
|
+
end
|
38
|
+
|
39
|
+
def boolean?
|
40
|
+
value_data_type.instance_of?(Arrow::BooleanDataType)
|
41
|
+
end
|
42
|
+
|
43
|
+
def numeric?
|
44
|
+
value_data_type.class < Arrow::NumericDataType
|
45
|
+
end
|
46
|
+
|
47
|
+
def float?
|
48
|
+
value_data_type.class < Arrow::FloatingPointDataType
|
49
|
+
end
|
50
|
+
|
51
|
+
def integer?
|
52
|
+
value_data_type.class < Arrow::IntegerDataType
|
53
|
+
end
|
54
|
+
|
55
|
+
def list?
|
56
|
+
is_a? Arrow::ListArray
|
57
|
+
end
|
58
|
+
|
59
|
+
def unsigned_integer?
|
60
|
+
value_data_type.instance_of?(Arrow::UInt8DataType) ||
|
61
|
+
value_data_type.instance_of?(Arrow::UInt16DataType) ||
|
62
|
+
value_data_type.instance_of?(Arrow::UInt32DataType) ||
|
63
|
+
value_data_type.instance_of?(Arrow::UInt64DataType)
|
64
|
+
end
|
65
|
+
|
66
|
+
def string?
|
67
|
+
value_data_type.instance_of?(Arrow::StringDataType)
|
68
|
+
end
|
69
|
+
|
70
|
+
def dictionary?
|
71
|
+
value_data_type.instance_of?(Arrow::DictionaryDataType)
|
72
|
+
end
|
73
|
+
|
74
|
+
def temporal?
|
75
|
+
value_data_type.class < Arrow::TemporalDataType
|
76
|
+
end
|
77
|
+
|
78
|
+
def primitive_invert
|
79
|
+
n = Arrow::Function.find(:is_null).execute([self])
|
80
|
+
i = Arrow::Function.find(:if_else).execute([n, false, self])
|
81
|
+
Arrow::Function.find(:invert).execute([i]).value
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
refine Arrow::ChunkedArray do
|
86
|
+
def to_arrow_array
|
87
|
+
self
|
88
|
+
end
|
89
|
+
|
90
|
+
def type_class
|
91
|
+
value_data_type.class
|
92
|
+
end
|
93
|
+
|
94
|
+
def boolean?
|
95
|
+
value_data_type.instance_of?(Arrow::BooleanDataType)
|
96
|
+
end
|
97
|
+
|
98
|
+
def numeric?
|
99
|
+
value_data_type.class < Arrow::NumericDataType
|
100
|
+
end
|
101
|
+
|
102
|
+
def float?
|
103
|
+
value_data_type.class < Arrow::FloatingPointDataType
|
104
|
+
end
|
105
|
+
|
106
|
+
def integer?
|
107
|
+
value_data_type.class < Arrow::IntegerDataType
|
108
|
+
end
|
109
|
+
|
110
|
+
def unsigned_integer?
|
111
|
+
value_data_type.instance_of?(Arrow::UInt8DataType) ||
|
112
|
+
value_data_type.instance_of?(Arrow::UInt16DataType) ||
|
113
|
+
value_data_type.instance_of?(Arrow::UInt32DataType) ||
|
114
|
+
value_data_type.instance_of?(Arrow::UInt64DataType)
|
115
|
+
end
|
116
|
+
|
117
|
+
def string?
|
118
|
+
value_data_type.instance_of?(Arrow::StringDataType)
|
119
|
+
end
|
120
|
+
|
121
|
+
def dictionary?
|
122
|
+
value_data_type.instance_of?(Arrow::DictionaryDataType)
|
123
|
+
end
|
124
|
+
|
125
|
+
def temporal?
|
126
|
+
value_data_type.class < Arrow::TemporalDataType
|
127
|
+
end
|
128
|
+
|
129
|
+
def list?
|
130
|
+
value_type.nick == 'list'
|
131
|
+
end
|
132
|
+
|
133
|
+
def primitive_invert
|
134
|
+
n = Arrow::Function.find(:is_null).execute([self])
|
135
|
+
i = Arrow::Function.find(:if_else).execute([n, false, self])
|
136
|
+
Arrow::Function.find(:invert).execute([i]).value
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# Add additional capabilities to Arrow::Table
|
142
|
+
module RefineArrowTable
|
143
|
+
refine Arrow::Table do
|
144
|
+
def keys
|
145
|
+
columns.map(&:name)
|
146
|
+
end
|
147
|
+
|
148
|
+
def key?(key)
|
149
|
+
keys.include?(key)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# Add additional capabilities to Array
|
155
|
+
module RefineArray
|
156
|
+
refine Array do
|
157
|
+
def integers?
|
158
|
+
all? { |e| e.is_a?(Integer) } # rubocop:disable Performance/RedundantEqualityComparisonBlock
|
159
|
+
end
|
160
|
+
|
161
|
+
def booleans?
|
162
|
+
all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
|
163
|
+
end
|
164
|
+
|
165
|
+
def symbols?
|
166
|
+
all? { |e| e.is_a?(Symbol) } # rubocop:disable Performance/RedundantEqualityComparisonBlock
|
167
|
+
end
|
168
|
+
|
169
|
+
def strings?
|
170
|
+
all? { |e| e.is_a?(String) } # rubocop:disable Performance/RedundantEqualityComparisonBlock
|
171
|
+
end
|
172
|
+
|
173
|
+
def symbols_or_strings?
|
174
|
+
all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
|
175
|
+
end
|
176
|
+
|
177
|
+
# convert booleans to indices
|
178
|
+
def booleans_to_indices
|
179
|
+
(0...size).select.with_index { |_, i| self[i] }
|
180
|
+
end
|
181
|
+
|
182
|
+
# select elements by booleans
|
183
|
+
def select_by_booleans(booleans)
|
184
|
+
select.with_index { |_, i| booleans[i] }
|
185
|
+
end
|
186
|
+
|
187
|
+
# reject elements by booleans
|
188
|
+
def reject_by_booleans(booleans)
|
189
|
+
reject.with_index { |_, i| booleans[i] }
|
190
|
+
end
|
191
|
+
|
192
|
+
# reject elements by indices
|
193
|
+
# notice: order by indices is not considered.
|
194
|
+
def reject_by_indices(indices)
|
195
|
+
reject.with_index { |_, i| indices.include?(i) || indices.include?(i - size) }
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|