red_amber 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +133 -51
- data/.yardopts +2 -0
- data/CHANGELOG.md +203 -1
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +61 -45
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +63 -0
- data/doc/DataFrame.md +35 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +295 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +537 -68
- data/lib/red_amber/data_frame_combinable.rb +776 -123
- data/lib/red_amber/data_frame_displayable.rb +248 -18
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +81 -10
- data/lib/red_amber/data_frame_reshaping.rb +216 -21
- data/lib/red_amber/data_frame_selectable.rb +781 -120
- data/lib/red_amber/data_frame_variable_operation.rb +561 -85
- data/lib/red_amber/group.rb +195 -21
- data/lib/red_amber/helper.rb +114 -32
- data/lib/red_amber/refinements.rb +206 -0
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +435 -58
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +321 -69
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +397 -24
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +15 -1
- data/red_amber.gemspec +4 -3
- metadata +19 -11
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -294
data/lib/red_amber/group.rb
CHANGED
@@ -1,14 +1,72 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Group class
|
5
5
|
class Group
|
6
6
|
include Enumerable # This feature is experimental
|
7
7
|
|
8
|
+
using RefineArrowTable
|
9
|
+
|
10
|
+
# Source DataFrame.
|
11
|
+
#
|
12
|
+
# @return [DataFrame]
|
13
|
+
# source DataFrame.
|
14
|
+
#
|
15
|
+
attr_reader :dataframe
|
16
|
+
|
17
|
+
# Keys for grouping by value.
|
18
|
+
#
|
19
|
+
# @return [Array]
|
20
|
+
# group keys.
|
21
|
+
#
|
22
|
+
attr_reader :group_keys
|
23
|
+
|
24
|
+
class << self
|
25
|
+
private
|
26
|
+
|
27
|
+
# @!macro [attach] define_group_aggregation
|
28
|
+
# @!method $1(*summary_keys)
|
29
|
+
# Group aggregation function `$1`.
|
30
|
+
# @param summary_keys [Array<Symbol, String>]
|
31
|
+
# summary keys.
|
32
|
+
# @return [DataFrame]
|
33
|
+
# aggregated DataFrame
|
34
|
+
#
|
35
|
+
def define_group_aggregation(function)
|
36
|
+
define_method(function) do |*summary_keys|
|
37
|
+
summary_keys = Array(summary_keys).flatten
|
38
|
+
d = summary_keys - @dataframe.keys
|
39
|
+
unless summary_keys.empty? || d.empty?
|
40
|
+
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}."
|
41
|
+
end
|
42
|
+
|
43
|
+
table = @group.aggregate(*build_aggregation_keys("hash_#{function}",
|
44
|
+
summary_keys))
|
45
|
+
g = @group_keys.map(&:to_s)
|
46
|
+
DataFrame.new(table[g + (table.keys - g)])
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
8
51
|
# Creates a new Group object.
|
9
52
|
#
|
10
|
-
# @param dataframe [DataFrame]
|
11
|
-
#
|
53
|
+
# @param dataframe [DataFrame]
|
54
|
+
# dataframe to be grouped.
|
55
|
+
# @param group_keys [Array<Symbol, String>]
|
56
|
+
# keys for grouping.
|
57
|
+
# @return [Group]
|
58
|
+
# Group object.
|
59
|
+
# @example
|
60
|
+
# Group.new(penguins, :species)
|
61
|
+
#
|
62
|
+
# # =>
|
63
|
+
# #<RedAmber::Group : 0x000000000000f410>
|
64
|
+
# species group_count
|
65
|
+
# <string> <uint8>
|
66
|
+
# 0 Adelie 152
|
67
|
+
# 1 Chinstrap 68
|
68
|
+
# 2 Gentoo 124
|
69
|
+
#
|
12
70
|
def initialize(dataframe, *group_keys)
|
13
71
|
@dataframe = dataframe
|
14
72
|
@group_keys = group_keys.flatten
|
@@ -18,25 +76,10 @@ module RedAmber
|
|
18
76
|
d = @group_keys - @dataframe.keys
|
19
77
|
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?
|
20
78
|
|
21
|
-
@filters = @group_counts = @base_table = nil
|
22
79
|
@group = @dataframe.table.group(*@group_keys)
|
23
80
|
end
|
24
81
|
|
25
|
-
|
26
|
-
|
27
|
-
functions = %i[count sum product mean min max stddev variance]
|
28
|
-
functions.each do |function|
|
29
|
-
define_method(function) do |*summary_keys|
|
30
|
-
summary_keys = Array(summary_keys).flatten
|
31
|
-
d = summary_keys - @dataframe.keys
|
32
|
-
raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
|
33
|
-
|
34
|
-
table = @group.aggregate(*build_aggregation_keys("hash_#{function}", summary_keys))
|
35
|
-
df = DataFrame.new(table)
|
36
|
-
df.pick(@group_keys, df.keys - @group_keys)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
82
|
+
define_group_aggregation(:count)
|
40
83
|
alias_method :__count, :count
|
41
84
|
private :__count
|
42
85
|
|
@@ -50,6 +93,26 @@ module RedAmber
|
|
50
93
|
end
|
51
94
|
end
|
52
95
|
|
96
|
+
define_group_aggregation(:sum)
|
97
|
+
|
98
|
+
define_group_aggregation(:product)
|
99
|
+
|
100
|
+
define_group_aggregation(:mean)
|
101
|
+
|
102
|
+
define_group_aggregation(:min)
|
103
|
+
|
104
|
+
define_group_aggregation(:max)
|
105
|
+
|
106
|
+
define_group_aggregation(:stddev)
|
107
|
+
|
108
|
+
define_group_aggregation(:variance)
|
109
|
+
|
110
|
+
# Returns an Array of boolean filters to select each record in the Group.
|
111
|
+
#
|
112
|
+
# @api private
|
113
|
+
# @return [Array]
|
114
|
+
# an Array of boolean filter Vectors.
|
115
|
+
#
|
53
116
|
def filters
|
54
117
|
@filters ||= begin
|
55
118
|
first, *others = @group_keys.map do |key|
|
@@ -65,6 +128,25 @@ module RedAmber
|
|
65
128
|
end
|
66
129
|
end
|
67
130
|
|
131
|
+
# Iterates over each record group as a DataFrame or returns an Enumerator.
|
132
|
+
#
|
133
|
+
# @api private
|
134
|
+
# @overload each
|
135
|
+
# Returns a new Enumerator if no block is given.
|
136
|
+
#
|
137
|
+
# @return [Enumerator]
|
138
|
+
# Enumerator of each group as a DataFrame.
|
139
|
+
#
|
140
|
+
# @overload each
|
141
|
+
# When a block is given, passes each record group as a DataFrame to the block.
|
142
|
+
#
|
143
|
+
# @yieldparam df [DataFrame]
|
144
|
+
# passes each record group as a DataFrame by a block parameter.
|
145
|
+
# @yieldreturn [Object]
|
146
|
+
# evaluated result value from the block.
|
147
|
+
# @return [Integer]
|
148
|
+
# group size.
|
149
|
+
#
|
68
150
|
def each
|
69
151
|
filters
|
70
152
|
return enum_for(:each) unless block_given?
|
@@ -75,14 +157,98 @@ module RedAmber
|
|
75
157
|
@filters.size
|
76
158
|
end
|
77
159
|
|
160
|
+
# Returns each record group size as a DataFrame.
|
161
|
+
#
|
162
|
+
# @return [DataFrame]
|
163
|
+
# DataFrame consists of:
|
164
|
+
# - Group key columns.
|
165
|
+
# - Result columns by group aggregation.
|
166
|
+
# @example
|
167
|
+
# penguins.group(:species).group_count
|
168
|
+
#
|
169
|
+
# # =>
|
170
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
|
171
|
+
# species group_count
|
172
|
+
# <string> <uint8>
|
173
|
+
# 0 Adelie 152
|
174
|
+
# 1 Chinstrap 68
|
175
|
+
# 2 Gentoo 124
|
176
|
+
#
|
78
177
|
def group_count
|
79
|
-
DataFrame.
|
178
|
+
DataFrame.create(add_columns_to_table(base_table, [:group_count], [group_counts]))
|
80
179
|
end
|
81
180
|
|
181
|
+
# String representation of self.
|
182
|
+
#
|
183
|
+
# @return [String]
|
184
|
+
# show information of self as a String.
|
185
|
+
# @example
|
186
|
+
# puts penguins.group(:species).inspect
|
187
|
+
#
|
188
|
+
# # =>
|
189
|
+
# #<RedAmber::Group : 0x0000000000003a98>
|
190
|
+
# species group_count
|
191
|
+
# <string> <uint8>
|
192
|
+
# 0 Adelie 152
|
193
|
+
# 1 Chinstrap 68
|
194
|
+
# 2 Gentoo 124
|
195
|
+
#
|
82
196
|
def inspect
|
83
197
|
"#<#{self.class} : #{format('0x%016x', object_id)}>\n#{group_count}"
|
84
198
|
end
|
85
199
|
|
200
|
+
# Summarize Group by aggregation functions from the block.
|
201
|
+
#
|
202
|
+
# @yieldparam group [Group]
|
203
|
+
# passes group object self.
|
204
|
+
# @yieldreturn [DataFrame, Array<DataFrame>]
|
205
|
+
# an aggregated DataFrame or an array of aggregated DataFrames.
|
206
|
+
# @return [DataFrame]
|
207
|
+
# summarized DataFrame.
|
208
|
+
# @example Single function and single variable
|
209
|
+
# group = penguins.group(:species)
|
210
|
+
# group
|
211
|
+
#
|
212
|
+
# # =>
|
213
|
+
# #<RedAmber::Group : 0x000000000000c314>
|
214
|
+
# species group_count
|
215
|
+
# <string> <uint8>
|
216
|
+
# 0 Adelie 152
|
217
|
+
# 1 Chinstrap 68
|
218
|
+
# 2 Gentoo 124
|
219
|
+
#
|
220
|
+
# group.summarize { mean(:bill_length_mm) }
|
221
|
+
#
|
222
|
+
# # =>
|
223
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c364>
|
224
|
+
# species mean(bill_length_mm)
|
225
|
+
# <string> <double>
|
226
|
+
# 0 Adelie 38.79
|
227
|
+
# 1 Chinstrap 48.83
|
228
|
+
# 2 Gentoo 47.5
|
229
|
+
#
|
230
|
+
# @example Single function only
|
231
|
+
# group.summarize { mean }
|
232
|
+
#
|
233
|
+
# # =>
|
234
|
+
# #<RedAmber::DataFrame : 3 x 6 Vectors, 0x000000000000c350>
|
235
|
+
# species mean(bill_length_mm) mean(bill_depth_mm) ... mean(year)
|
236
|
+
# <string> <double> <double> ... <double>
|
237
|
+
# 0 Adelie 38.79 18.35 ... 2008.01
|
238
|
+
# 1 Chinstrap 48.83 18.42 ... 2007.97
|
239
|
+
# 2 Gentoo 47.5 14.98 ... 2008.08
|
240
|
+
#
|
241
|
+
# @example Multiple functions
|
242
|
+
# group.summarize { [min(:bill_length_mm), max(:bill_length_mm)] }
|
243
|
+
#
|
244
|
+
# # =>
|
245
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000c378>
|
246
|
+
# species min(bill_length_mm) max(bill_length_mm)
|
247
|
+
# <string> <double> <double>
|
248
|
+
# 0 Adelie 32.1 46.0
|
249
|
+
# 1 Chinstrap 40.9 58.0
|
250
|
+
# 2 Gentoo 40.9 59.6
|
251
|
+
#
|
86
252
|
def summarize(&block)
|
87
253
|
agg = instance_eval(&block)
|
88
254
|
case agg
|
@@ -95,6 +261,14 @@ module RedAmber
|
|
95
261
|
end
|
96
262
|
end
|
97
263
|
|
264
|
+
# Aggregating summary.
|
265
|
+
#
|
266
|
+
# @api private
|
267
|
+
#
|
268
|
+
def agg_sum(*summary_keys)
|
269
|
+
call_aggregating_function(:sum, summary_keys, _options = nil)
|
270
|
+
end
|
271
|
+
|
98
272
|
private
|
99
273
|
|
100
274
|
def build_aggregation_keys(function_name, summary_keys)
|
@@ -105,7 +279,7 @@ module RedAmber
|
|
105
279
|
end
|
106
280
|
end
|
107
281
|
|
108
|
-
# @group_counts.sum == @dataframe.size
|
282
|
+
# @note `@group_counts.sum == @dataframe.size`
|
109
283
|
def group_counts
|
110
284
|
@group_counts ||= filters.map(&:sum)
|
111
285
|
end
|
data/lib/red_amber/helper.rb
CHANGED
@@ -1,51 +1,133 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Mix-in for the class DataFrame
|
5
5
|
module Helper
|
6
6
|
private
|
7
7
|
|
8
|
+
# If num is larger than 1 return 's' to be plural.
|
9
|
+
#
|
10
|
+
# @param num [Numeric]
|
11
|
+
# some number.
|
12
|
+
# @return ['s', '']
|
13
|
+
# return 's' if num is larger than 1.
|
14
|
+
# Otherwise return ''.
|
15
|
+
#
|
8
16
|
def pl(num)
|
9
17
|
num > 1 ? 's' : ''
|
10
18
|
end
|
11
19
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
20
|
+
# Parses the arguments in an Array and returns a parsed Array.
|
21
|
+
#
|
22
|
+
# @param args
|
23
|
+
# [<Integer, Symbol, true, false, nil, Array, Range, Enumerator, String, Float>]
|
24
|
+
# arguments.
|
25
|
+
# @param array_size [Integer]
|
26
|
+
# size of target Array to use in an endless Range.
|
27
|
+
# @return [<Integer, Symbol, true, false, nil>]
|
28
|
+
# parsed flat Array.
|
29
|
+
# @note This method is recursively called to parse.
|
30
|
+
#
|
31
|
+
def parse_args(args, array_size)
|
32
|
+
args.flat_map do |elem|
|
33
|
+
case elem
|
34
|
+
when Integer, Symbol, NilClass, TrueClass, FalseClass
|
35
|
+
elem
|
36
|
+
when Array
|
37
|
+
parse_args(elem, array_size)
|
38
|
+
when Range
|
39
|
+
parse_range(elem, array_size)
|
40
|
+
when Enumerator
|
41
|
+
parse_args(Array(elem), array_size)
|
42
|
+
when String
|
43
|
+
elem.to_sym
|
44
|
+
when Float
|
45
|
+
elem.floor.to_i
|
46
|
+
else
|
47
|
+
Array(elem)
|
48
|
+
end
|
19
49
|
end
|
20
|
-
Vector.new(a)
|
21
50
|
end
|
22
51
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
else
|
42
|
-
Array(elem)
|
52
|
+
# Parse a Range to an Array
|
53
|
+
#
|
54
|
+
# @param range [Range]
|
55
|
+
# range to parse.
|
56
|
+
# @param array_size [Integer]
|
57
|
+
# size of target Array to use in an endless Range.
|
58
|
+
# @return [Array<Integer, Symbol, String>]
|
59
|
+
# parsed Array.
|
60
|
+
#
|
61
|
+
def parse_range(range, array_size)
|
62
|
+
bg = range.begin
|
63
|
+
en = range.end
|
64
|
+
if [bg, en].any?(Integer)
|
65
|
+
bg += array_size if bg&.negative?
|
66
|
+
en += array_size if en&.negative?
|
67
|
+
en -= 1 if en.is_a?(Integer) && range.exclude_end?
|
68
|
+
if bg&.negative? || (en && en >= array_size)
|
69
|
+
raise IndexError, "Index out of range: #{range} for 0..#{array_size - 1}"
|
43
70
|
end
|
44
|
-
|
45
|
-
|
71
|
+
|
72
|
+
Array(0...array_size)[range]
|
73
|
+
elsif bg.nil?
|
74
|
+
raise DataFrameArgumentError, "Cannot use beginless Range: #{range}"
|
75
|
+
elsif en.nil?
|
76
|
+
raise DataFrameArgumentError, "Cannot use endless Range: #{range}"
|
46
77
|
else
|
47
|
-
Array
|
78
|
+
Array(range)
|
48
79
|
end
|
49
80
|
end
|
50
81
|
end
|
82
|
+
|
83
|
+
# rubocop:disable Layout/LineLength
|
84
|
+
|
85
|
+
# Helper for Arrow Functions
|
86
|
+
module ArrowFunction
|
87
|
+
module_function
|
88
|
+
|
89
|
+
# Find Arrow's compute function.
|
90
|
+
#
|
91
|
+
# {https://arrow.apache.org/docs/cpp/compute.html}
|
92
|
+
# @param function_name [Symbol]
|
93
|
+
# function name.
|
94
|
+
# @return [Arrow::Function]
|
95
|
+
# arrow compute function object.
|
96
|
+
# @example
|
97
|
+
# RedAmber::ArrowFunction.find(:array_sort_indices)
|
98
|
+
#
|
99
|
+
# # =>
|
100
|
+
# #<Arrow::Function:0x7fa8838a0d80 ptr=0x7fa87e9b7320 array_sort_indices(array, {order=Ascending, null_placement=AtEnd}): Return the indices that would sort an array>
|
101
|
+
#
|
102
|
+
def find(function_name)
|
103
|
+
Arrow::Function.find(function_name)
|
104
|
+
end
|
105
|
+
|
106
|
+
# Show document of Arrow's compute function.
|
107
|
+
#
|
108
|
+
# @param function_name [Symbol]
|
109
|
+
# function name.
|
110
|
+
# @return [String]
|
111
|
+
# document of compute function object.
|
112
|
+
# @example
|
113
|
+
# puts RedAmber::ArrowFunction.arrow_doc(:array_sort_indices)
|
114
|
+
#
|
115
|
+
# # =>
|
116
|
+
# array_sort_indices(array, {order=Ascending, null_placement=AtEnd}): Return the indices that would sort an array
|
117
|
+
# ------------------
|
118
|
+
# This function computes an array of indices that define a stable sort
|
119
|
+
# of the input array. By default, Null values are considered greater
|
120
|
+
# than any other value and are therefore sorted at the end of the array.
|
121
|
+
# For floating-point types, NaNs are considered greater than any
|
122
|
+
# other non-null value, but smaller than null values.
|
123
|
+
#
|
124
|
+
# The handling of nulls and NaNs can be changed in ArraySortOptions.
|
125
|
+
#
|
126
|
+
def arrow_doc(function_name)
|
127
|
+
f = find(function_name)
|
128
|
+
"#{f}\n#{'-' * function_name.size}\n#{f.doc.description}"
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# rubocop:enable Layout/LineLength
|
51
133
|
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Namespace of RedAmber
|
4
|
+
module RedAmber
|
5
|
+
# Add additional capabilities to Hash
|
6
|
+
module RefineHash
|
7
|
+
refine Hash do
|
8
|
+
# Convert self to an Arrow::Table
|
9
|
+
def to_arrow
|
10
|
+
Arrow::Table.new(self)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# Add additional capabilities to Array-like classes
|
16
|
+
module RefineArrayLike
|
17
|
+
refine Array do
|
18
|
+
def to_arrow_array
|
19
|
+
Arrow::Array.new(self)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
refine Range do
|
24
|
+
def to_arrow_array
|
25
|
+
Arrow::Array.new(Array(self))
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# common methods for Arrow::Array and Arrow::ChunkedArray
|
30
|
+
# Refinement#include is deprecated and will be removed in Ruby 3.2
|
31
|
+
refine Arrow::Array do
|
32
|
+
def to_arrow_array
|
33
|
+
self
|
34
|
+
end
|
35
|
+
|
36
|
+
def type_class
|
37
|
+
value_data_type.class
|
38
|
+
end
|
39
|
+
|
40
|
+
def boolean?
|
41
|
+
value_data_type.instance_of?(Arrow::BooleanDataType)
|
42
|
+
end
|
43
|
+
|
44
|
+
def numeric?
|
45
|
+
value_data_type.class < Arrow::NumericDataType
|
46
|
+
end
|
47
|
+
|
48
|
+
def float?
|
49
|
+
value_data_type.class < Arrow::FloatingPointDataType
|
50
|
+
end
|
51
|
+
|
52
|
+
def integer?
|
53
|
+
value_data_type.class < Arrow::IntegerDataType
|
54
|
+
end
|
55
|
+
|
56
|
+
def list?
|
57
|
+
is_a? Arrow::ListArray
|
58
|
+
end
|
59
|
+
|
60
|
+
def unsigned_integer?
|
61
|
+
value_data_type.instance_of?(Arrow::UInt8DataType) ||
|
62
|
+
value_data_type.instance_of?(Arrow::UInt16DataType) ||
|
63
|
+
value_data_type.instance_of?(Arrow::UInt32DataType) ||
|
64
|
+
value_data_type.instance_of?(Arrow::UInt64DataType)
|
65
|
+
end
|
66
|
+
|
67
|
+
def string?
|
68
|
+
value_data_type.instance_of?(Arrow::StringDataType)
|
69
|
+
end
|
70
|
+
|
71
|
+
def dictionary?
|
72
|
+
value_data_type.instance_of?(Arrow::DictionaryDataType)
|
73
|
+
end
|
74
|
+
|
75
|
+
def temporal?
|
76
|
+
value_data_type.class < Arrow::TemporalDataType
|
77
|
+
end
|
78
|
+
|
79
|
+
def primitive_invert
|
80
|
+
n = Arrow::Function.find(:is_null).execute([self])
|
81
|
+
i = Arrow::Function.find(:if_else).execute([n, false, self])
|
82
|
+
Arrow::Function.find(:invert).execute([i]).value
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
refine Arrow::ChunkedArray do
|
87
|
+
def to_arrow_array
|
88
|
+
self
|
89
|
+
end
|
90
|
+
|
91
|
+
def type_class
|
92
|
+
value_data_type.class
|
93
|
+
end
|
94
|
+
|
95
|
+
def boolean?
|
96
|
+
value_data_type.instance_of?(Arrow::BooleanDataType)
|
97
|
+
end
|
98
|
+
|
99
|
+
def numeric?
|
100
|
+
value_data_type.class < Arrow::NumericDataType
|
101
|
+
end
|
102
|
+
|
103
|
+
def float?
|
104
|
+
value_data_type.class < Arrow::FloatingPointDataType
|
105
|
+
end
|
106
|
+
|
107
|
+
def integer?
|
108
|
+
value_data_type.class < Arrow::IntegerDataType
|
109
|
+
end
|
110
|
+
|
111
|
+
def unsigned_integer?
|
112
|
+
value_data_type.instance_of?(Arrow::UInt8DataType) ||
|
113
|
+
value_data_type.instance_of?(Arrow::UInt16DataType) ||
|
114
|
+
value_data_type.instance_of?(Arrow::UInt32DataType) ||
|
115
|
+
value_data_type.instance_of?(Arrow::UInt64DataType)
|
116
|
+
end
|
117
|
+
|
118
|
+
def string?
|
119
|
+
value_data_type.instance_of?(Arrow::StringDataType)
|
120
|
+
end
|
121
|
+
|
122
|
+
def dictionary?
|
123
|
+
value_data_type.instance_of?(Arrow::DictionaryDataType)
|
124
|
+
end
|
125
|
+
|
126
|
+
def temporal?
|
127
|
+
value_data_type.class < Arrow::TemporalDataType
|
128
|
+
end
|
129
|
+
|
130
|
+
def list?
|
131
|
+
value_type.nick == 'list'
|
132
|
+
end
|
133
|
+
|
134
|
+
def primitive_invert
|
135
|
+
n = Arrow::Function.find(:is_null).execute([self])
|
136
|
+
i = Arrow::Function.find(:if_else).execute([n, false, self])
|
137
|
+
Arrow::Function.find(:invert).execute([i]).value
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
# Add additional capabilities to Arrow::Table
|
143
|
+
module RefineArrowTable
|
144
|
+
refine Arrow::Table do
|
145
|
+
def keys
|
146
|
+
columns.map(&:name)
|
147
|
+
end
|
148
|
+
|
149
|
+
def key?(key)
|
150
|
+
keys.include?(key)
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
# Add additional capabilities to Array
|
156
|
+
module RefineArray
|
157
|
+
refine Array do
|
158
|
+
def integer?
|
159
|
+
all? { |e| e.is_a?(Integer) } # rubocop:disable Performance/RedundantEqualityComparisonBlock
|
160
|
+
end
|
161
|
+
|
162
|
+
def numeric?
|
163
|
+
all? { |e| e.is_a?(Numeric) } # rubocop:disable Performance/RedundantEqualityComparisonBlock
|
164
|
+
end
|
165
|
+
|
166
|
+
def boolean?
|
167
|
+
all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
|
168
|
+
end
|
169
|
+
|
170
|
+
def symbol?
|
171
|
+
all? { |e| e.is_a?(Symbol) } # rubocop:disable Performance/RedundantEqualityComparisonBlock
|
172
|
+
end
|
173
|
+
|
174
|
+
def string?
|
175
|
+
all? { |e| e.is_a?(String) } # rubocop:disable Performance/RedundantEqualityComparisonBlock
|
176
|
+
end
|
177
|
+
|
178
|
+
def symbol_or_string?
|
179
|
+
all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
|
180
|
+
end
|
181
|
+
|
182
|
+
# convert booleans to indices
|
183
|
+
def booleans_to_indices
|
184
|
+
(0...size).select.with_index { |_, i| self[i] }
|
185
|
+
end
|
186
|
+
|
187
|
+
# select elements by booleans
|
188
|
+
def select_by_booleans(booleans)
|
189
|
+
select.with_index { |_, i| booleans[i] }
|
190
|
+
end
|
191
|
+
|
192
|
+
# reject elements by booleans
|
193
|
+
def reject_by_booleans(booleans)
|
194
|
+
reject.with_index { |_, i| booleans[i] }
|
195
|
+
end
|
196
|
+
|
197
|
+
# reject elements by indices
|
198
|
+
# notice: order by indices is not considered.
|
199
|
+
def reject_by_indices(indices)
|
200
|
+
reject.with_index { |_, i| indices.include?(i) || indices.include?(i - size) }
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
private_constant :RefineArray, :RefineArrayLike, :RefineArrowTable, :RefineHash
|
206
|
+
end
|