red_amber 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -4,7 +4,7 @@
|
|
4
4
|
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
5
|
|
6
6
|
module RedAmber
|
7
|
-
#
|
7
|
+
# Mix-in for class Vector
|
8
8
|
# Functions to select some data.
|
9
9
|
module VectorSelectable
|
10
10
|
using RefineArray
|
@@ -12,11 +12,14 @@ module RedAmber
|
|
12
12
|
|
13
13
|
# Select elements in the self by indices.
|
14
14
|
#
|
15
|
-
# @param indices [Array<Numeric>, Vector]
|
16
|
-
#
|
17
|
-
# @
|
15
|
+
# @param indices [Array<Numeric>, Vector]
|
16
|
+
# an array-like of indices.
|
17
|
+
# @yieldreturn [Array<Numeric>, Vector]
|
18
|
+
# an array-like of indices from the block.
|
19
|
+
# @return [Vector]
|
20
|
+
# vector by selected elements.
|
18
21
|
#
|
19
|
-
#
|
22
|
+
# TODO: support for the option `boundscheck: true`
|
20
23
|
def take(*indices, &block)
|
21
24
|
if block
|
22
25
|
unless indices.empty?
|
@@ -47,11 +50,14 @@ module RedAmber
|
|
47
50
|
|
48
51
|
# Select elements in the self by booleans.
|
49
52
|
#
|
50
|
-
# @param booleans [Array<true, false, nil>, Vector]
|
51
|
-
#
|
52
|
-
# @
|
53
|
+
# @param booleans [Array<true, false, nil>, Vector]
|
54
|
+
# an array-like of booleans.
|
55
|
+
# @yieldreturn [Array<true, false, nil>, Vector]
|
56
|
+
# an array-like of booleans from the block.
|
57
|
+
# @return [Vector]
|
58
|
+
# vector by selected elements.
|
53
59
|
#
|
54
|
-
#
|
60
|
+
# TODO: support for the option `null_selection_behavior: :drop`
|
55
61
|
def filter(*booleans, &block)
|
56
62
|
if block
|
57
63
|
unless booleans.empty?
|
@@ -87,9 +93,12 @@ module RedAmber
|
|
87
93
|
|
88
94
|
# Select elements in the self by indices or booleans.
|
89
95
|
#
|
90
|
-
# @param args [Array<Numeric, true, false, nil>, Vector]
|
91
|
-
#
|
92
|
-
# @
|
96
|
+
# @param args [Array<Numeric, true, false, nil>, Vector]
|
97
|
+
# specifier. Indices or booleans.
|
98
|
+
# @yieldparam [Array<Numeric, true, false, nil>, Vector]
|
99
|
+
# specifier. Indices or booleans.
|
100
|
+
# @return [scalar, Array]
|
101
|
+
# returns scalar or array.
|
93
102
|
#
|
94
103
|
def [](*args)
|
95
104
|
array =
|
@@ -119,31 +128,264 @@ module RedAmber
|
|
119
128
|
raise VectorArgumentError, "Invalid argument: #{args}"
|
120
129
|
end
|
121
130
|
|
122
|
-
#
|
131
|
+
# Check if elements of self are in the other values.
|
132
|
+
#
|
133
|
+
# @param values [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
|
134
|
+
# values to test existence.
|
135
|
+
# @return [Vector]
|
136
|
+
# boolean Vector.
|
137
|
+
#
|
123
138
|
def is_in(*values)
|
124
|
-
|
125
|
-
|
126
|
-
array =
|
139
|
+
enum =
|
127
140
|
case values
|
128
|
-
in [
|
129
|
-
|
141
|
+
in [] | [[]] | [nil] |[[nil]]
|
142
|
+
return Vector.new([false] * size)
|
143
|
+
in [Vector | Arrow::Array | Arrow::ChunkedArray]
|
144
|
+
values[0].each
|
130
145
|
else
|
131
|
-
|
146
|
+
parse_args(values, size, symbolize: false)
|
132
147
|
end
|
133
|
-
|
134
|
-
Vector.create(self_data.is_in(array))
|
148
|
+
enum.filter_map { self == _1 unless _1.nil? }.reduce(&:|)
|
135
149
|
end
|
136
150
|
|
137
|
-
#
|
151
|
+
# Returns index of first matched position of element in self.
|
152
|
+
#
|
153
|
+
# @param element
|
154
|
+
# an element of self.
|
155
|
+
# @return [Integer, nil]
|
156
|
+
# position of the first matching element. If it is not found, returns nil.
|
157
|
+
#
|
138
158
|
def index(element)
|
139
|
-
|
159
|
+
(0...size).find { |i| self[i] == element }
|
140
160
|
end
|
141
161
|
|
162
|
+
# Returns first element of self.
|
163
|
+
#
|
164
|
+
# @return
|
165
|
+
# the first element.
|
166
|
+
# @since 0.4.1
|
167
|
+
#
|
168
|
+
def first
|
169
|
+
data[0]
|
170
|
+
end
|
171
|
+
|
172
|
+
# Returns last element of self.
|
173
|
+
#
|
174
|
+
# @return
|
175
|
+
# the last element.
|
176
|
+
# @since 0.4.1
|
177
|
+
#
|
178
|
+
def last
|
179
|
+
data[-1]
|
180
|
+
end
|
181
|
+
|
182
|
+
# Drops nils in self and returns a new Vector as a result.
|
183
|
+
#
|
184
|
+
# @return [Vector]
|
185
|
+
# a Vector without nils.
|
186
|
+
#
|
142
187
|
def drop_nil
|
143
188
|
datum = find(:drop_null).execute([data])
|
144
189
|
Vector.create(datum.value)
|
145
190
|
end
|
146
191
|
|
192
|
+
# Arrange values in Vector.
|
193
|
+
#
|
194
|
+
# @param order [Symbol]
|
195
|
+
# sort order.
|
196
|
+
# - `:+`, `:ascending` or without argument will sort in increasing order.
|
197
|
+
# - `:-` or `:descending` will sort in decreasing order.
|
198
|
+
# @return [Vector]
|
199
|
+
# sorted Vector.
|
200
|
+
# @example Sort in increasing order (default)
|
201
|
+
# Vector.new(%w[B D A E C]).sort
|
202
|
+
# # same as #sort(:+)
|
203
|
+
# # same as #sort(:ascending)
|
204
|
+
#
|
205
|
+
# # =>
|
206
|
+
# #<RedAmber::Vector(:string, size=5):0x000000000000c134>
|
207
|
+
# ["A", "B", "C", "D", "E"]
|
208
|
+
#
|
209
|
+
# @example Sort in decreasing order
|
210
|
+
# Vector.new(%w[B D A E C]).sort(:-)
|
211
|
+
# # same as #sort(:descending)
|
212
|
+
#
|
213
|
+
# # =>
|
214
|
+
# #<RedAmber::Vector(:string, size=5):0x000000000000c148>
|
215
|
+
# ["E", "D", "C", "B", "A"]
|
216
|
+
#
|
217
|
+
# @since 0.4.0
|
218
|
+
#
|
219
|
+
def sort(order = :ascending)
|
220
|
+
order =
|
221
|
+
case order.to_sym
|
222
|
+
when :+, :ascending, :increasing
|
223
|
+
:ascending
|
224
|
+
when :-, :descending, :decreasing
|
225
|
+
:descending
|
226
|
+
else
|
227
|
+
raise VectorArgumentError, "illegal order option: #{order}"
|
228
|
+
end
|
229
|
+
take(sort_indices(order: order))
|
230
|
+
end
|
231
|
+
|
232
|
+
# Returns numerical rank of self.
|
233
|
+
# - Nil values are considered greater than any value.
|
234
|
+
# - NaN values are considered greater than any value but smaller than nil values.
|
235
|
+
# - Tiebreakers are ranked in order of appearance.
|
236
|
+
# - `RankOptions` in C++ function is not implemented in C GLib yet.
|
237
|
+
# This method is currently fixed to the default behavior.
|
238
|
+
#
|
239
|
+
# @return [Vector]
|
240
|
+
# 0-based rank of self (0...size in range).
|
241
|
+
# @example Rank of float Vector
|
242
|
+
# fv = Vector.new(0.1, nil, Float::NAN, 0.2, 0.1); fv
|
243
|
+
#
|
244
|
+
# # =>
|
245
|
+
# #<RedAmber::Vector(:double, size=5):0x000000000000c65c>
|
246
|
+
# [0.1, nil, NaN, 0.2, 0.1]
|
247
|
+
#
|
248
|
+
# fv.rank
|
249
|
+
#
|
250
|
+
# # =>
|
251
|
+
# #<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
|
252
|
+
# [0, 4, 3, 2, 1]
|
253
|
+
#
|
254
|
+
# @example Rank of string Vector
|
255
|
+
# sv = Vector.new("A", "B", nil, "A", "C"); sv
|
256
|
+
#
|
257
|
+
# # =>
|
258
|
+
# #<RedAmber::Vector(:string, size=5):0x0000000000003854>
|
259
|
+
# ["A", "B", nil, "A", "C"]
|
260
|
+
#
|
261
|
+
# sv.rank
|
262
|
+
#
|
263
|
+
# # =>
|
264
|
+
# #<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
|
265
|
+
# [0, 2, 4, 1, 3]
|
266
|
+
#
|
267
|
+
# @since 0.4.0
|
268
|
+
#
|
269
|
+
def rank
|
270
|
+
datum =
|
271
|
+
case data
|
272
|
+
when Arrow::ChunkedArray
|
273
|
+
Arrow::Function.find(:rank).execute([data.pack])
|
274
|
+
else
|
275
|
+
Arrow::Function.find(:rank).execute([data])
|
276
|
+
end
|
277
|
+
Vector.create(datum.value) - 1
|
278
|
+
end
|
279
|
+
|
280
|
+
# Pick up elements at random.
|
281
|
+
#
|
282
|
+
# @overload sample()
|
283
|
+
# Return a randomly selected element.
|
284
|
+
# This is one of the aggregation functions.
|
285
|
+
#
|
286
|
+
# @return [scalar]
|
287
|
+
# one of the elements in self.
|
288
|
+
# @example Sample an element
|
289
|
+
# v = Vector.new('A'..'H'); v
|
290
|
+
#
|
291
|
+
# # =>
|
292
|
+
# #<RedAmber::Vector(:string, size=8):0x0000000000011b20>
|
293
|
+
# ["A", "B", "C", "D", "E", "F", "G", "H"]
|
294
|
+
#
|
295
|
+
# v.sample
|
296
|
+
#
|
297
|
+
# # =>
|
298
|
+
# "C"
|
299
|
+
#
|
300
|
+
# @overload sample(n)
|
301
|
+
# Pick up n elements at random.
|
302
|
+
#
|
303
|
+
# @param n [Integer]
|
304
|
+
# positive number of elements to pick.
|
305
|
+
# If n is smaller than or equal to size, elements are picked without repetition.
|
306
|
+
# If n is greater than `size`, elements are picked repeatedly.
|
307
|
+
# @return [Vector]
|
308
|
+
# sampled elements.
|
309
|
+
# If n == 1 (in case of `sample(1)`), it returns a Vector of size == 1
|
310
|
+
# not a scalar.
|
311
|
+
# @example Sample Vector in size 1
|
312
|
+
# v.sample(1)
|
313
|
+
#
|
314
|
+
# # =>
|
315
|
+
# #<RedAmber::Vector(:string, size=1):0x000000000001a3b0>
|
316
|
+
# ["H"]
|
317
|
+
#
|
318
|
+
# @example Sample same size of self: every element is picked in random order
|
319
|
+
# v.sample(8)
|
320
|
+
#
|
321
|
+
# # =>
|
322
|
+
# #<RedAmber::Vector(:string, size=8):0x000000000001bda0>
|
323
|
+
# ["H", "D", "B", "F", "E", "A", "G", "C"]
|
324
|
+
#
|
325
|
+
# @example Over sampling: "E" and "A" are sampled repeatedly
|
326
|
+
# v.sample(9)
|
327
|
+
#
|
328
|
+
# # =>
|
329
|
+
# #<RedAmber::Vector(:string, size=9):0x000000000001d790>
|
330
|
+
# ["E", "E", "A", "D", "H", "C", "A", "F", "H"]
|
331
|
+
#
|
332
|
+
# @overload sample(prop)
|
333
|
+
# Pick up elements by proportion `prop` at random.
|
334
|
+
#
|
335
|
+
# @param prop [Float]
|
336
|
+
# positive proportion of elements to pick.
|
337
|
+
# Absolute number of elements to pick: `prop*size` is rounded (by `half: :up`).
|
338
|
+
# If prop is smaller than or equal to 1.0, elements are picked without repetition.
|
339
|
+
# If prop is greater than 1.0, some elements are picked repeatedly.
|
340
|
+
# @return [Vector]
|
341
|
+
# sampled elements.
|
342
|
+
# If picked element is only one, it returns a Vector of size == 1
|
343
|
+
# not a scalar.
|
344
|
+
# @example Sample same size of self: every element is picked in random order
|
345
|
+
# v.sample(1.0)
|
346
|
+
#
|
347
|
+
# # =>
|
348
|
+
# #<RedAmber::Vector(:string, size=8):0x000000000001bda0>
|
349
|
+
# ["D", "H", "F", "C", "A", "B", "E", "G"]
|
350
|
+
#
|
351
|
+
# @example 2 times over sampling
|
352
|
+
# v.sample(2.0)
|
353
|
+
#
|
354
|
+
# # =>
|
355
|
+
# #<RedAmber::Vector(:string, size=16):0x00000000000233e8>
|
356
|
+
# ["H", "B", "C", "B", "C", "A", "F", "A", "E", "C", "H", "F", "F", "A", ... ]
|
357
|
+
#
|
358
|
+
# @since 0.4.0
|
359
|
+
#
|
360
|
+
def sample(n_or_prop = nil)
|
361
|
+
require 'arrow-numo-narray'
|
362
|
+
|
363
|
+
return nil if size == 0
|
364
|
+
|
365
|
+
n_sample =
|
366
|
+
case n_or_prop
|
367
|
+
in Integer
|
368
|
+
n_or_prop
|
369
|
+
in Float
|
370
|
+
(n_or_prop * size).round
|
371
|
+
in nil
|
372
|
+
return to_a.sample
|
373
|
+
else
|
374
|
+
raise VectorArgumentError, "must specify Integer or Float but #{n_or_prop}"
|
375
|
+
end
|
376
|
+
if n_or_prop < 0
|
377
|
+
raise VectorArgumentError, '#sample does not accept negative number.'
|
378
|
+
end
|
379
|
+
return Vector.new([]) if n_sample == 0
|
380
|
+
|
381
|
+
over_sample = [8 * size, n_sample].max
|
382
|
+
over_size = n_sample > size ? n_sample / size * size * 2 : size
|
383
|
+
over_vector =
|
384
|
+
Vector.create(Numo::UInt32.new(over_size).rand(over_sample).to_arrow_array)
|
385
|
+
indices = over_vector.rank.take(*0...n_sample)
|
386
|
+
take(indices - ((indices / size) * size))
|
387
|
+
end
|
388
|
+
|
147
389
|
private
|
148
390
|
|
149
391
|
# Accepts indices by numeric Vector
|