red_amber 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -4,7 +4,7 @@
4
4
  # reference: https://arrow.apache.org/docs/cpp/compute.html
5
5
 
6
6
  module RedAmber
7
- # mix-in for class Vector
7
+ # Mix-in for class Vector
8
8
  # Functions to select some data.
9
9
  module VectorSelectable
10
10
  using RefineArray
@@ -12,11 +12,14 @@ module RedAmber
12
12
 
13
13
  # Select elements in the self by indices.
14
14
  #
15
- # @param indices [Array<Numeric>, Vector] indices.
16
- # @yield [Array<Numeric>, Vector] indices.
17
- # @return [Vector] Vector by selected elements.
15
+ # @param indices [Array<Numeric>, Vector]
16
+ # an array-like of indices.
17
+ # @yieldreturn [Array<Numeric>, Vector]
18
+ # an array-like of indices from the block.
19
+ # @return [Vector]
20
+ # vector by selected elements.
18
21
  #
19
- # TODO: support for the option `boundscheck: true`
22
+ # TODO: support for the option `boundscheck: true`
20
23
  def take(*indices, &block)
21
24
  if block
22
25
  unless indices.empty?
@@ -47,11 +50,14 @@ module RedAmber
47
50
 
48
51
  # Select elements in the self by booleans.
49
52
  #
50
- # @param booleans [Array<true, false, nil>, Vector] booleans.
51
- # @yield [Array<true, false, nil>, Vector] booleans.
52
- # @return [Vector] Vector by selected elements.
53
+ # @param booleans [Array<true, false, nil>, Vector]
54
+ # an array-like of booleans.
55
+ # @yieldreturn [Array<true, false, nil>, Vector]
56
+ # an array-like of booleans from the block.
57
+ # @return [Vector]
58
+ # vector by selected elements.
53
59
  #
54
- # TODO: support for the option `null_selection_behavior: :drop`
60
+ # TODO: support for the option `null_selection_behavior: :drop`
55
61
  def filter(*booleans, &block)
56
62
  if block
57
63
  unless booleans.empty?
@@ -87,9 +93,12 @@ module RedAmber
87
93
 
88
94
  # Select elements in the self by indices or booleans.
89
95
  #
90
- # @param args [Array<Numeric, true, false, nil>, Vector] specifier.
91
- # @yield [Array<Numeric, true, false, nil>, Vector] specifier.
92
- # @return [scalar, Array] returns scalar or array.
96
+ # @param args [Array<Numeric, true, false, nil>, Vector]
97
+ # specifier. Indices or booleans.
98
+ # @yieldparam [Array<Numeric, true, false, nil>, Vector]
99
+ # specifier. Indices or booleans.
100
+ # @return [scalar, Array]
101
+ # returns scalar or array.
93
102
  #
94
103
  def [](*args)
95
104
  array =
@@ -119,31 +128,264 @@ module RedAmber
119
128
  raise VectorArgumentError, "Invalid argument: #{args}"
120
129
  end
121
130
 
122
- # @param values [Array, Arrow::Array, Vector]
131
+ # Check if elements of self are in the other values.
132
+ #
133
+ # @param values [Vector, Array, Arrow::Array, Arrow::ChunkedArray]
134
+ # values to test existence.
135
+ # @return [Vector]
136
+ # boolean Vector.
137
+ #
123
138
  def is_in(*values)
124
- self_data = chunked? ? data.pack : data
125
-
126
- array =
139
+ enum =
127
140
  case values
128
- in [Vector] | [Arrow::Array] | [Arrow::ChunkedArray]
129
- values[0].to_a
141
+ in [] | [[]] | [nil] |[[nil]]
142
+ return Vector.new([false] * size)
143
+ in [Vector | Arrow::Array | Arrow::ChunkedArray]
144
+ values[0].each
130
145
  else
131
- Array(values).flatten
146
+ parse_args(values, size, symbolize: false)
132
147
  end
133
-
134
- Vector.create(self_data.is_in(array))
148
+ enum.filter_map { self == _1 unless _1.nil? }.reduce(&:|)
135
149
  end
136
150
 
137
- # Arrow's support required
151
+ # Returns index of first matched position of element in self.
152
+ #
153
+ # @param element
154
+ # an element of self.
155
+ # @return [integer, nil]
156
+ # position where the element was found. If it is not found, returns nil.
157
+ #
138
158
  def index(element)
139
- to_a.index(element)
159
+ (0...size).find { |i| self[i] == element }
140
160
  end
141
161
 
162
+ # Returns first element of self.
163
+ #
164
+ # @return
165
+ # the first element.
166
+ # @since 0.4.1
167
+ #
168
+ def first
169
+ data[0]
170
+ end
171
+
172
+ # Returns last element of self.
173
+ #
174
+ # @return
175
+ # the last element.
176
+ # @since 0.4.1
177
+ #
178
+ def last
179
+ data[-1]
180
+ end
181
+
182
+ # Drops nils in self and returns a new Vector as the result.
183
+ #
184
+ # @return [Vector]
185
+ # a Vector without nils.
186
+ #
142
187
  def drop_nil
143
188
  datum = find(:drop_null).execute([data])
144
189
  Vector.create(datum.value)
145
190
  end
146
191
 
192
+ # Arrange values in Vector.
193
+ #
194
+ # @param order [Symbol]
195
+ # sort order.
196
+ # - `:+`, `:ascending` or without argument will sort in increasing order.
197
+ # - `:-` or `:descending` will sort in decreasing order.
198
+ # @return [Vector]
199
+ # sorted Vector.
200
+ # @example Sort in increasing order (default)
201
+ # Vector.new(%w[B D A E C]).sort
202
+ # # same as #sort(:+)
203
+ # # same as #sort(:ascending)
204
+ #
205
+ # # =>
206
+ # #<RedAmber::Vector(:string, size=5):0x000000000000c134>
207
+ # ["A", "B", "C", "D", "E"]
208
+ #
209
+ # @example Sort in decreasing order
210
+ # Vector.new(%w[B D A E C]).sort(:-)
211
+ # # same as #sort(:descending)
212
+ #
213
+ # # =>
214
+ # #<RedAmber::Vector(:string, size=5):0x000000000000c148>
215
+ # ["E", "D", "C", "B", "A"]
216
+ #
217
+ # @since 0.4.0
218
+ #
219
+ def sort(order = :ascending)
220
+ order =
221
+ case order.to_sym
222
+ when :+, :ascending, :increasing
223
+ :ascending
224
+ when :-, :descending, :decreasing
225
+ :descending
226
+ else
227
+ raise VectorArgumentError, "illegal order option: #{order}"
228
+ end
229
+ take(sort_indices(order: order))
230
+ end
231
+
232
+ # Returns numerical rank of self.
233
+ # - Nil values are considered greater than any value.
234
+ # - NaN values are considered greater than any value but smaller than nil values.
235
+ # - Tiebreakers are ranked in order of appearance.
236
+ # - `RankOptions` in C++ function is not implemented in C GLib yet.
237
+ # This method is currently fixed to the default behavior.
238
+ #
239
+ # @return [Vector]
240
+ # 0-based rank of self (0...size in range).
241
+ # @example Rank of float Vector
242
+ # fv = Vector.new(0.1, nil, Float::NAN, 0.2, 0.1); fv
243
+ #
244
+ # # =>
245
+ # #<RedAmber::Vector(:double, size=5):0x000000000000c65c>
246
+ # [0.1, nil, NaN, 0.2, 0.1]
247
+ #
248
+ # fv.rank
249
+ #
250
+ # # =>
251
+ # #<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
252
+ # [0, 4, 3, 2, 1]
253
+ #
254
+ # @example Rank of string Vector
255
+ # sv = Vector.new("A", "B", nil, "A", "C"); sv
256
+ #
257
+ # # =>
258
+ # #<RedAmber::Vector(:string, size=5):0x0000000000003854>
259
+ # ["A", "B", nil, "A", "C"]
260
+ #
261
+ # sv.rank
262
+ #
263
+ # # =>
264
+ # #<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
265
+ # [0, 2, 4, 1, 3]
266
+ #
267
+ # @since 0.4.0
268
+ #
269
+ def rank
270
+ datum =
271
+ case data
272
+ when Arrow::ChunkedArray
273
+ Arrow::Function.find(:rank).execute([data.pack])
274
+ else
275
+ Arrow::Function.find(:rank).execute([data])
276
+ end
277
+ Vector.create(datum.value) - 1
278
+ end
279
+
280
+ # Pick up elements at random.
281
+ #
282
+ # @overload sample()
283
+ # Return a randomly selected element.
284
+ # This is one of the aggregation functions.
285
+ #
286
+ # @return [scalar]
287
+ # one of the elements in self.
288
+ # @example Sample an element
289
+ # v = Vector.new('A'..'H'); v
290
+ #
291
+ # # =>
292
+ # #<RedAmber::Vector(:string, size=8):0x0000000000011b20>
293
+ # ["A", "B", "C", "D", "E", "F", "G", "H"]
294
+ #
295
+ # v.sample
296
+ #
297
+ # # =>
298
+ # "C"
299
+ #
300
+ # @overload sample(n)
301
+ # Pick up n elements at random.
302
+ #
303
+ # @param n [Integer]
304
+ # positive number of elements to pick.
305
+ # If n is smaller than or equal to `size`, elements are picked without repetition.
306
+ # If n is greater than `size`, elements are picked repeatedly.
307
+ # @return [Vector]
308
+ # sampled elements.
309
+ # If n == 1 (in case of `sample(1)`), it returns a Vector of size == 1
310
+ # not a scalar.
311
+ # @example Sample Vector in size 1
312
+ # v.sample(1)
313
+ #
314
+ # # =>
315
+ # #<RedAmber::Vector(:string, size=1):0x000000000001a3b0>
316
+ # ["H"]
317
+ #
318
+ # @example Sample same size of self: every element is picked in random order
319
+ # v.sample(8)
320
+ #
321
+ # # =>
322
+ # #<RedAmber::Vector(:string, size=8):0x000000000001bda0>
323
+ # ["H", "D", "B", "F", "E", "A", "G", "C"]
324
+ #
325
+ # @example Over sampling: "E" and "A" are sampled repeatedly
326
+ # v.sample(9)
327
+ #
328
+ # # =>
329
+ # #<RedAmber::Vector(:string, size=9):0x000000000001d790>
330
+ # ["E", "E", "A", "D", "H", "C", "A", "F", "H"]
331
+ #
332
+ # @overload sample(prop)
333
+ # Pick up elements by proportion `prop` at random.
334
+ #
335
+ # @param prop [Float]
336
+ # positive proportion of elements to pick.
337
+ # Absolute number of elements to pick: `prop*size` is rounded (by `half: :up`).
338
+ # If prop is smaller than or equal to 1.0, elements are picked without repetition.
339
+ # If prop is greater than 1.0, some elements are picked repeatedly.
340
+ # @return [Vector]
341
+ # sampled elements.
342
+ # If picked element is only one, it returns a Vector of size == 1
343
+ # not a scalar.
344
+ # @example Sample same size of self: every element is picked in random order
345
+ # v.sample(1.0)
346
+ #
347
+ # # =>
348
+ # #<RedAmber::Vector(:string, size=8):0x000000000001bda0>
349
+ # ["D", "H", "F", "C", "A", "B", "E", "G"]
350
+ #
351
+ # @example 2 times over sampling
352
+ # v.sample(2.0)
353
+ #
354
+ # # =>
355
+ # #<RedAmber::Vector(:string, size=16):0x00000000000233e8>
356
+ # ["H", "B", "C", "B", "C", "A", "F", "A", "E", "C", "H", "F", "F", "A", ... ]
357
+ #
358
+ # @since 0.4.0
359
+ #
360
+ def sample(n_or_prop = nil)
361
+ require 'arrow-numo-narray'
362
+
363
+ return nil if size == 0
364
+
365
+ n_sample =
366
+ case n_or_prop
367
+ in Integer
368
+ n_or_prop
369
+ in Float
370
+ (n_or_prop * size).round
371
+ in nil
372
+ return to_a.sample
373
+ else
374
+ raise VectorArgumentError, "must specify Integer or Float but #{n_or_prop}"
375
+ end
376
+ if n_or_prop < 0
377
+ raise VectorArgumentError, '#sample does not accept negative number.'
378
+ end
379
+ return Vector.new([]) if n_sample == 0
380
+
381
+ over_sample = [8 * size, n_sample].max
382
+ over_size = n_sample > size ? n_sample / size * size * 2 : size
383
+ over_vector =
384
+ Vector.create(Numo::UInt32.new(over_size).rand(over_sample).to_arrow_array)
385
+ indices = over_vector.rank.take(*0...n_sample)
386
+ take(indices - ((indices / size) * size))
387
+ end
388
+
147
389
  private
148
390
 
149
391
  # Accepts indices by numeric Vector