red_amber 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -153,10 +153,23 @@ module RedAmber
153
153
  # @param element
154
154
  # an element of self.
155
155
  # @return [integer, nil]
156
- # founded position of element. If it is not found, returns nil.
156
+ # position of element. If it is not found, returns nil.
157
157
  #
158
158
  def index(element)
159
- (0...size).find { |i| self[i] == element }
159
+ if element.nil?
160
+ datum = find(:is_null).execute([data])
161
+ value = Arrow::Scalar.resolve(true, :boolean)
162
+ else
163
+ datum = data
164
+ value = Arrow::Scalar.resolve(element, type)
165
+ end
166
+ datum = find(:index).execute([datum], value: value)
167
+ index = get_scalar(datum)
168
+ if index.negative?
169
+ nil
170
+ else
171
+ index
172
+ end
160
173
  end
161
174
 
162
175
  # Returns first element of self.
@@ -229,15 +242,23 @@ module RedAmber
229
242
  take(sort_indices(order: order))
230
243
  end
231
244
 
232
- # Returns numerical rank of self.
245
+ # Returns 0-based numerical rank of self.
233
246
  # - Nil values are considered greater than any value.
234
247
  # - NaN values are considered greater than any value but smaller than nil values.
235
- # - Tiebreakers are ranked in order of appearance.
248
+ # - Tiebreakers are ranked in order of appearance by default or
249
+ # with `tie: :first` option.
236
250
  # - `RankOptions` in C++ function is not implemented in C GLib yet.
237
251
  # This method is currently fixed to the default behavior.
238
252
  #
253
+ # @param tie [:first, :min, :max, :dense]
254
+ # configure how ties between equal values are handled.
255
+ # - first: Ranks are assigned in order of when ties appear in the input.
256
+ # - min: Ties get the smallest possible rank in the sorted order.
257
+ # - max: Ties get the largest possible rank in the sorted order.
258
+ # - dense: The ranks span a dense [0, M-1] interval where M is the number
259
+ # of distinct values in the input.
239
260
  # @return [Vector]
240
- # 0-based rank of self (0...size in range).
261
+ # 0-based rank in uint64 of self (0...size in range).
241
262
  # @example Rank of float Vector
242
263
  # fv = Vector.new(0.1, nil, Float::NAN, 0.2, 0.1); fv
243
264
  #
@@ -245,7 +266,7 @@ module RedAmber
245
266
  # #<RedAmber::Vector(:double, size=5):0x000000000000c65c>
246
267
  # [0.1, nil, NaN, 0.2, 0.1]
247
268
  #
248
- # fv.rank
269
+ # fv.rank # or fv.rank(tie: :first)
249
270
  #
250
271
  # # =>
251
272
  # #<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
@@ -264,20 +285,42 @@ module RedAmber
264
285
  # #<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
265
286
  # [0, 2, 4, 1, 3]
266
287
  #
288
+ # @example Rank of Float Vector with tie: :min
289
+ # fv.rank(tie: :min)
290
+ #
291
+ # # =>
292
+ # #<RedAmber::Vector(:uint64, size=5):0x00000000001593ac>
293
+ # [0, 3, 3, 2, 0]
294
+ #
295
+ # @example Rank of Float Vector with tie: :max
296
+ # fv.rank(tie: :max)
297
+ #
298
+ # # =>
299
+ # #<RedAmber::Vector(:uint64, size=5):0x0000000000160d50>
300
+ # [1, 4, 4, 2, 1]
301
+ #
302
+ # @example Rank of Float Vector with tie: :dense
303
+ # fv.rank(tie: :dense)
304
+ #
305
+ # # =>
306
+ # #<RedAmber::Vector(:uint64, size=5):0x000000000016993c>
307
+ # [0, 2, 2, 1, 0]
308
+ #
267
309
  # @since 0.4.0
268
310
  #
269
- def rank
311
+ def rank(tie: :first)
270
312
  datum =
271
313
  case data
272
314
  when Arrow::ChunkedArray
273
- Arrow::Function.find(:rank).execute([data.pack])
315
+ find(:rank).execute([data.pack], tiebreaker: tie)
274
316
  else
275
- Arrow::Function.find(:rank).execute([data])
317
+ find(:rank).execute([data], tiebreaker: tie)
276
318
  end
277
- Vector.create(datum.value) - 1
319
+ Vector.create(find(:subtract).execute([datum, 1]).value)
278
320
  end
279
321
 
280
322
  # Pick up elements at random.
323
+ # @note This method requires 'arrow-numo-narray' gem.
281
324
  #
282
325
  # @overload sample()
283
326
  # Return a randomly selected element.
@@ -298,12 +341,12 @@ module RedAmber
298
341
  # "C"
299
342
  #
300
343
  # @overload sample(n)
301
- # Pick up n elements at random.
344
+ # Select n elements at random.
302
345
  #
303
346
  # @param n [Integer]
304
- # positive number of elements to pick.
305
- # If n is smaller or equal to size, elements are picked by non-repeating.
306
- # If n is greater than `size`, elements are picked repeatedly.
347
+ # positive number of elements to select.
348
+ # If n is smaller than or equal to size, elements are selected without repetition.
349
+ # If n is greater than `size`, elements are selected repeatedly.
307
350
  # @return [Vector]
308
351
  # sampled elements.
309
352
  # If n == 1 (in case of `sample(1)`), it returns a Vector of size == 1
@@ -315,7 +358,7 @@ module RedAmber
315
358
  # #<RedAmber::Vector(:string, size=1):0x000000000001a3b0>
316
359
  # ["H"]
317
360
  #
318
- # @example Sample same size of self: every element is picked in random order
361
+ # @example Sample same size of self: every element is selected in random order
319
362
  # v.sample(8)
320
363
  #
321
364
  # # =>
@@ -330,18 +373,18 @@ module RedAmber
330
373
  # ["E", "E", "A", "D", "H", "C", "A", "F", "H"]
331
374
  #
332
375
  # @overload sample(prop)
333
- # Pick up elements by proportion `prop` at random.
376
+ # Select elements by proportion `prop` at random.
334
377
  #
335
378
  # @param prop [Float]
336
- # positive proportion of elements to pick.
337
- # Absolute number of elements to pick:`prop*size` is rounded (by `half: :up``).
338
- # If prop is smaller or equal to 1.0, elements are picked by non-repeating.
339
- # If prop is greater than 1.0, some elements are picked repeatedly.
379
+ # positive proportion of elements to select.
380
+ # Absolute number of elements to select: `prop*size` is truncated toward zero.
381
+ # If prop is smaller than or equal to 1.0, elements are selected without repetition.
382
+ # If prop is greater than 1.0, some elements are selected repeatedly.
340
383
  # @return [Vector]
341
384
  # sampled elements.
342
- # If picked element is only one, it returns a Vector of size == 1
385
+ # If selected element is only one, it returns a Vector of size == 1
343
386
  # not a scalar.
344
- # @example Sample same size of self: every element is picked in random order
387
+ # @example Sample same size of self: every element is selected in random order
345
388
  # v.sample(1.0)
346
389
  #
347
390
  # # =>
@@ -355,6 +398,14 @@ module RedAmber
355
398
  # #<RedAmber::Vector(:string, size=16):0x00000000000233e8>
356
399
  # ["H", "B", "C", "B", "C", "A", "F", "A", "E", "C", "H", "F", "F", "A", ... ]
357
400
  #
401
+ # @example prop less than 1.0
402
+ # v.sample(0.7)
403
+ #
404
+ # # =>
405
+ # # Take (8 * 0.7).truncate => 5 samples
406
+ # #<RedAmber::Vector(:string, size=5):0x000000000001afe0>
407
+ # ["C", "A", "E", "H", "D"]
408
+ #
358
409
  # @since 0.4.0
359
410
  #
360
411
  def sample(n_or_prop = nil)
@@ -367,7 +418,7 @@ module RedAmber
367
418
  in Integer
368
419
  n_or_prop
369
420
  in Float
370
- (n_or_prop * size).round
421
+ (n_or_prop * size).truncate
371
422
  in nil
372
423
  return to_a.sample
373
424
  else
@@ -0,0 +1,211 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedAmber
4
+ # Mix-in for class Vector
5
+ # Methods for string-like related function
6
+ module VectorStringFunction
7
+ using RefineArray
8
+ using RefineArrayLike
9
+
10
+ # For each string in self, emit true if it contains a given pattern.
11
+ #
12
+ # @overload match_substring?(string, ignore_case: nil)
13
+ # Emit true if it contains `string`.
14
+ #
15
+ # @param string [String]
16
+ # string pattern to match.
17
+ # @param ignore_case [boolean]
18
+ # switch whether to ignore case. Ignore case if true.
19
+ # @return [Vector]
20
+ # boolean Vector to show if elements contain a given pattern.
21
+ # nil inputs emit nil.
22
+ # @example Match with string.
23
+ # vector = Vector.new('array', 'Arrow', 'carrot', nil, 'window')
24
+ # vector.match_substring?('arr')
25
+ # # =>
26
+ # #<RedAmber::Vector(:boolean, size=5):0x000000000005a208>
27
+ # [true, false, true, nil, false]
28
+ #
29
+ # @overload match_substring?(regexp, ignore_case: nil)
30
+ # Emit true if it contains substring matching with `regexp`.
31
+ # It calls `match_substring_regex` in Arrow compute function and
32
+ # uses re2 library.
33
+ #
34
+ # @param regexp [Regexp]
35
+ # regular expression pattern to match. Ruby's Regexp is given and
36
+ # it will be passed to Arrow's kernel by its source.
37
+ # @param ignore_case [boolean]
38
+ # switch whether to ignore case. Ignore case if true.
39
+ # When `ignore_case` is false, casefolding option in regexp is prioritized.
40
+ # @return [Vector]
41
+ # boolean Vector to show if elements contain a given pattern.
42
+ # nil inputs emit nil.
43
+ # @example Match with regexp.
44
+ # vector.match_substring?(/arr/)
45
+ # # =>
46
+ # #<RedAmber::Vector(:boolean, size=5):0x0000000000014b68>
47
+ # [true, false, true, nil, false]
48
+ #
49
+ # @since 0.5.0
50
+ #
51
+ def match_substring?(pattern, ignore_case: nil)
52
+ options = Arrow::MatchSubstringOptions.new
53
+ datum =
54
+ case pattern
55
+ when String
56
+ options.ignore_case = (ignore_case || false)
57
+ options.pattern = pattern
58
+ find(:match_substring).execute([data], options)
59
+ when Regexp
60
+ options.ignore_case = (pattern.casefold? || ignore_case || false)
61
+ options.pattern = pattern.source
62
+ find(:match_substring_regex).execute([data], options)
63
+ else
64
+ message =
65
+ "pattern must be either String or Regexp: #{pattern.inspect}"
66
+ raise VectorArgumentError, message
67
+ end
68
+ Vector.create(datum.value)
69
+ end
70
+
71
+ # Check if elements in self end with a literal pattern.
72
+ #
73
+ # @param string [String]
74
+ # string pattern to match.
75
+ # @param ignore_case [boolean]
76
+ # switch whether to ignore case. Ignore case if true.
77
+ # @return [Vector]
78
+ # boolean Vector to show if elements end with a given pattern.
79
+ # nil inputs emit nil.
80
+ # @example Check if end with?.
81
+ # vector = Vector.new('array', 'Arrow', 'carrot', nil, 'window')
82
+ # vector.end_with?('ow')
83
+ # # =>
84
+ # #<RedAmber::Vector(:boolean, size=5):0x00000000000108ec>
85
+ # [false, true, false, nil, true]
86
+ # @since 0.5.0
87
+ #
88
+ def end_with?(string, ignore_case: nil)
89
+ options = Arrow::MatchSubstringOptions.new
90
+ options.ignore_case = (ignore_case || false)
91
+ options.pattern = string
92
+ datum = find(:ends_with).execute([data], options)
93
+ Vector.create(datum.value)
94
+ end
95
+
96
+ # Check if elements in self start with a literal pattern.
97
+ #
98
+ # @param string [String]
99
+ # string pattern to match.
100
+ # @param ignore_case [boolean]
101
+ # switch whether to ignore case. Ignore case if true.
102
+ # @return [Vector]
103
+ # boolean Vector to show if elements start with a given pattern.
104
+ # nil inputs emit nil.
105
+ # @example Check if start with?.
106
+ # vector = Vector.new('array', 'Arrow', 'carrot', nil, 'window')
107
+ # vector.start_with?('car')
108
+ # # =>
109
+ # #<RedAmber::Vector(:boolean, size=5):0x00000000000193fc>
110
+ # [false, false, true, nil, false]
111
+ # @since 0.5.0
112
+ #
113
+ def start_with?(string, ignore_case: nil)
114
+ options = Arrow::MatchSubstringOptions.new
115
+ options.ignore_case = (ignore_case || false)
116
+ options.pattern = string
117
+ datum = find(:starts_with).execute([data], options)
118
+ Vector.create(datum.value)
119
+ end
120
+
121
+ # Match elements of self against SQL-style LIKE pattern.
122
+ # the pattern matches a given pattern at any position.
123
+ # '%' will match any number of characters,
124
+ # '_' will match exactly one character,
125
+ # and any other character matches itself.
126
+ # To match a literal '%', '_', or '\', precede the character with a backslash.
127
+ #
128
+ # @param string [String]
129
+ # string pattern to match.
130
+ # @param ignore_case [boolean]
131
+ # switch whether to ignore case. Ignore case if true.
132
+ # @return [Vector]
133
+ # boolean Vector to show if elements match a given pattern.
134
+ # nil inputs emit nil.
135
+ # @example Check with match_like?.
136
+ # vector = Vector.new('array', 'Arrow', 'carrot', nil, 'window')
137
+ # vector.match_like?('_rr%')
138
+ # # =>
139
+ # @since 0.5.0
140
+ #
141
+ def match_like?(string, ignore_case: nil)
142
+ options = Arrow::MatchSubstringOptions.new
143
+ options.ignore_case = (ignore_case || false)
144
+ options.pattern = string
145
+ datum = find(:match_like).execute([data], options)
146
+ Vector.create(datum.value)
147
+ end
148
+
149
+ # For each string in self, count occurrences of a substring matching the given pattern.
150
+ #
151
+ # @overload count_substring(string, ignore_case: nil)
152
+ # Count if it contains `string`.
153
+ #
154
+ # @param string [String]
155
+ # string pattern to count.
156
+ # @param ignore_case [boolean]
157
+ # switch whether to ignore case. Ignore case if true.
158
+ # @return [Vector]
159
+ # int32 or int64 Vector to show the counts of a given pattern.
160
+ # nil inputs emit nil.
161
+ # @example Count with string.
162
+ # vector2 = Vector.new('amber', 'Amazon', 'banana', nil)
163
+ # vector2.count_substring('an')
164
+ # # =>
165
+ # #<RedAmber::Vector(:int32, size=4):0x000000000003db30>
166
+ # [0, 0, 2, nil]
167
+ #
168
+ # @overload count_substring(regexp, ignore_case: nil)
169
+ # Count if it contains substring matching with `regexp`.
170
+ # It calls `count_substring_regex` in Arrow compute function and
171
+ # uses re2 library.
172
+ #
173
+ # @param regexp [Regexp]
174
+ # regular expression pattern to count. Ruby's Regexp is given and
175
+ # it will be passed to Arrow's kernel by its source.
176
+ # @param ignore_case [boolean]
177
+ # switch whether to ignore case. Ignore case if true.
178
+ # When `ignore_case` is false, casefolding option in regexp is prioritized.
179
+ # @return [Vector]
180
+ # int32 or int64 Vector to show the counts in given pattern.
181
+ # nil inputs emit nil.
182
+ # @example Count with regexp with case ignored.
183
+ # vector2.count_substring(/a[mn]/i)
184
+ # # =>
185
+ # #<RedAmber::Vector(:int32, size=4):0x0000000000051298>
186
+ # [1, 1, 2, nil]
187
+ # # it is same result as `vector2.count_substring(/a[mn]/, ignore_case: true)`
188
+ #
189
+ # @since 0.5.0
190
+ #
191
+ def count_substring(pattern, ignore_case: nil)
192
+ options = Arrow::MatchSubstringOptions.new
193
+ datum =
194
+ case pattern
195
+ when String
196
+ options.ignore_case = (ignore_case || false)
197
+ options.pattern = pattern
198
+ find(:count_substring).execute([data], options)
199
+ when Regexp
200
+ options.ignore_case = (pattern.casefold? || ignore_case || false)
201
+ options.pattern = pattern.source
202
+ find(:count_substring_regex).execute([data], options)
203
+ else
204
+ message =
205
+ "pattern must be either String or Regexp: #{pattern.inspect}"
206
+ raise VectorArgumentError, message
207
+ end
208
+ Vector.create(datum.value)
209
+ end
210
+ end
211
+ end
@@ -184,6 +184,8 @@ module RedAmber
184
184
  #
185
185
  # Propagate next valid value backward to previous nil values.
186
186
  # Or nothing if all next values are nil.
187
+ # @note Use `fill_nil(value)` to replace nil by a value.
188
+ # @see #fill_nil
187
189
  # @return [Vector]
188
190
  # a Vector which filled nil backward.
189
191
  # @example
@@ -201,6 +203,8 @@ module RedAmber
201
203
  #
202
204
  # Propagate last valid value forward to next nil values.
203
205
  # Or nothing if all previous values are nil.
206
+ # @note Use `fill_nil(value)` to replace nil by a value.
207
+ # @see #fill_nil
204
208
  # @return [Vector]
205
209
  # a Vector which filled nil forward.
206
210
  # @example
@@ -149,6 +149,22 @@ module RedAmber
149
149
  replace_with(booleans.data, replacer_array)
150
150
  end
151
151
 
152
+ # Replace nil with value.
153
+ #
154
+ # @note Use `fill_nil_backward` or `fill_nil_forward` to replace nil
155
+ # by adjacent values.
156
+ # @see #fill_nil_backward
157
+ # @see #fill_nil_forward
158
+ # @param value [scalar]
159
+ # the value to replace with.
160
+ # @return [Vector]
161
+ # replaced Vector
162
+ # @since 0.5.0
163
+ #
164
+ def fill_nil(value)
165
+ is_nil.if_else(value, self)
166
+ end
167
+
152
168
  # Choose values based on self.
153
169
  #
154
170
  # [Ternary element-wise function] Returns a Vector.
@@ -449,6 +465,18 @@ module RedAmber
449
465
  end
450
466
  alias_method :concat, :concatenate
451
467
 
468
+ # Cast self to `type`.
469
+ #
470
+ # @param type [symbol]
471
+ # type to cast.
472
+ # @return [Vector]
473
+ # casted Vector.
474
+ # @since 0.5.0
475
+ #
476
+ def cast(type)
477
+ Vector.create(data.cast(type))
478
+ end
479
+
452
480
  private
453
481
 
454
482
  # Replace elements selected with a boolean mask
@@ -2,5 +2,5 @@
2
2
 
3
3
  module RedAmber
4
4
  # Library version
5
- VERSION = '0.4.2'
5
+ VERSION = '0.5.0'
6
6
  end
data/lib/red_amber.rb CHANGED
@@ -17,9 +17,10 @@ require_relative 'red_amber/group'
17
17
  require_relative 'red_amber/subframes'
18
18
  require_relative 'red_amber/vector_aggregation'
19
19
  require_relative 'red_amber/vector_binary_element_wise'
20
+ require_relative 'red_amber/vector_selectable'
21
+ require_relative 'red_amber/vector_string_function'
20
22
  require_relative 'red_amber/vector_unary_element_wise'
21
23
  require_relative 'red_amber/vector_updatable'
22
- require_relative 'red_amber/vector_selectable'
23
24
  require_relative 'red_amber/vector'
24
25
  require_relative 'red_amber/version'
25
26
 
data/red_amber.gemspec CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
- spec.add_dependency 'red-arrow', '~> 11.0.0'
34
+ spec.add_dependency 'red-arrow', '~> 12.0.0'
35
35
 
36
36
  # Development dependency has gone to the Gemfile (rubygems/bundler#7237)
37
37
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-04-02 00:00:00.000000000 Z
11
+ date: 2023-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 11.0.0
19
+ version: 12.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 11.0.0
26
+ version: 12.0.0
27
27
  description: RedAmber is a simple dataframe library inspired by Rover-df and powered
28
28
  by Red Arrow.
29
29
  email:
@@ -38,6 +38,7 @@ files:
38
38
  - CHANGELOG.md
39
39
  - Gemfile
40
40
  - LICENSE
41
+ - README.ja.md
41
42
  - README.md
42
43
  - Rakefile
43
44
  - benchmark/basic.yml
@@ -114,6 +115,7 @@ files:
114
115
  - lib/red_amber/vector_aggregation.rb
115
116
  - lib/red_amber/vector_binary_element_wise.rb
116
117
  - lib/red_amber/vector_selectable.rb
118
+ - lib/red_amber/vector_string_function.rb
117
119
  - lib/red_amber/vector_unary_element_wise.rb
118
120
  - lib/red_amber/vector_updatable.rb
119
121
  - lib/red_amber/version.rb
@@ -142,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
144
  - !ruby/object:Gem::Version
143
145
  version: '0'
144
146
  requirements: []
145
- rubygems_version: 3.4.10
147
+ rubygems_version: 3.4.12
146
148
  signing_key:
147
149
  specification_version: 4
148
150
  summary: Simple dataframe library for Ruby