red_amber 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,6 +7,9 @@ module RedAmber
7
7
  # mix-in for class Vector
8
8
  # Functions to make up some data (especially missing) for new data.
9
9
  module VectorUpdatable
10
+ # Add properties to Arrow::Array and Arrow::ChunkedArray
11
+ using RefineArrayLike
12
+
10
13
  # Replace data
11
14
  # @param specifier [Array, Vector, Arrow::Array] index or booleans.
12
15
  # @param replacer [Scalar, Array, Vector, Arrow::Array] new data to replace for.
@@ -14,7 +17,7 @@ module RedAmber
14
17
  # If specifier has no true, return self.
15
18
  #
16
19
  def replace(specifier, replacer)
17
- vector = parse_to_vector(Array(specifier))
20
+ vector = Vector.new(parse_args(Array(specifier), size))
18
21
  return self if vector.empty? || empty?
19
22
 
20
23
  booleans =
@@ -42,7 +45,9 @@ module RedAmber
42
45
  else # Broadcast scalar to Array
43
46
  Arrow::Array.new(Array(replacer) * booleans.to_a.count(true))
44
47
  end
45
- raise VectorArgumentError, 'Replacements size unmatch' if booleans.sum != replacer_array.length
48
+ if booleans.sum != replacer_array.length
49
+ raise VectorArgumentError, 'Replacements size unmatch'
50
+ end
46
51
 
47
52
  replace_with(booleans.data, replacer_array)
48
53
  end
@@ -57,7 +62,7 @@ module RedAmber
57
62
  raise VectorTypeError, 'Reciever must be a boolean' unless boolean?
58
63
 
59
64
  datum = find(:if_else).execute([data, true_choice, false_choice])
60
- Vector.new(datum.value)
65
+ Vector.create(datum.value)
61
66
  end
62
67
 
63
68
  # same behavior as Ruby's invert
@@ -80,11 +85,134 @@ module RedAmber
80
85
  end
81
86
  end
82
87
 
88
+ # Split string Vector and returns Array of columns.
89
+ #
90
+ # @param sep [nil, String, Regexp] separator.
91
+ # If separator is nil (or no argument given), the column will be split by
92
+ # Arrow's split function using any ASCII whitespace.
93
+ # Otherwise sep will be passed to String#split.
94
+ # @param limit [Integer] maximum number to limit separation. Passed to String#split.
95
+ # @return [Array<Vector>] an Array of Vectors.
96
+ # @note A nil element will be separated into nils in the same row. ex) `nil => [nil, nil]`
97
+ #
98
+ def split_to_columns(sep = nil, limit = 0)
99
+ l = split(sep, limit)
100
+ l.list_separate
101
+ end
102
+
103
+ # Split string Vector and flatten into rows.
104
+ #
105
+ # @param sep [nil, String, Regexp] separator.
106
+ # If separator is nil (or no argument given), the column will be split by
107
+ # Arrow's split function using any ASCII whitespace.
108
+ # Otherwise sep will be passed to String#split.
109
+ # @param limit [Integer] maximum number to limit separation. Passed to String#split.
110
+ # @return [Vector] a flattened Vector.
111
+ # @note A nil element will be separated into nils in the same row. ex) `nil => [nil, nil]`
112
+ #
113
+ def split_to_rows(sep = nil, limit = 0)
114
+ l = split(sep, limit)
115
+ l.list_flatten
116
+ end
117
+
118
+ # Return the size of each element of a list Vector (as a Vector).
119
+ #
120
+ # @api private
121
+ #
122
+ def list_sizes
123
+ Vector.create find(:list_value_length).execute([data]).value
124
+ end
125
+
126
+ # Separate list Vector by columns.
127
+ #
128
+ # @api private
129
+ #
130
+ def list_separate
131
+ len = list_sizes.data
132
+ min, max = Arrow::Function.find(:min_max).execute([len]).value.value.map(&:value)
133
+
134
+ result = []
135
+ (0...min).each do |i|
136
+ result << Vector.create(find(:list_element).execute([data, i]).value)
137
+ end
138
+ return result if min == max
139
+
140
+ (min...max).each do |i|
141
+ result << Vector.new(data.map { |e| e&.[](i) })
142
+ end
143
+ result
144
+ end
145
+
146
+ # Flatten list Vector for rows.
147
+ #
148
+ # @api private
149
+ #
150
+ def list_flatten
151
+ Vector.create find(:list_flatten).execute([data]).value
152
+ end
153
+
154
+ # Split each element of a string Vector with a separator and return a list Vector.
155
+ #
156
+ # @note if sep is not specified, use Arrow's ascii_split_whitespace.
157
+ # It will separate string by ascii whitespaces.
158
+ # @note if sep is specified, sep and limit will be passed to String#split.
159
+ #
160
+ def split(sep = nil, limit = 0)
161
+ if empty? || !string?
162
+ raise VectorTypeError, "self is not a valid string Vector: #{self}"
163
+ end
164
+ if self[0].nil? && uniq.to_a == [nil] # Avoid heavy check to be activated always.
165
+ raise VectorTypeError, 'self contains only nil'
166
+ end
167
+
168
+ list =
169
+ if sep
170
+ Arrow::Array.new(to_a.map { |e| e&.split(sep, limit) })
171
+ else
172
+ find(:ascii_split_whitespace).execute([data]).value
173
+ end
174
+ Vector.create(list)
175
+ end
176
+
177
+ # Merge String or other string Vector to self.
178
+ # Self must be a string Vector.
179
+ #
180
+ # @param other [String, Vector]
181
+ # merger from right. It will be broadcasted if it is a scalar String.
182
+ # @param sep [String] separator.
183
+ # @return [Vector] merged Vector
184
+ #
185
+ def merge(other, sep: ' ')
186
+ if empty? || !string?
187
+ raise VectorTypeError,
188
+ "self is not a string Vector: #{self}"
189
+ end
190
+ unless sep.is_a?(String)
191
+ raise VectorArgumentError, "separator is not a String: #{sep}"
192
+ end
193
+
194
+ other_array =
195
+ case other
196
+ in String => s
197
+ [s] * size
198
+ in (Vector | Arrow::Array | Arrow::ChunkedArray) => x if x.string?
199
+ x.to_a
200
+ else
201
+ raise VectorArgumentError,
202
+ "other is not a String or a string Vector: #{self}"
203
+ end
204
+
205
+ list = Arrow::Array.new(to_a.zip(other_array))
206
+ datum = find(:binary_join).execute([list, sep])
207
+ Vector.create(datum.value)
208
+ end
209
+
83
210
  private
84
211
 
85
212
  # Replace elements selected with a boolean mask
86
213
  #
87
- # @param boolean_mask [Arrow::BooleanArray] Boolean mask which indicates the position to be replaced.
214
+ # @param boolean_mask [Arrow::BooleanArray]
215
+ # Boolean mask which indicates the position to be replaced.
88
216
  # - Position with true will be replaced.
89
217
  # - Position with nil will be nil.
90
218
  #
@@ -104,12 +232,13 @@ module RedAmber
104
232
  values = replacer.class.new(data) # Upcast
105
233
 
106
234
  datum = find(:replace_with_mask).execute([values, boolean_mask, replacer])
107
- Vector.new(datum.value)
235
+ Vector.create(datum.value)
108
236
  end
109
237
 
110
238
  # Replace elements selected with a boolean mask by nil
111
239
  #
112
- # @param boolean_mask [Arrow::BooleanArray] Boolean mask which indicates the position to be replaced.
240
+ # @param boolean_mask [Arrow::BooleanArray]
241
+ # Boolean mask which indicates the position to be replaced.
113
242
  # - Position with true will be replaced by nil
114
243
  # - Position with nil will remain as nil.
115
244
  # @return [Vector] Replaced vector.
@@ -117,7 +246,7 @@ module RedAmber
117
246
  def replace_to_nil(boolean_mask)
118
247
  nil_array = data.class.new([nil] * size) # Casted nil Array
119
248
  datum = find(:if_else).execute([boolean_mask, nil_array, data])
120
- Vector.new(datum.value)
249
+ Vector.create(datum.value)
121
250
  end
122
251
  end
123
252
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.2.3'
4
+ VERSION = '0.3.0'
5
5
  end
data/lib/red_amber.rb CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  require 'arrow'
4
4
 
5
+ require_relative 'red_amber/refinements'
5
6
  require_relative 'red_amber/helper'
7
+
6
8
  require_relative 'red_amber/data_frame_combinable'
7
9
  require_relative 'red_amber/data_frame_displayable'
8
10
  require_relative 'red_amber/data_frame_indexable'
data/red_amber.gemspec CHANGED
@@ -9,10 +9,11 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ['heronshoes877@gmail.com']
10
10
 
11
11
  spec.summary = 'Simple dataframe library for Ruby'
12
- spec.description = 'RedAmber is a simple dataframe library inspired by Rover-df and powered by Red Arrow.'
12
+ spec.description = 'RedAmber is a simple dataframe library' \
13
+ 'inspired by Rover-df and powered by Red Arrow.'
13
14
  spec.homepage = 'https://github.com/heronshoes/red_amber'
14
15
  spec.license = 'MIT'
15
- spec.required_ruby_version = '>= 2.7'
16
+ spec.required_ruby_version = '>= 3.0'
16
17
 
17
18
  spec.metadata['homepage_uri'] = spec.homepage
18
19
  spec.metadata['source_code_uri'] = 'https://github.com/heronshoes/red_amber'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-11-16 00:00:00.000000000 Z
11
+ date: 2022-12-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -24,7 +24,7 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 10.0.0
27
- description: RedAmber is a simple dataframe library inspired by Rover-df and powered
27
+ description: RedAmber is a simple dataframe libraryinspired by Rover-df and powered
28
28
  by Red Arrow.
29
29
  email:
30
30
  - heronshoes877@gmail.com
@@ -42,6 +42,7 @@ files:
42
42
  - Rakefile
43
43
  - benchmark/basic.yml
44
44
  - benchmark/combine.yml
45
+ - benchmark/dataframe.yml
45
46
  - benchmark/drop_nil.yml
46
47
  - benchmark/group.yml
47
48
  - benchmark/reshape.yml
@@ -50,6 +51,7 @@ files:
50
51
  - benchmark/rover/penguins.yml
51
52
  - benchmark/rover/planes.yml
52
53
  - benchmark/rover/weather.yml
54
+ - benchmark/vector.yml
53
55
  - doc/CODE_OF_CONDUCT.md
54
56
  - doc/DataFrame.md
55
57
  - doc/Vector.md
@@ -95,6 +97,7 @@ files:
95
97
  - lib/red_amber/data_frame_variable_operation.rb
96
98
  - lib/red_amber/group.rb
97
99
  - lib/red_amber/helper.rb
100
+ - lib/red_amber/refinements.rb
98
101
  - lib/red_amber/vector.rb
99
102
  - lib/red_amber/vector_functions.rb
100
103
  - lib/red_amber/vector_selectable.rb
@@ -110,7 +113,7 @@ metadata:
110
113
  source_code_uri: https://github.com/heronshoes/red_amber
111
114
  changelog_uri: https://github.com/heronshoes/red_amber/blob/main/CHANGELOG.md
112
115
  rubygems_mfa_required: 'true'
113
- post_install_message:
116
+ post_install_message:
114
117
  rdoc_options: []
115
118
  require_paths:
116
119
  - lib
@@ -118,15 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
118
121
  requirements:
119
122
  - - ">="
120
123
  - !ruby/object:Gem::Version
121
- version: '2.7'
124
+ version: '3.0'
122
125
  required_rubygems_version: !ruby/object:Gem::Requirement
123
126
  requirements:
124
127
  - - ">="
125
128
  - !ruby/object:Gem::Version
126
129
  version: '0'
127
130
  requirements: []
128
- rubygems_version: 3.3.7
129
- signing_key:
131
+ rubygems_version: 3.3.26
132
+ signing_key:
130
133
  specification_version: 4
131
134
  summary: Simple dataframe library for Ruby
132
135
  test_files: []