red_amber 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,9 @@ module RedAmber
7
7
  # mix-in for class Vector
8
8
  # Functions to make up some data (especially missing) for new data.
9
9
  module VectorUpdatable
10
+ # Add properties to Arrow::Array and Arrow::ChunkedArray
11
+ using RefineArrayLike
12
+
10
13
  # Replace data
11
14
  # @param specifier [Array, Vector, Arrow::Array] index or booleans.
12
15
  # @param replacer [Scalar, Array, Vector, Arrow::Array] new data to replace for.
@@ -14,7 +17,7 @@ module RedAmber
14
17
  # If specifier has no true, return self.
15
18
  #
16
19
  def replace(specifier, replacer)
17
- vector = parse_to_vector(Array(specifier))
20
+ vector = Vector.new(parse_args(Array(specifier), size))
18
21
  return self if vector.empty? || empty?
19
22
 
20
23
  booleans =
@@ -42,7 +45,9 @@ module RedAmber
42
45
  else # Broadcast scalar to Array
43
46
  Arrow::Array.new(Array(replacer) * booleans.to_a.count(true))
44
47
  end
45
- raise VectorArgumentError, 'Replacements size unmatch' if booleans.sum != replacer_array.length
48
+ if booleans.sum != replacer_array.length
49
+ raise VectorArgumentError, 'Replacements size unmatch'
50
+ end
46
51
 
47
52
  replace_with(booleans.data, replacer_array)
48
53
  end
@@ -57,7 +62,7 @@ module RedAmber
57
62
  raise VectorTypeError, 'Reciever must be a boolean' unless boolean?
58
63
 
59
64
  datum = find(:if_else).execute([data, true_choice, false_choice])
60
- Vector.new(datum.value)
65
+ Vector.create(datum.value)
61
66
  end
62
67
 
63
68
  # same behavior as Ruby's invert
@@ -80,11 +85,134 @@ module RedAmber
80
85
  end
81
86
  end
82
87
 
88
+ # Split string Vector and returns Array of columns.
89
+ #
90
+ # @param sep [nil, String, Regexp] separator.
91
+ # If separator is nil (or no argument given), the column will be split by
92
+ # Arrow's split function using any ASCII whitespace.
93
+ # Otherwise sep will be passed to String#split.
94
+ # @param limit [Integer] maximum number to limit separation. Passed to String#split.
95
+ # @return [Array<Vector>] an Array of Vectors.
96
+ # @note nil will be separated as nils at the same row. ex) `nil => [nil, nil]`
97
+ #
98
+ def split_to_columns(sep = nil, limit = 0)
99
+ l = split(sep, limit)
100
+ l.list_separate
101
+ end
102
+
103
+ # Split string Vector and flatten into rows.
104
+ #
105
+ # @param sep [nil, String, Regexp] separator.
106
+ # If separator is nil (or no argument given), the column will be split by
107
+ # Arrow's split function using any ASCII whitespace.
108
+ # Otherwise sep will be passed to String#split.
109
+ # @param limit [Integer] maximum number to limit separation. Passed to String#split.
110
+ # @return [Vector] a flattened Vector.
111
+ # @note nil will be separated as nils at the same row. ex) `nil => [nil, nil]`
112
+ #
113
+ def split_to_rows(sep = nil, limit = 0)
114
+ l = split(sep, limit)
115
+ l.list_flatten
116
+ end
117
+
118
+ # return element size Array for list Vector.
119
+ #
120
+ # @api private
121
+ #
122
+ def list_sizes
123
+ Vector.create find(:list_value_length).execute([data]).value
124
+ end
125
+
126
+ # Separate list Vector by columns.
127
+ #
128
+ # @api private
129
+ #
130
+ def list_separate
131
+ len = list_sizes.data
132
+ min, max = Arrow::Function.find(:min_max).execute([len]).value.value.map(&:value)
133
+
134
+ result = []
135
+ (0...min).each do |i|
136
+ result << Vector.create(find(:list_element).execute([data, i]).value)
137
+ end
138
+ return result if min == max
139
+
140
+ (min...max).each do |i|
141
+ result << Vector.new(data.map { |e| e&.[](i) })
142
+ end
143
+ result
144
+ end
145
+
146
+ # Flatten list Vector for rows.
147
+ #
148
+ # @api private
149
+ #
150
+ def list_flatten
151
+ Vector.create find(:list_flatten).execute([data]).value
152
+ end
153
+
154
+ # Split string Vector by each element with separator and returns list Array.
155
+ #
156
+ # @note if sep is not specified, use Arrow's ascii_split_whitespace.
157
+ # It will separate string by ascii whitespaces.
158
+ # @note if sep is specified, sep and limit will be passed to String#split.
159
+ #
160
+ def split(sep = nil, limit = 0)
161
+ if empty? || !string?
162
+ raise VectorTypeError, "self is not a valid string Vector: #{self}"
163
+ end
164
+ if self[0].nil? && uniq.to_a == [nil] # Avoid heavy check to be activated always.
165
+ raise VectorTypeError, 'self contains only nil'
166
+ end
167
+
168
+ list =
169
+ if sep
170
+ Arrow::Array.new(to_a.map { |e| e&.split(sep, limit) })
171
+ else
172
+ find(:ascii_split_whitespace).execute([data]).value
173
+ end
174
+ Vector.create(list)
175
+ end
176
+
177
+ # Merge String or other string Vector to self.
178
+ # Self must be a string Vector.
179
+ #
180
+ # @param other [String, Vector]
181
+ # merger from right. It will be broadcasted if it is a scalar String.
182
+ # @param sep [String] separator.
183
+ # @return [Vector] merged Vector
184
+ #
185
+ def merge(other, sep: ' ')
186
+ if empty? || !string?
187
+ raise VectorTypeError,
188
+ "self is not a string Vector: #{self}"
189
+ end
190
+ unless sep.is_a?(String)
191
+ raise VectorArgumentError, "separator is not a String: #{sep}"
192
+ end
193
+
194
+ other_array =
195
+ case other
196
+ in String => s
197
+ [s] * size
198
+ in (Vector | Arrow::Array | Arrow::ChunkedArray) => x if x.string?
199
+ x.to_a
200
+ else
201
+ raise VectorArgumentError,
202
+ "other is not a String or a string Vector: #{self}"
203
+ end
204
+
205
+ list = Arrow::Array.new(to_a.zip(other_array))
206
+ datum = find(:binary_join).execute([list, sep])
207
+ Vector.create(datum.value)
208
+ end
209
+
83
210
  private
84
211
 
85
212
  # Replace elements selected with a boolean mask
86
213
  #
87
- # @param boolean_mask [Arrow::BooleanArray] Boolean mask which indicates the position to be replaced.
214
+ # @param boolean_mask [Arrow::BooleanArray]
215
+ # Boolean mask which indicates the position to be replaced.
88
216
  # - Position with true will be replaced.
89
217
  # - Position with nil will be nil.
90
218
  #
@@ -104,12 +232,13 @@ module RedAmber
104
232
  values = replacer.class.new(data) # Upcast
105
233
 
106
234
  datum = find(:replace_with_mask).execute([values, boolean_mask, replacer])
107
- Vector.new(datum.value)
235
+ Vector.create(datum.value)
108
236
  end
109
237
 
110
238
  # Replace elements selected with a boolean mask by nil
111
239
  #
112
- # @param boolean_mask [Arrow::BooleanArray] Boolean mask which indicates the position to be replaced.
240
+ # @param boolean_mask [Arrow::BooleanArray]
241
+ # Boolean mask which indicates the position to be replaced.
113
242
  # - Position with true will be replaced by nil
114
243
  # - Position with nil will remain as nil.
115
244
  # @return [Vector] Replaced vector.
@@ -117,7 +246,7 @@ module RedAmber
117
246
  def replace_to_nil(boolean_mask)
118
247
  nil_array = data.class.new([nil] * size) # Casted nil Array
119
248
  datum = find(:if_else).execute([boolean_mask, nil_array, data])
120
- Vector.new(datum.value)
249
+ Vector.create(datum.value)
121
250
  end
122
251
  end
123
252
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.2.3'
4
+ VERSION = '0.3.0'
5
5
  end
data/lib/red_amber.rb CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  require 'arrow'
4
4
 
5
+ require_relative 'red_amber/refinements'
5
6
  require_relative 'red_amber/helper'
7
+
6
8
  require_relative 'red_amber/data_frame_combinable'
7
9
  require_relative 'red_amber/data_frame_displayable'
8
10
  require_relative 'red_amber/data_frame_indexable'
data/red_amber.gemspec CHANGED
@@ -9,10 +9,11 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ['heronshoes877@gmail.com']
10
10
 
11
11
  spec.summary = 'Simple dataframe library for Ruby'
12
- spec.description = 'RedAmber is a simple dataframe library inspired by Rover-df and powered by Red Arrow.'
12
+ spec.description = 'RedAmber is a simple dataframe library' \
13
+ 'inspired by Rover-df and powered by Red Arrow.'
13
14
  spec.homepage = 'https://github.com/heronshoes/red_amber'
14
15
  spec.license = 'MIT'
15
- spec.required_ruby_version = '>= 2.7'
16
+ spec.required_ruby_version = '>= 3.0'
16
17
 
17
18
  spec.metadata['homepage_uri'] = spec.homepage
18
19
  spec.metadata['source_code_uri'] = 'https://github.com/heronshoes/red_amber'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-11-16 00:00:00.000000000 Z
11
+ date: 2022-12-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -24,7 +24,7 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 10.0.0
27
- description: RedAmber is a simple dataframe library inspired by Rover-df and powered
27
+ description: RedAmber is a simple dataframe libraryinspired by Rover-df and powered
28
28
  by Red Arrow.
29
29
  email:
30
30
  - heronshoes877@gmail.com
@@ -42,6 +42,7 @@ files:
42
42
  - Rakefile
43
43
  - benchmark/basic.yml
44
44
  - benchmark/combine.yml
45
+ - benchmark/dataframe.yml
45
46
  - benchmark/drop_nil.yml
46
47
  - benchmark/group.yml
47
48
  - benchmark/reshape.yml
@@ -50,6 +51,7 @@ files:
50
51
  - benchmark/rover/penguins.yml
51
52
  - benchmark/rover/planes.yml
52
53
  - benchmark/rover/weather.yml
54
+ - benchmark/vector.yml
53
55
  - doc/CODE_OF_CONDUCT.md
54
56
  - doc/DataFrame.md
55
57
  - doc/Vector.md
@@ -95,6 +97,7 @@ files:
95
97
  - lib/red_amber/data_frame_variable_operation.rb
96
98
  - lib/red_amber/group.rb
97
99
  - lib/red_amber/helper.rb
100
+ - lib/red_amber/refinements.rb
98
101
  - lib/red_amber/vector.rb
99
102
  - lib/red_amber/vector_functions.rb
100
103
  - lib/red_amber/vector_selectable.rb
@@ -110,7 +113,7 @@ metadata:
110
113
  source_code_uri: https://github.com/heronshoes/red_amber
111
114
  changelog_uri: https://github.com/heronshoes/red_amber/blob/main/CHANGELOG.md
112
115
  rubygems_mfa_required: 'true'
113
- post_install_message:
116
+ post_install_message:
114
117
  rdoc_options: []
115
118
  require_paths:
116
119
  - lib
@@ -118,15 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
118
121
  requirements:
119
122
  - - ">="
120
123
  - !ruby/object:Gem::Version
121
- version: '2.7'
124
+ version: '3.0'
122
125
  required_rubygems_version: !ruby/object:Gem::Requirement
123
126
  requirements:
124
127
  - - ">="
125
128
  - !ruby/object:Gem::Version
126
129
  version: '0'
127
130
  requirements: []
128
- rubygems_version: 3.3.7
129
- signing_key:
131
+ rubygems_version: 3.3.26
132
+ signing_key:
130
133
  specification_version: 4
131
134
  summary: Simple dataframe library for Ruby
132
135
  test_files: []