red_amber 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
@@ -7,6 +7,9 @@ module RedAmber
|
|
7
7
|
# mix-in for class Vector
|
8
8
|
# Functions to make up some data (especially missing) for new data.
|
9
9
|
module VectorUpdatable
|
10
|
+
# Add properties to Arrow::Array and Arrow::ChunkedArray
|
11
|
+
using RefineArrayLike
|
12
|
+
|
10
13
|
# Replace data
|
11
14
|
# @param specifier [Array, Vector, Arrow::Array] index or booleans.
|
12
15
|
# @param replacer [Scalar, Array, Vector, Arrow::Array] new data to replace for.
|
@@ -14,7 +17,7 @@ module RedAmber
|
|
14
17
|
# If specifier has no true, return self.
|
15
18
|
#
|
16
19
|
def replace(specifier, replacer)
|
17
|
-
vector =
|
20
|
+
vector = Vector.new(parse_args(Array(specifier), size))
|
18
21
|
return self if vector.empty? || empty?
|
19
22
|
|
20
23
|
booleans =
|
@@ -42,7 +45,9 @@ module RedAmber
|
|
42
45
|
else # Broadcast scalar to Array
|
43
46
|
Arrow::Array.new(Array(replacer) * booleans.to_a.count(true))
|
44
47
|
end
|
45
|
-
|
48
|
+
if booleans.sum != replacer_array.length
|
49
|
+
raise VectorArgumentError, 'Replacements size unmatch'
|
50
|
+
end
|
46
51
|
|
47
52
|
replace_with(booleans.data, replacer_array)
|
48
53
|
end
|
@@ -57,7 +62,7 @@ module RedAmber
|
|
57
62
|
raise VectorTypeError, 'Reciever must be a boolean' unless boolean?
|
58
63
|
|
59
64
|
datum = find(:if_else).execute([data, true_choice, false_choice])
|
60
|
-
Vector.
|
65
|
+
Vector.create(datum.value)
|
61
66
|
end
|
62
67
|
|
63
68
|
# same behavior as Ruby's invert
|
@@ -80,11 +85,134 @@ module RedAmber
|
|
80
85
|
end
|
81
86
|
end
|
82
87
|
|
88
|
+
# Split string Vector and returns Array of columns.
|
89
|
+
#
|
90
|
+
# @param sep [nil, String, Regexp] separator.
|
91
|
+
# If separator is nil (or no argument given), the column will be split by
|
92
|
+
# Arrow's split function using any ASCII whitespace.
|
93
|
+
# Otherwise sep will be passed to String#split.
|
94
|
+
# @param limit [Integer] maximum number to limit separation. Passed to String#split.
|
95
|
+
# @return [Array<Vector>] an Array of Vectors.
|
96
|
+
# @note nil will be separated into nils in the same row. ex) `nil => [nil, nil]`
|
97
|
+
#
|
98
|
+
def split_to_columns(sep = nil, limit = 0)
|
99
|
+
l = split(sep, limit)
|
100
|
+
l.list_separate
|
101
|
+
end
|
102
|
+
|
103
|
+
# Split string Vector and flatten into rows.
|
104
|
+
#
|
105
|
+
# @param sep [nil, String, Regexp] separator.
|
106
|
+
# If separator is nil (or no argument given), the column will be split by
|
107
|
+
# Arrow's split function using any ASCII whitespace.
|
108
|
+
# Otherwise sep will be passed to String#split.
|
109
|
+
# @param limit [Integer] maximum number to limit separation. Passed to String#split.
|
110
|
+
# @return [Vector] a flattened Vector.
|
111
|
+
# @note nil will be separated into nils in the same row. ex) `nil => [nil, nil]`
|
112
|
+
#
|
113
|
+
def split_to_rows(sep = nil, limit = 0)
|
114
|
+
l = split(sep, limit)
|
115
|
+
l.list_flatten
|
116
|
+
end
|
117
|
+
|
118
|
+
# return element size Array for list Vector.
|
119
|
+
#
|
120
|
+
# @api private
|
121
|
+
#
|
122
|
+
def list_sizes
|
123
|
+
Vector.create find(:list_value_length).execute([data]).value
|
124
|
+
end
|
125
|
+
|
126
|
+
# Separate list Vector by columns.
|
127
|
+
#
|
128
|
+
# @api private
|
129
|
+
#
|
130
|
+
def list_separate
|
131
|
+
len = list_sizes.data
|
132
|
+
min, max = Arrow::Function.find(:min_max).execute([len]).value.value.map(&:value)
|
133
|
+
|
134
|
+
result = []
|
135
|
+
(0...min).each do |i|
|
136
|
+
result << Vector.create(find(:list_element).execute([data, i]).value)
|
137
|
+
end
|
138
|
+
return result if min == max
|
139
|
+
|
140
|
+
(min...max).each do |i|
|
141
|
+
result << Vector.new(data.map { |e| e&.[](i) })
|
142
|
+
end
|
143
|
+
result
|
144
|
+
end
|
145
|
+
|
146
|
+
# Flatten list Vector for rows.
|
147
|
+
#
|
148
|
+
# @api private
|
149
|
+
#
|
150
|
+
def list_flatten
|
151
|
+
Vector.create find(:list_flatten).execute([data]).value
|
152
|
+
end
|
153
|
+
|
154
|
+
# Split string Vector by each element with separator and returns list Array.
|
155
|
+
#
|
156
|
+
# @note if sep is not specified, use Arrow's ascii_split_whitespace.
|
157
|
+
# It will separate string by ascii whitespaces.
|
158
|
+
# @note if sep is specified, sep and limit will be passed to String#split.
|
159
|
+
#
|
160
|
+
def split(sep = nil, limit = 0)
|
161
|
+
if empty? || !string?
|
162
|
+
raise VectorTypeError, "self is not a valid string Vector: #{self}"
|
163
|
+
end
|
164
|
+
if self[0].nil? && uniq.to_a == [nil] # Avoid heavy check to be activated always.
|
165
|
+
raise VectorTypeError, 'self contains only nil'
|
166
|
+
end
|
167
|
+
|
168
|
+
list =
|
169
|
+
if sep
|
170
|
+
Arrow::Array.new(to_a.map { |e| e&.split(sep, limit) })
|
171
|
+
else
|
172
|
+
find(:ascii_split_whitespace).execute([data]).value
|
173
|
+
end
|
174
|
+
Vector.create(list)
|
175
|
+
end
|
176
|
+
|
177
|
+
# Merge String or other string Vector to self.
|
178
|
+
# Self must be a string Vector.
|
179
|
+
#
|
180
|
+
# @param other [String, Vector]
|
181
|
+
# merger from right. It will be broadcast if it is a scalar String.
|
182
|
+
# @param sep [String] separator.
|
183
|
+
# @return [Vector] merged Vector
|
184
|
+
#
|
185
|
+
def merge(other, sep: ' ')
|
186
|
+
if empty? || !string?
|
187
|
+
raise VectorTypeError,
|
188
|
+
"self is not a string Vector: #{self}"
|
189
|
+
end
|
190
|
+
unless sep.is_a?(String)
|
191
|
+
raise VectorArgumentError, "separator is not a String: #{sep}"
|
192
|
+
end
|
193
|
+
|
194
|
+
other_array =
|
195
|
+
case other
|
196
|
+
in String => s
|
197
|
+
[s] * size
|
198
|
+
in (Vector | Arrow::Array | Arrow::ChunkedArray) => x if x.string?
|
199
|
+
x.to_a
|
200
|
+
else
|
201
|
+
raise VectorArgumentError,
|
202
|
+
"other is not a String or a string Vector: #{self}"
|
203
|
+
end
|
204
|
+
|
205
|
+
list = Arrow::Array.new(to_a.zip(other_array))
|
206
|
+
datum = find(:binary_join).execute([list, sep])
|
207
|
+
Vector.create(datum.value)
|
208
|
+
end
|
209
|
+
|
83
210
|
private
|
84
211
|
|
85
212
|
# Replace elements selected with a boolean mask
|
86
213
|
#
|
87
|
-
# @param boolean_mask [Arrow::BooleanArray]
|
214
|
+
# @param boolean_mask [Arrow::BooleanArray]
|
215
|
+
# Boolean mask which indicates the position to be replaced.
|
88
216
|
# - Position with true will be replaced.
|
89
217
|
# - Position with nil will be nil.
|
90
218
|
#
|
@@ -104,12 +232,13 @@ module RedAmber
|
|
104
232
|
values = replacer.class.new(data) # Upcast
|
105
233
|
|
106
234
|
datum = find(:replace_with_mask).execute([values, boolean_mask, replacer])
|
107
|
-
Vector.
|
235
|
+
Vector.create(datum.value)
|
108
236
|
end
|
109
237
|
|
110
238
|
# Replace elements selected with a boolean mask by nil
|
111
239
|
#
|
112
|
-
# @param boolean_mask [Arrow::BooleanArray]
|
240
|
+
# @param boolean_mask [Arrow::BooleanArray]
|
241
|
+
# Boolean mask which indicates the position to be replaced.
|
113
242
|
# - Position with true will be replaced by nil
|
114
243
|
# - Position with nil will remain as nil.
|
115
244
|
# @return [Vector] Replaced vector.
|
@@ -117,7 +246,7 @@ module RedAmber
|
|
117
246
|
def replace_to_nil(boolean_mask)
|
118
247
|
nil_array = data.class.new([nil] * size) # Casted nil Array
|
119
248
|
datum = find(:if_else).execute([boolean_mask, nil_array, data])
|
120
|
-
Vector.
|
249
|
+
Vector.create(datum.value)
|
121
250
|
end
|
122
251
|
end
|
123
252
|
end
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
data/red_amber.gemspec
CHANGED
@@ -9,10 +9,11 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ['heronshoes877@gmail.com']
|
10
10
|
|
11
11
|
spec.summary = 'Simple dataframe library for Ruby'
|
12
|
-
spec.description = 'RedAmber is a simple dataframe library
|
12
|
+
spec.description = 'RedAmber is a simple dataframe library' \
|
13
|
+
'inspired by Rover-df and powered by Red Arrow.'
|
13
14
|
spec.homepage = 'https://github.com/heronshoes/red_amber'
|
14
15
|
spec.license = 'MIT'
|
15
|
-
spec.required_ruby_version = '>=
|
16
|
+
spec.required_ruby_version = '>= 3.0'
|
16
17
|
|
17
18
|
spec.metadata['homepage_uri'] = spec.homepage
|
18
19
|
spec.metadata['source_code_uri'] = 'https://github.com/heronshoes/red_amber'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-12-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -24,7 +24,7 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 10.0.0
|
27
|
-
description: RedAmber is a simple dataframe
|
27
|
+
description: RedAmber is a simple dataframe libraryinspired by Rover-df and powered
|
28
28
|
by Red Arrow.
|
29
29
|
email:
|
30
30
|
- heronshoes877@gmail.com
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- Rakefile
|
43
43
|
- benchmark/basic.yml
|
44
44
|
- benchmark/combine.yml
|
45
|
+
- benchmark/dataframe.yml
|
45
46
|
- benchmark/drop_nil.yml
|
46
47
|
- benchmark/group.yml
|
47
48
|
- benchmark/reshape.yml
|
@@ -50,6 +51,7 @@ files:
|
|
50
51
|
- benchmark/rover/penguins.yml
|
51
52
|
- benchmark/rover/planes.yml
|
52
53
|
- benchmark/rover/weather.yml
|
54
|
+
- benchmark/vector.yml
|
53
55
|
- doc/CODE_OF_CONDUCT.md
|
54
56
|
- doc/DataFrame.md
|
55
57
|
- doc/Vector.md
|
@@ -95,6 +97,7 @@ files:
|
|
95
97
|
- lib/red_amber/data_frame_variable_operation.rb
|
96
98
|
- lib/red_amber/group.rb
|
97
99
|
- lib/red_amber/helper.rb
|
100
|
+
- lib/red_amber/refinements.rb
|
98
101
|
- lib/red_amber/vector.rb
|
99
102
|
- lib/red_amber/vector_functions.rb
|
100
103
|
- lib/red_amber/vector_selectable.rb
|
@@ -110,7 +113,7 @@ metadata:
|
|
110
113
|
source_code_uri: https://github.com/heronshoes/red_amber
|
111
114
|
changelog_uri: https://github.com/heronshoes/red_amber/blob/main/CHANGELOG.md
|
112
115
|
rubygems_mfa_required: 'true'
|
113
|
-
post_install_message:
|
116
|
+
post_install_message:
|
114
117
|
rdoc_options: []
|
115
118
|
require_paths:
|
116
119
|
- lib
|
@@ -118,15 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
118
121
|
requirements:
|
119
122
|
- - ">="
|
120
123
|
- !ruby/object:Gem::Version
|
121
|
-
version: '
|
124
|
+
version: '3.0'
|
122
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
126
|
requirements:
|
124
127
|
- - ">="
|
125
128
|
- !ruby/object:Gem::Version
|
126
129
|
version: '0'
|
127
130
|
requirements: []
|
128
|
-
rubygems_version: 3.3.
|
129
|
-
signing_key:
|
131
|
+
rubygems_version: 3.3.26
|
132
|
+
signing_key:
|
130
133
|
specification_version: 4
|
131
134
|
summary: Simple dataframe library for Ruby
|
132
135
|
test_files: []
|