red_amber 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
@@ -7,6 +7,9 @@ module RedAmber
|
|
7
7
|
# mix-in for class Vector
|
8
8
|
# Functions to make up some data (especially missing) for new data.
|
9
9
|
module VectorUpdatable
|
10
|
+
# Add properties to Arrow::Array and Arrow::ChunkedArray
|
11
|
+
using RefineArrayLike
|
12
|
+
|
10
13
|
# Replace data
|
11
14
|
# @param specifier [Array, Vector, Arrow::Array] index or booleans.
|
12
15
|
# @param replacer [Scalar, Array, Vector, Arrow::Array] new data to replace for.
|
@@ -14,7 +17,7 @@ module RedAmber
|
|
14
17
|
# If specifier has no true, return self.
|
15
18
|
#
|
16
19
|
def replace(specifier, replacer)
|
17
|
-
vector =
|
20
|
+
vector = Vector.new(parse_args(Array(specifier), size))
|
18
21
|
return self if vector.empty? || empty?
|
19
22
|
|
20
23
|
booleans =
|
@@ -42,7 +45,9 @@ module RedAmber
|
|
42
45
|
else # Broadcast scalar to Array
|
43
46
|
Arrow::Array.new(Array(replacer) * booleans.to_a.count(true))
|
44
47
|
end
|
45
|
-
|
48
|
+
if booleans.sum != replacer_array.length
|
49
|
+
raise VectorArgumentError, 'Replacements size unmatch'
|
50
|
+
end
|
46
51
|
|
47
52
|
replace_with(booleans.data, replacer_array)
|
48
53
|
end
|
@@ -57,7 +62,7 @@ module RedAmber
|
|
57
62
|
raise VectorTypeError, 'Reciever must be a boolean' unless boolean?
|
58
63
|
|
59
64
|
datum = find(:if_else).execute([data, true_choice, false_choice])
|
60
|
-
Vector.
|
65
|
+
Vector.create(datum.value)
|
61
66
|
end
|
62
67
|
|
63
68
|
# same behavior as Ruby's invert
|
@@ -80,11 +85,134 @@ module RedAmber
|
|
80
85
|
end
|
81
86
|
end
|
82
87
|
|
88
|
+
# Splits a string Vector and returns an Array of columns.
|
89
|
+
#
|
90
|
+
# @param sep [nil, String, Regexp] separator.
|
91
|
+
# If separator is nil (or no argument given), the column will be split by
|
92
|
+
# Arrow's split function using any ASCII whitespace.
|
93
|
+
# Otherwise sep will be passed to String#split.
|
94
|
+
# @param limit [Integer] maximum number to limit separation. Passed to String#split.
|
95
|
+
# @return [Array<Vector>] an Array of Vectors.
|
96
|
+
# @note nil will be separated into nils in the same row. ex) `nil => [nil, nil]`
|
97
|
+
#
|
98
|
+
def split_to_columns(sep = nil, limit = 0)
|
99
|
+
l = split(sep, limit)
|
100
|
+
l.list_separate
|
101
|
+
end
|
102
|
+
|
103
|
+
# Split string Vector and flatten into rows.
|
104
|
+
#
|
105
|
+
# @param sep [nil, String, Regexp] separator.
|
106
|
+
# If separator is nil (or no argument given), the column will be split by
|
107
|
+
# Arrow's split function using any ASCII whitespace.
|
108
|
+
# Otherwise sep will be passed to String#split.
|
109
|
+
# @param limit [Integer] maximum number to limit separation. Passed to String#split.
|
110
|
+
# @return [Vector] a flattened Vector.
|
111
|
+
# @note nil will be separated into nils in the same row. ex) `nil => [nil, nil]`
|
112
|
+
#
|
113
|
+
def split_to_rows(sep = nil, limit = 0)
|
114
|
+
l = split(sep, limit)
|
115
|
+
l.list_flatten
|
116
|
+
end
|
117
|
+
|
118
|
+
# return element size Array for list Vector.
|
119
|
+
#
|
120
|
+
# @api private
|
121
|
+
#
|
122
|
+
def list_sizes
|
123
|
+
Vector.create find(:list_value_length).execute([data]).value
|
124
|
+
end
|
125
|
+
|
126
|
+
# Separate list Vector by columns.
|
127
|
+
#
|
128
|
+
# @api private
|
129
|
+
#
|
130
|
+
def list_separate
|
131
|
+
len = list_sizes.data
|
132
|
+
min, max = Arrow::Function.find(:min_max).execute([len]).value.value.map(&:value)
|
133
|
+
|
134
|
+
result = []
|
135
|
+
(0...min).each do |i|
|
136
|
+
result << Vector.create(find(:list_element).execute([data, i]).value)
|
137
|
+
end
|
138
|
+
return result if min == max
|
139
|
+
|
140
|
+
(min...max).each do |i|
|
141
|
+
result << Vector.new(data.map { |e| e&.[](i) })
|
142
|
+
end
|
143
|
+
result
|
144
|
+
end
|
145
|
+
|
146
|
+
# Flatten list Vector for rows.
|
147
|
+
#
|
148
|
+
# @api private
|
149
|
+
#
|
150
|
+
def list_flatten
|
151
|
+
Vector.create find(:list_flatten).execute([data]).value
|
152
|
+
end
|
153
|
+
|
154
|
+
# Splits each element of a string Vector by the separator and returns a list Vector.
|
155
|
+
#
|
156
|
+
# @note if sep is not specified, use Arrow's ascii_split_whitespace.
|
157
|
+
# It will separate string by ascii whitespaces.
|
158
|
+
# @note if sep is specified, sep and limit will be passed to String#split.
|
159
|
+
#
|
160
|
+
def split(sep = nil, limit = 0)
|
161
|
+
if empty? || !string?
|
162
|
+
raise VectorTypeError, "self is not a valid string Vector: #{self}"
|
163
|
+
end
|
164
|
+
if self[0].nil? && uniq.to_a == [nil] # Avoid heavy check to be activated always.
|
165
|
+
raise VectorTypeError, 'self contains only nil'
|
166
|
+
end
|
167
|
+
|
168
|
+
list =
|
169
|
+
if sep
|
170
|
+
Arrow::Array.new(to_a.map { |e| e&.split(sep, limit) })
|
171
|
+
else
|
172
|
+
find(:ascii_split_whitespace).execute([data]).value
|
173
|
+
end
|
174
|
+
Vector.create(list)
|
175
|
+
end
|
176
|
+
|
177
|
+
# Merge String or other string Vector to self.
|
178
|
+
# Self must be a string Vector.
|
179
|
+
#
|
180
|
+
# @param other [String, Vector]
|
181
|
+
# merger from right. It will be broadcast if it is a scalar String.
|
182
|
+
# @param sep [String] separator.
|
183
|
+
# @return [Vector] merged Vector
|
184
|
+
#
|
185
|
+
def merge(other, sep: ' ')
|
186
|
+
if empty? || !string?
|
187
|
+
raise VectorTypeError,
|
188
|
+
"self is not a string Vector: #{self}"
|
189
|
+
end
|
190
|
+
unless sep.is_a?(String)
|
191
|
+
raise VectorArgumentError, "separator is not a String: #{sep}"
|
192
|
+
end
|
193
|
+
|
194
|
+
other_array =
|
195
|
+
case other
|
196
|
+
in String => s
|
197
|
+
[s] * size
|
198
|
+
in (Vector | Arrow::Array | Arrow::ChunkedArray) => x if x.string?
|
199
|
+
x.to_a
|
200
|
+
else
|
201
|
+
raise VectorArgumentError,
|
202
|
+
"other is not a String or a string Vector: #{self}"
|
203
|
+
end
|
204
|
+
|
205
|
+
list = Arrow::Array.new(to_a.zip(other_array))
|
206
|
+
datum = find(:binary_join).execute([list, sep])
|
207
|
+
Vector.create(datum.value)
|
208
|
+
end
|
209
|
+
|
83
210
|
private
|
84
211
|
|
85
212
|
# Replace elements selected with a boolean mask
|
86
213
|
#
|
87
|
-
# @param boolean_mask [Arrow::BooleanArray]
|
214
|
+
# @param boolean_mask [Arrow::BooleanArray]
|
215
|
+
# Boolean mask which indicates the position to be replaced.
|
88
216
|
# - Position with true will be replaced.
|
89
217
|
# - Position with nil will be nil.
|
90
218
|
#
|
@@ -104,12 +232,13 @@ module RedAmber
|
|
104
232
|
values = replacer.class.new(data) # Upcast
|
105
233
|
|
106
234
|
datum = find(:replace_with_mask).execute([values, boolean_mask, replacer])
|
107
|
-
Vector.
|
235
|
+
Vector.create(datum.value)
|
108
236
|
end
|
109
237
|
|
110
238
|
# Replace elements selected with a boolean mask by nil
|
111
239
|
#
|
112
|
-
# @param boolean_mask [Arrow::BooleanArray]
|
240
|
+
# @param boolean_mask [Arrow::BooleanArray]
|
241
|
+
# Boolean mask which indicates the position to be replaced.
|
113
242
|
# - Position with true will be replaced by nil
|
114
243
|
# - Position with nil will remain as nil.
|
115
244
|
# @return [Vector] Replaced vector.
|
@@ -117,7 +246,7 @@ module RedAmber
|
|
117
246
|
def replace_to_nil(boolean_mask)
|
118
247
|
nil_array = data.class.new([nil] * size) # Casted nil Array
|
119
248
|
datum = find(:if_else).execute([boolean_mask, nil_array, data])
|
120
|
-
Vector.
|
249
|
+
Vector.create(datum.value)
|
121
250
|
end
|
122
251
|
end
|
123
252
|
end
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
data/red_amber.gemspec
CHANGED
@@ -9,10 +9,11 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ['heronshoes877@gmail.com']
|
10
10
|
|
11
11
|
spec.summary = 'Simple dataframe library for Ruby'
|
12
|
-
spec.description = 'RedAmber is a simple dataframe library
|
12
|
+
spec.description = 'RedAmber is a simple dataframe library' \
|
13
|
+
'inspired by Rover-df and powered by Red Arrow.'
|
13
14
|
spec.homepage = 'https://github.com/heronshoes/red_amber'
|
14
15
|
spec.license = 'MIT'
|
15
|
-
spec.required_ruby_version = '>=
|
16
|
+
spec.required_ruby_version = '>= 3.0'
|
16
17
|
|
17
18
|
spec.metadata['homepage_uri'] = spec.homepage
|
18
19
|
spec.metadata['source_code_uri'] = 'https://github.com/heronshoes/red_amber'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-12-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -24,7 +24,7 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 10.0.0
|
27
|
-
description: RedAmber is a simple dataframe
|
27
|
+
description: RedAmber is a simple dataframe libraryinspired by Rover-df and powered
|
28
28
|
by Red Arrow.
|
29
29
|
email:
|
30
30
|
- heronshoes877@gmail.com
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- Rakefile
|
43
43
|
- benchmark/basic.yml
|
44
44
|
- benchmark/combine.yml
|
45
|
+
- benchmark/dataframe.yml
|
45
46
|
- benchmark/drop_nil.yml
|
46
47
|
- benchmark/group.yml
|
47
48
|
- benchmark/reshape.yml
|
@@ -50,6 +51,7 @@ files:
|
|
50
51
|
- benchmark/rover/penguins.yml
|
51
52
|
- benchmark/rover/planes.yml
|
52
53
|
- benchmark/rover/weather.yml
|
54
|
+
- benchmark/vector.yml
|
53
55
|
- doc/CODE_OF_CONDUCT.md
|
54
56
|
- doc/DataFrame.md
|
55
57
|
- doc/Vector.md
|
@@ -95,6 +97,7 @@ files:
|
|
95
97
|
- lib/red_amber/data_frame_variable_operation.rb
|
96
98
|
- lib/red_amber/group.rb
|
97
99
|
- lib/red_amber/helper.rb
|
100
|
+
- lib/red_amber/refinements.rb
|
98
101
|
- lib/red_amber/vector.rb
|
99
102
|
- lib/red_amber/vector_functions.rb
|
100
103
|
- lib/red_amber/vector_selectable.rb
|
@@ -110,7 +113,7 @@ metadata:
|
|
110
113
|
source_code_uri: https://github.com/heronshoes/red_amber
|
111
114
|
changelog_uri: https://github.com/heronshoes/red_amber/blob/main/CHANGELOG.md
|
112
115
|
rubygems_mfa_required: 'true'
|
113
|
-
post_install_message:
|
116
|
+
post_install_message:
|
114
117
|
rdoc_options: []
|
115
118
|
require_paths:
|
116
119
|
- lib
|
@@ -118,15 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
118
121
|
requirements:
|
119
122
|
- - ">="
|
120
123
|
- !ruby/object:Gem::Version
|
121
|
-
version: '
|
124
|
+
version: '3.0'
|
122
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
126
|
requirements:
|
124
127
|
- - ">="
|
125
128
|
- !ruby/object:Gem::Version
|
126
129
|
version: '0'
|
127
130
|
requirements: []
|
128
|
-
rubygems_version: 3.3.
|
129
|
-
signing_key:
|
131
|
+
rubygems_version: 3.3.26
|
132
|
+
signing_key:
|
130
133
|
specification_version: 4
|
131
134
|
summary: Simple dataframe library for Ruby
|
132
135
|
test_files: []
|