red_amber 0.1.3 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +31 -7
- data/CHANGELOG.md +214 -10
- data/Gemfile +4 -0
- data/README.md +117 -342
- data/benchmark/csv_load_penguins.yml +15 -0
- data/benchmark/drop_nil.yml +11 -0
- data/doc/DataFrame.md +854 -0
- data/doc/Vector.md +449 -0
- data/doc/image/arrow_table_new.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/example_in_red_arrow.png +0 -0
- data/doc/image/tdr.png +0 -0
- data/doc/image/tdr_and_table.png +0 -0
- data/doc/image/tidy_data_in_TDR.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/doc/tdr.md +56 -0
- data/doc/tdr_ja.md +56 -0
- data/lib/red-amber.rb +27 -0
- data/lib/red_amber/data_frame.rb +91 -37
- data/lib/red_amber/{data_frame_output.rb → data_frame_displayable.rb} +49 -41
- data/lib/red_amber/data_frame_indexable.rb +38 -0
- data/lib/red_amber/data_frame_observation_operation.rb +11 -0
- data/lib/red_amber/data_frame_selectable.rb +155 -48
- data/lib/red_amber/data_frame_variable_operation.rb +137 -0
- data/lib/red_amber/helper.rb +61 -0
- data/lib/red_amber/vector.rb +69 -16
- data/lib/red_amber/vector_functions.rb +80 -45
- data/lib/red_amber/vector_selectable.rb +124 -0
- data/lib/red_amber/vector_updatable.rb +104 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -16
- data/red_amber.gemspec +3 -6
- metadata +38 -9
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
+
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
+
|
6
|
+
module RedAmber
|
7
|
+
# mix-ins for class Vector
|
8
|
+
# Functions to fill in or replace data (especially missing values), producing new data.
|
9
|
+
module VectorUpdatable
|
10
|
+
# Replace data
|
11
|
+
# @param args [Array, Vector, Arrow::Array] index specifier
|
12
|
+
# @param replacer [Array, Vector, Arrow::Array] new data to replace with.
|
13
|
+
# @return [Vector] Replaced new Vector
|
14
|
+
def replace(args, replacer)
|
15
|
+
args = args.is_a?(Array) ? args : Array(args)
|
16
|
+
replacer = Array(replacer)
|
17
|
+
return self if args.empty? || args[0].nil?
|
18
|
+
|
19
|
+
replacer = nil if replacer.empty?
|
20
|
+
vector = parse_to_vector(args)
|
21
|
+
booleans =
|
22
|
+
if vector.boolean?
|
23
|
+
vector
|
24
|
+
elsif vector.numeric?
|
25
|
+
Vector.new(indices).is_in(vector)
|
26
|
+
else
|
27
|
+
raise VectorArgumentError, "Invalid data type #{args}"
|
28
|
+
end
|
29
|
+
replace_with(booleans, replacer)
|
30
|
+
end
|
31
|
+
|
32
|
+
# (related functions)
|
33
|
+
# fill_null_backward, fill_null_forward
|
34
|
+
|
35
|
+
# [Ternary element-wise]: boolean_vector.func(if_true, else) => vector
|
36
|
+
def if_else(true_choice, false_choice)
|
37
|
+
true_choice = true_choice.data if true_choice.is_a? Vector
|
38
|
+
false_choice = false_choice.data if false_choice.is_a? Vector
|
39
|
+
raise VectorTypeError, 'Reciever must be a boolean' unless boolean?
|
40
|
+
|
41
|
+
datum = find(:if_else).execute([data, true_choice, false_choice])
|
42
|
+
Vector.new(datum.value)
|
43
|
+
end
|
44
|
+
|
45
|
+
# Element-wise logical negation following Ruby's truthiness (nil is treated as false)
|
46
|
+
# ![true, false, nil] #=> [false, true, true]
|
47
|
+
def primitive_invert
|
48
|
+
raise VectorTypeError, "Not a boolean Vector: #{self}" unless boolean?
|
49
|
+
|
50
|
+
is_nil.if_else(false, self).invert
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
# [Ternary]: replace_with(booleans, replacements) => vector
|
56
|
+
# Replace items selected with a boolean mask
|
57
|
+
#
|
58
|
+
# (from Arrow C++ inline doc.)
|
59
|
+
# Given an array and a boolean mask (either scalar or of equal length),
|
60
|
+
# along with replacement values (either scalar or array),
|
61
|
+
# each element of the array for which the corresponding mask element is
|
62
|
+
# true will be replaced by the next value from the replacements,
|
63
|
+
# or with null if the mask is null.
|
64
|
+
# Hence, for replacement arrays, len(replacements) == sum(mask == true).
|
65
|
+
|
66
|
+
def replace_with(booleans, replacer = nil)
|
67
|
+
specifier =
|
68
|
+
if booleans.is_a?(Arrow::BooleanArray)
|
69
|
+
booleans
|
70
|
+
elsif booleans.is_a?(Vector) && booleans.boolean?
|
71
|
+
booleans.data
|
72
|
+
elsif booleans.is_a?(Array) && booleans?(booleans)
|
73
|
+
Arrow::BooleanArray.new(booleans)
|
74
|
+
else
|
75
|
+
raise VectorTypeError, 'Not a valid type'
|
76
|
+
end
|
77
|
+
raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
|
78
|
+
raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
|
79
|
+
|
80
|
+
r = Array(replacer) # scalar to [scalar]
|
81
|
+
r = [nil] if r.empty?
|
82
|
+
|
83
|
+
replacer =
|
84
|
+
if r.size == 1
|
85
|
+
case replacer
|
86
|
+
when Arrow::Array then replacer
|
87
|
+
when Vector then replacer.data
|
88
|
+
else
|
89
|
+
Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast
|
90
|
+
end
|
91
|
+
else
|
92
|
+
Arrow::Array.new(r)
|
93
|
+
end
|
94
|
+
replacer = data.class.new(replacer) if replacer.uniq == [nil]
|
95
|
+
|
96
|
+
raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
|
97
|
+
|
98
|
+
values = replacer.class.new(data)
|
99
|
+
|
100
|
+
datum = find('replace_with_mask').execute([values, specifier, replacer])
|
101
|
+
Vector.new(datum.value)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
@@ -1,18 +1,3 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
require 'rover-df'
|
5
|
-
|
6
|
-
require_relative 'red_amber/data_frame_output'
|
7
|
-
require_relative 'red_amber/data_frame_selectable'
|
8
|
-
require_relative 'red_amber/data_frame'
|
9
|
-
require_relative 'red_amber/vector_functions'
|
10
|
-
require_relative 'red_amber/vector'
|
11
|
-
require_relative 'red_amber/version'
|
12
|
-
|
13
|
-
module RedAmber
|
14
|
-
class Error < StandardError; end
|
15
|
-
|
16
|
-
class DataFrameArgumentError < ArgumentError; end
|
17
|
-
class DataFrameTypeError < TypeError; end
|
18
|
-
end
|
3
|
+
require_relative 'red-amber'
|
data/red_amber.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ['heronshoes877@gmail.com']
|
10
10
|
|
11
11
|
spec.summary = 'Simple dataframe library for Ruby'
|
12
|
-
spec.description = 'RedAmber is a simple dataframe library
|
12
|
+
spec.description = 'RedAmber is a simple dataframe library inspired by Rover-df and powered by Red Arrow.'
|
13
13
|
spec.homepage = 'https://github.com/heronshoes/red_amber'
|
14
14
|
spec.license = 'MIT'
|
15
15
|
spec.required_ruby_version = '>= 2.7'
|
@@ -30,14 +30,11 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
|
-
spec.add_dependency 'red-arrow', '>=
|
34
|
-
spec.add_dependency 'red-parquet', '>=
|
33
|
+
spec.add_dependency 'red-arrow', '>= 8.0.0'
|
34
|
+
spec.add_dependency 'red-parquet', '>= 8.0.0'
|
35
35
|
spec.add_dependency 'rover-df', '~> 0.3.0'
|
36
36
|
|
37
37
|
# Development dependency has gone to the Gemfile (rubygems/bundler#7237)
|
38
38
|
|
39
39
|
spec.metadata['rubygems_mfa_required'] = 'true'
|
40
|
-
|
41
|
-
# For more information and examples about making a new gem, check out our
|
42
|
-
# guide at: https://bundler.io/guides/creating_gem.html
|
43
40
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 8.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 8.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: red-parquet
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 8.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 8.0.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rover-df
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,8 +52,8 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.3.0
|
55
|
-
description: RedAmber is a simple dataframe library
|
56
|
-
|
55
|
+
description: RedAmber is a simple dataframe library inspired by Rover-df and powered
|
56
|
+
by Red Arrow.
|
57
57
|
email:
|
58
58
|
- heronshoes877@gmail.com
|
59
59
|
executables: []
|
@@ -67,13 +67,42 @@ files:
|
|
67
67
|
- LICENSE
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
|
+
- benchmark/csv_load_penguins.yml
|
71
|
+
- benchmark/drop_nil.yml
|
70
72
|
- doc/CODE_OF_CONDUCT.md
|
73
|
+
- doc/DataFrame.md
|
74
|
+
- doc/Vector.md
|
75
|
+
- doc/image/arrow_table_new.png
|
76
|
+
- doc/image/dataframe/assign.png
|
77
|
+
- doc/image/dataframe/drop.png
|
78
|
+
- doc/image/dataframe/pick.png
|
79
|
+
- doc/image/dataframe/remove.png
|
80
|
+
- doc/image/dataframe/rename.png
|
81
|
+
- doc/image/dataframe/slice.png
|
82
|
+
- doc/image/dataframe_model.png
|
83
|
+
- doc/image/example_in_red_arrow.png
|
84
|
+
- doc/image/tdr.png
|
85
|
+
- doc/image/tdr_and_table.png
|
86
|
+
- doc/image/tidy_data_in_TDR.png
|
87
|
+
- doc/image/vector/binary_element_wise.png
|
88
|
+
- doc/image/vector/unary_aggregation.png
|
89
|
+
- doc/image/vector/unary_aggregation_w_option.png
|
90
|
+
- doc/image/vector/unary_element_wise.png
|
91
|
+
- doc/tdr.md
|
92
|
+
- doc/tdr_ja.md
|
93
|
+
- lib/red-amber.rb
|
71
94
|
- lib/red_amber.rb
|
72
95
|
- lib/red_amber/data_frame.rb
|
73
|
-
- lib/red_amber/
|
96
|
+
- lib/red_amber/data_frame_displayable.rb
|
97
|
+
- lib/red_amber/data_frame_indexable.rb
|
98
|
+
- lib/red_amber/data_frame_observation_operation.rb
|
74
99
|
- lib/red_amber/data_frame_selectable.rb
|
100
|
+
- lib/red_amber/data_frame_variable_operation.rb
|
101
|
+
- lib/red_amber/helper.rb
|
75
102
|
- lib/red_amber/vector.rb
|
76
103
|
- lib/red_amber/vector_functions.rb
|
104
|
+
- lib/red_amber/vector_selectable.rb
|
105
|
+
- lib/red_amber/vector_updatable.rb
|
77
106
|
- lib/red_amber/version.rb
|
78
107
|
- red_amber.gemspec
|
79
108
|
- sig/red_amber.rbs
|