red_amber 0.1.3 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +31 -7
- data/CHANGELOG.md +214 -10
- data/Gemfile +4 -0
- data/README.md +117 -342
- data/benchmark/csv_load_penguins.yml +15 -0
- data/benchmark/drop_nil.yml +11 -0
- data/doc/DataFrame.md +854 -0
- data/doc/Vector.md +449 -0
- data/doc/image/arrow_table_new.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/example_in_red_arrow.png +0 -0
- data/doc/image/tdr.png +0 -0
- data/doc/image/tdr_and_table.png +0 -0
- data/doc/image/tidy_data_in_TDR.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/doc/tdr.md +56 -0
- data/doc/tdr_ja.md +56 -0
- data/lib/red-amber.rb +27 -0
- data/lib/red_amber/data_frame.rb +91 -37
- data/lib/red_amber/{data_frame_output.rb → data_frame_displayable.rb} +49 -41
- data/lib/red_amber/data_frame_indexable.rb +38 -0
- data/lib/red_amber/data_frame_observation_operation.rb +11 -0
- data/lib/red_amber/data_frame_selectable.rb +155 -48
- data/lib/red_amber/data_frame_variable_operation.rb +137 -0
- data/lib/red_amber/helper.rb +61 -0
- data/lib/red_amber/vector.rb +69 -16
- data/lib/red_amber/vector_functions.rb +80 -45
- data/lib/red_amber/vector_selectable.rb +124 -0
- data/lib/red_amber/vector_updatable.rb +104 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -16
- data/red_amber.gemspec +3 -6
- metadata +38 -9
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
+
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
+
|
6
|
+
module RedAmber
  # Mix-in for class Vector.
  # Provides element replacement and related helpers, mainly intended for
  # making up data (especially missing values) when building new data.
  module VectorUpdatable
    # Replace the elements selected by a specifier with new data.
    #
    # A boolean specifier is used directly as a mask. A numeric specifier is
    # turned into a mask by testing `indices` for membership in it
    # (NOTE(review): `indices` and `parse_to_vector` are defined outside this
    # module; presumably `indices` lists the receiver's positions - confirm).
    #
    # @param args [Array, Vector, Arrow::Array] index specifier
    # @param replacer [Array, Vector, Arrow::Array] new data to replace for.
    # @return [Vector] Replaced new Vector; the receiver itself when the
    #   specifier is empty or its first element is nil.
    # @raise [VectorArgumentError] if the specifier is neither boolean nor numeric.
    def replace(args, replacer)
      specifiers = args.is_a?(Array) ? args : Array(args)
      replacements = Array(replacer)
      return self if specifiers.empty? || specifiers[0].nil?

      # An empty replacement is passed on as nil; replace_with maps it to [nil].
      replacements = nil if replacements.empty?
      selector = parse_to_vector(specifiers)
      return replace_with(selector, replacements) if selector.boolean?
      raise VectorArgumentError, "Invalid data type #{specifiers}" unless selector.numeric?

      replace_with(Vector.new(indices).is_in(selector), replacements)
    end

    # (related functions)
    # fill_null_backward, fill_null_forward

    # [Ternary element-wise]: boolean_vector.func(if_true, else) => vector
    #
    # Executes Arrow's `if_else` function with the receiver's data as the
    # condition. Vector arguments are unwrapped to their underlying data;
    # anything else is handed to Arrow unchanged.
    #
    # @param true_choice [Object] value(s) passed as the "true" operand
    # @param false_choice [Object] value(s) passed as the "false" operand
    # @return [Vector] a new Vector wrapping the function result
    # @raise [VectorTypeError] if the receiver is not boolean
    def if_else(true_choice, false_choice)
      choices = [true_choice, false_choice].map do |choice|
        choice.is_a?(Vector) ? choice.data : choice
      end
      raise VectorTypeError, 'Reciever must be a boolean' unless boolean?

      result = find(:if_else).execute([data, *choices])
      Vector.new(result.value)
    end

    # Invert with the same behavior as Ruby's `!`, element by element:
    #   ![true, false, nil] #=> [false, true, true]
    #
    # Nils are first replaced by false (via is_nil.if_else) and the result
    # is then inverted.
    #
    # @return [Vector] the inverted Vector
    # @raise [VectorTypeError] if the receiver is not boolean
    def primitive_invert
      raise VectorTypeError, "Not a boolean Vector: #{self}" unless boolean?

      nil_as_false = is_nil.if_else(false, self)
      nil_as_false.invert
    end

    private

    # [Ternary]: replace_with(booleans, replacements) => vector
    # Replace items selected with a boolean mask
    #
    # (from Arrow C++ inline doc.)
    # Given an array and a boolean mask (either scalar or of equal length),
    # along with replacement values (either scalar or array),
    # each element of the array for which the corresponding mask element is
    # true will be replaced by the next value from the replacements,
    # or with null if the mask is null.
    # Hence, for replacement arrays, len(replacements) == sum(mask == true).
    #
    # @param booleans [Arrow::BooleanArray, Vector, Array] boolean mask
    # @param replacer [Object, nil] scalar or array-like replacement values
    # @return [Vector] a new Vector wrapping the result of `replace_with_mask`
    # @raise [VectorTypeError] if the mask is not one of the accepted boolean types
    # @raise [VectorArgumentError] if the mask length differs from `size`,
    #   the mask has no true, or the replacement count does not match
    def replace_with(booleans, replacer = nil)
      # Normalize the mask; it stays nil when the type is not acceptable.
      mask =
        case booleans
        when Arrow::BooleanArray
          booleans
        when Vector
          booleans.data if booleans.boolean?
        when Array
          Arrow::BooleanArray.new(booleans) if booleans?(booleans)
        end
      raise VectorTypeError, 'Not a valid type' if mask.nil?
      raise VectorArgumentError, 'Booleans size unmatch' unless mask.length == size
      raise VectorArgumentError, 'Booleans not have any `true`' unless mask.any?

      # Number of positions that will be replaced.
      true_count = mask.to_a.count(true)

      candidates = Array(replacer) # scalar to [scalar]
      candidates = [nil] if candidates.empty?

      replacement =
        if candidates.size != 1
          Arrow::Array.new(candidates)
        elsif replacer.is_a?(Arrow::Array)
          replacer
        elsif replacer.is_a?(Vector)
          replacer.data
        else
          Arrow::Array.new(candidates * true_count) # broadcast
        end
      # All-nil replacements are rebuilt with the receiver's own array class.
      replacement = data.class.new(replacement) if replacement.uniq == [nil]

      raise VectorArgumentError, 'Replacements size unmatch' if true_count != replacement.length

      # Rebuild the receiver's data with the replacement's array class.
      values = replacement.class.new(data)

      result = find('replace_with_mask').execute([values, mask, replacement])
      Vector.new(result.value)
    end
  end
end
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
@@ -1,18 +1,3 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
require 'rover-df'
|
5
|
-
|
6
|
-
require_relative 'red_amber/data_frame_output'
|
7
|
-
require_relative 'red_amber/data_frame_selectable'
|
8
|
-
require_relative 'red_amber/data_frame'
|
9
|
-
require_relative 'red_amber/vector_functions'
|
10
|
-
require_relative 'red_amber/vector'
|
11
|
-
require_relative 'red_amber/version'
|
12
|
-
|
13
|
-
module RedAmber
|
14
|
-
class Error < StandardError; end
|
15
|
-
|
16
|
-
class DataFrameArgumentError < ArgumentError; end
|
17
|
-
class DataFrameTypeError < TypeError; end
|
18
|
-
end
|
3
|
+
require_relative 'red-amber'
|
data/red_amber.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ['heronshoes877@gmail.com']
|
10
10
|
|
11
11
|
spec.summary = 'Simple dataframe library for Ruby'
|
12
|
-
spec.description = 'RedAmber is a simple dataframe library
|
12
|
+
spec.description = 'RedAmber is a simple dataframe library inspired by Rover-df and powered by Red Arrow.'
|
13
13
|
spec.homepage = 'https://github.com/heronshoes/red_amber'
|
14
14
|
spec.license = 'MIT'
|
15
15
|
spec.required_ruby_version = '>= 2.7'
|
@@ -30,14 +30,11 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
|
-
spec.add_dependency 'red-arrow', '>=
|
34
|
-
spec.add_dependency 'red-parquet', '>=
|
33
|
+
spec.add_dependency 'red-arrow', '>= 8.0.0'
|
34
|
+
spec.add_dependency 'red-parquet', '>= 8.0.0'
|
35
35
|
spec.add_dependency 'rover-df', '~> 0.3.0'
|
36
36
|
|
37
37
|
# Development dependency has gone to the Gemfile (rubygems/bundler#7237)
|
38
38
|
|
39
39
|
spec.metadata['rubygems_mfa_required'] = 'true'
|
40
|
-
|
41
|
-
# For more information and examples about making a new gem, check out our
|
42
|
-
# guide at: https://bundler.io/guides/creating_gem.html
|
43
40
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 8.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 8.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: red-parquet
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 8.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 8.0.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rover-df
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,8 +52,8 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.3.0
|
55
|
-
description: RedAmber is a simple dataframe library
|
56
|
-
|
55
|
+
description: RedAmber is a simple dataframe library inspired by Rover-df and powered
|
56
|
+
by Red Arrow.
|
57
57
|
email:
|
58
58
|
- heronshoes877@gmail.com
|
59
59
|
executables: []
|
@@ -67,13 +67,42 @@ files:
|
|
67
67
|
- LICENSE
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
|
+
- benchmark/csv_load_penguins.yml
|
71
|
+
- benchmark/drop_nil.yml
|
70
72
|
- doc/CODE_OF_CONDUCT.md
|
73
|
+
- doc/DataFrame.md
|
74
|
+
- doc/Vector.md
|
75
|
+
- doc/image/arrow_table_new.png
|
76
|
+
- doc/image/dataframe/assign.png
|
77
|
+
- doc/image/dataframe/drop.png
|
78
|
+
- doc/image/dataframe/pick.png
|
79
|
+
- doc/image/dataframe/remove.png
|
80
|
+
- doc/image/dataframe/rename.png
|
81
|
+
- doc/image/dataframe/slice.png
|
82
|
+
- doc/image/dataframe_model.png
|
83
|
+
- doc/image/example_in_red_arrow.png
|
84
|
+
- doc/image/tdr.png
|
85
|
+
- doc/image/tdr_and_table.png
|
86
|
+
- doc/image/tidy_data_in_TDR.png
|
87
|
+
- doc/image/vector/binary_element_wise.png
|
88
|
+
- doc/image/vector/unary_aggregation.png
|
89
|
+
- doc/image/vector/unary_aggregation_w_option.png
|
90
|
+
- doc/image/vector/unary_element_wise.png
|
91
|
+
- doc/tdr.md
|
92
|
+
- doc/tdr_ja.md
|
93
|
+
- lib/red-amber.rb
|
71
94
|
- lib/red_amber.rb
|
72
95
|
- lib/red_amber/data_frame.rb
|
73
|
-
- lib/red_amber/
|
96
|
+
- lib/red_amber/data_frame_displayable.rb
|
97
|
+
- lib/red_amber/data_frame_indexable.rb
|
98
|
+
- lib/red_amber/data_frame_observation_operation.rb
|
74
99
|
- lib/red_amber/data_frame_selectable.rb
|
100
|
+
- lib/red_amber/data_frame_variable_operation.rb
|
101
|
+
- lib/red_amber/helper.rb
|
75
102
|
- lib/red_amber/vector.rb
|
76
103
|
- lib/red_amber/vector_functions.rb
|
104
|
+
- lib/red_amber/vector_selectable.rb
|
105
|
+
- lib/red_amber/vector_updatable.rb
|
77
106
|
- lib/red_amber/version.rb
|
78
107
|
- red_amber.gemspec
|
79
108
|
- sig/red_amber.rbs
|