red_amber 0.1.3 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +31 -7
  3. data/CHANGELOG.md +214 -10
  4. data/Gemfile +4 -0
  5. data/README.md +117 -342
  6. data/benchmark/csv_load_penguins.yml +15 -0
  7. data/benchmark/drop_nil.yml +11 -0
  8. data/doc/DataFrame.md +854 -0
  9. data/doc/Vector.md +449 -0
  10. data/doc/image/arrow_table_new.png +0 -0
  11. data/doc/image/dataframe/assign.png +0 -0
  12. data/doc/image/dataframe/drop.png +0 -0
  13. data/doc/image/dataframe/pick.png +0 -0
  14. data/doc/image/dataframe/remove.png +0 -0
  15. data/doc/image/dataframe/rename.png +0 -0
  16. data/doc/image/dataframe/slice.png +0 -0
  17. data/doc/image/dataframe_model.png +0 -0
  18. data/doc/image/example_in_red_arrow.png +0 -0
  19. data/doc/image/tdr.png +0 -0
  20. data/doc/image/tdr_and_table.png +0 -0
  21. data/doc/image/tidy_data_in_TDR.png +0 -0
  22. data/doc/image/vector/binary_element_wise.png +0 -0
  23. data/doc/image/vector/unary_aggregation.png +0 -0
  24. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  25. data/doc/image/vector/unary_element_wise.png +0 -0
  26. data/doc/tdr.md +56 -0
  27. data/doc/tdr_ja.md +56 -0
  28. data/lib/red-amber.rb +27 -0
  29. data/lib/red_amber/data_frame.rb +91 -37
  30. data/lib/red_amber/{data_frame_output.rb → data_frame_displayable.rb} +49 -41
  31. data/lib/red_amber/data_frame_indexable.rb +38 -0
  32. data/lib/red_amber/data_frame_observation_operation.rb +11 -0
  33. data/lib/red_amber/data_frame_selectable.rb +155 -48
  34. data/lib/red_amber/data_frame_variable_operation.rb +137 -0
  35. data/lib/red_amber/helper.rb +61 -0
  36. data/lib/red_amber/vector.rb +69 -16
  37. data/lib/red_amber/vector_functions.rb +80 -45
  38. data/lib/red_amber/vector_selectable.rb +124 -0
  39. data/lib/red_amber/vector_updatable.rb +104 -0
  40. data/lib/red_amber/version.rb +1 -1
  41. data/lib/red_amber.rb +1 -16
  42. data/red_amber.gemspec +3 -6
  43. metadata +38 -9
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
4
+ # reference: https://arrow.apache.org/docs/cpp/compute.html
5
+
6
+ module RedAmber
7
+ # mix-ins for class Vector
8
+ # Functions to replace some data (especially missing values) with new data.
9
+ module VectorUpdatable
10
+ # Replace data selected by a boolean mask or by numeric indices.
11
+ # @param args [Array, Vector, Arrow::Array] specifier of the elements to replace: booleans (mask) or numeric indices
12
+ # @param replacer [Array, Vector, Arrow::Array] new data to replace with.
13
+ # @return [Vector] Replaced new Vector; self is returned as is when args is empty or its first element is nil
14
+ def replace(args, replacer)
15
+ args = args.is_a?(Array) ? args : Array(args)
16
+ replacer = Array(replacer)
17
+ return self if args.empty? || args[0].nil?
18
+
19
+ replacer = nil if replacer.empty? # empty replacer: pass nil on, replace_with then replaces with nil
20
+ vector = parse_to_vector(args)
21
+ booleans =
22
+ if vector.boolean?
23
+ vector
24
+ elsif vector.numeric?
25
+ Vector.new(indices).is_in(vector) # presumably turns the index list into a boolean mask over all positions - confirm `indices`
26
+ else
27
+ raise VectorArgumentError, "Invalid data type #{args}"
28
+ end
29
+ replace_with(booleans, replacer)
30
+ end
31
+
32
+ # (related functions)
33
+ # fill_null_backward, fill_null_forward
34
+
35
+ # [Ternary element-wise]: boolean_vector.if_else(true_choice, false_choice) => vector (raises VectorTypeError unless the receiver is boolean)
36
+ def if_else(true_choice, false_choice)
37
+ true_choice = true_choice.data if true_choice.is_a? Vector # hand the Vector's #data (not the Vector itself) to the function
38
+ false_choice = false_choice.data if false_choice.is_a? Vector
39
+ raise VectorTypeError, 'Reciever must be a boolean' unless boolean?
40
+
41
+ datum = find(:if_else).execute([data, true_choice, false_choice])
42
+ Vector.new(datum.value)
43
+ end
44
+
45
+ # Same behavior as Ruby's logical not (`!`): nil is inverted to true.
46
+ # e.g. element-wise ![true, false, nil] #=> [false, true, true]
47
+ def primitive_invert
48
+ raise VectorTypeError, "Not a boolean Vector: #{self}" unless boolean?
49
+
50
+ is_nil.if_else(false, self).invert # nil elements are turned into false first, then everything is inverted
51
+ end
52
+
53
+ private
54
+
55
+ # [Ternary]: replace_with(booleans, replacements) => vector
56
+ # Replace the items selected by a boolean mask.
57
+ #
58
+ # (from Arrow C++ inline doc.)
59
+ # Given an array and a boolean mask (either scalar or of equal length),
60
+ # along with replacement values (either scalar or array),
61
+ # each element of the array for which the corresponding mask element is
62
+ # true will be replaced by the next value from the replacements,
63
+ # or with null if the mask is null.
64
+ # Hence, for replacement arrays, len(replacements) == sum(mask == true).
65
+
66
+ def replace_with(booleans, replacer = nil)
67
+ specifier =
68
+ if booleans.is_a?(Arrow::BooleanArray)
69
+ booleans
70
+ elsif booleans.is_a?(Vector) && booleans.boolean?
71
+ booleans.data
72
+ elsif booleans.is_a?(Array) && booleans?(booleans)
73
+ Arrow::BooleanArray.new(booleans)
74
+ else
75
+ raise VectorTypeError, 'Not a valid type'
76
+ end
77
+ raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
78
+ raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
79
+
80
+ r = Array(replacer) # scalar to [scalar], nil to []
81
+ r = [nil] if r.empty? # no replacement given: replace with nil
82
+
83
+ replacer =
84
+ if r.size == 1
85
+ case replacer
86
+ when Arrow::Array then replacer
87
+ when Vector then replacer.data
88
+ else
89
+ Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast the single value to every true position of the mask
90
+ end
91
+ else
92
+ Arrow::Array.new(r)
93
+ end
94
+ replacer = data.class.new(replacer) if replacer.uniq == [nil] # all-nil replacements: rebuild them with the class of the receiver's data
95
+
96
+ raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
97
+
98
+ values = replacer.class.new(data) # NOTE(review): rebuilds data with the replacer's array class, presumably so both types match - confirm
99
+
100
+ datum = find('replace_with_mask').execute([values, specifier, replacer])
101
+ Vector.new(datum.value)
102
+ end
103
+ end
104
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.6'
5
5
  end
data/lib/red_amber.rb CHANGED
@@ -1,18 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'arrow'
4
- require 'rover-df'
5
-
6
- require_relative 'red_amber/data_frame_output'
7
- require_relative 'red_amber/data_frame_selectable'
8
- require_relative 'red_amber/data_frame'
9
- require_relative 'red_amber/vector_functions'
10
- require_relative 'red_amber/vector'
11
- require_relative 'red_amber/version'
12
-
13
- module RedAmber
14
- class Error < StandardError; end
15
-
16
- class DataFrameArgumentError < ArgumentError; end
17
- class DataFrameTypeError < TypeError; end
18
- end
3
+ require_relative 'red-amber'
data/red_amber.gemspec CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ['heronshoes877@gmail.com']
10
10
 
11
11
  spec.summary = 'Simple dataframe library for Ruby'
12
- spec.description = 'RedAmber is a simple dataframe library powered by Red Arrow with API similar to Rover-df.'
12
+ spec.description = 'RedAmber is a simple dataframe library inspired by Rover-df and powered by Red Arrow.'
13
13
  spec.homepage = 'https://github.com/heronshoes/red_amber'
14
14
  spec.license = 'MIT'
15
15
  spec.required_ruby_version = '>= 2.7'
@@ -30,14 +30,11 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ['lib']
32
32
 
33
- spec.add_dependency 'red-arrow', '>= 7.0.0'
34
- spec.add_dependency 'red-parquet', '>= 7.0.0'
33
+ spec.add_dependency 'red-arrow', '>= 8.0.0'
34
+ spec.add_dependency 'red-parquet', '>= 8.0.0'
35
35
  spec.add_dependency 'rover-df', '~> 0.3.0'
36
36
 
37
37
  # Development dependency has gone to the Gemfile (rubygems/bundler#7237)
38
38
 
39
39
  spec.metadata['rubygems_mfa_required'] = 'true'
40
-
41
- # For more information and examples about making a new gem, check out our
42
- # guide at: https://bundler.io/guides/creating_gem.html
43
40
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-05-14 00:00:00.000000000 Z
11
+ date: 2022-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 7.0.0
19
+ version: 8.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 7.0.0
26
+ version: 8.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: red-parquet
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 7.0.0
33
+ version: 8.0.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: 7.0.0
40
+ version: 8.0.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rover-df
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -52,8 +52,8 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.3.0
55
- description: RedAmber is a simple dataframe library powered by Red Arrow with API
56
- similar to Rover-df.
55
+ description: RedAmber is a simple dataframe library inspired by Rover-df and powered
56
+ by Red Arrow.
57
57
  email:
58
58
  - heronshoes877@gmail.com
59
59
  executables: []
@@ -67,13 +67,42 @@ files:
67
67
  - LICENSE
68
68
  - README.md
69
69
  - Rakefile
70
+ - benchmark/csv_load_penguins.yml
71
+ - benchmark/drop_nil.yml
70
72
  - doc/CODE_OF_CONDUCT.md
73
+ - doc/DataFrame.md
74
+ - doc/Vector.md
75
+ - doc/image/arrow_table_new.png
76
+ - doc/image/dataframe/assign.png
77
+ - doc/image/dataframe/drop.png
78
+ - doc/image/dataframe/pick.png
79
+ - doc/image/dataframe/remove.png
80
+ - doc/image/dataframe/rename.png
81
+ - doc/image/dataframe/slice.png
82
+ - doc/image/dataframe_model.png
83
+ - doc/image/example_in_red_arrow.png
84
+ - doc/image/tdr.png
85
+ - doc/image/tdr_and_table.png
86
+ - doc/image/tidy_data_in_TDR.png
87
+ - doc/image/vector/binary_element_wise.png
88
+ - doc/image/vector/unary_aggregation.png
89
+ - doc/image/vector/unary_aggregation_w_option.png
90
+ - doc/image/vector/unary_element_wise.png
91
+ - doc/tdr.md
92
+ - doc/tdr_ja.md
93
+ - lib/red-amber.rb
71
94
  - lib/red_amber.rb
72
95
  - lib/red_amber/data_frame.rb
73
- - lib/red_amber/data_frame_output.rb
96
+ - lib/red_amber/data_frame_displayable.rb
97
+ - lib/red_amber/data_frame_indexable.rb
98
+ - lib/red_amber/data_frame_observation_operation.rb
74
99
  - lib/red_amber/data_frame_selectable.rb
100
+ - lib/red_amber/data_frame_variable_operation.rb
101
+ - lib/red_amber/helper.rb
75
102
  - lib/red_amber/vector.rb
76
103
  - lib/red_amber/vector_functions.rb
104
+ - lib/red_amber/vector_selectable.rb
105
+ - lib/red_amber/vector_updatable.rb
77
106
  - lib/red_amber/version.rb
78
107
  - red_amber.gemspec
79
108
  - sig/red_amber.rbs