red_amber 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +31 -7
  3. data/CHANGELOG.md +214 -10
  4. data/Gemfile +4 -0
  5. data/README.md +117 -342
  6. data/benchmark/csv_load_penguins.yml +15 -0
  7. data/benchmark/drop_nil.yml +11 -0
  8. data/doc/DataFrame.md +854 -0
  9. data/doc/Vector.md +449 -0
  10. data/doc/image/arrow_table_new.png +0 -0
  11. data/doc/image/dataframe/assign.png +0 -0
  12. data/doc/image/dataframe/drop.png +0 -0
  13. data/doc/image/dataframe/pick.png +0 -0
  14. data/doc/image/dataframe/remove.png +0 -0
  15. data/doc/image/dataframe/rename.png +0 -0
  16. data/doc/image/dataframe/slice.png +0 -0
  17. data/doc/image/dataframe_model.png +0 -0
  18. data/doc/image/example_in_red_arrow.png +0 -0
  19. data/doc/image/tdr.png +0 -0
  20. data/doc/image/tdr_and_table.png +0 -0
  21. data/doc/image/tidy_data_in_TDR.png +0 -0
  22. data/doc/image/vector/binary_element_wise.png +0 -0
  23. data/doc/image/vector/unary_aggregation.png +0 -0
  24. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  25. data/doc/image/vector/unary_element_wise.png +0 -0
  26. data/doc/tdr.md +56 -0
  27. data/doc/tdr_ja.md +56 -0
  28. data/lib/red-amber.rb +27 -0
  29. data/lib/red_amber/data_frame.rb +91 -37
  30. data/lib/red_amber/{data_frame_output.rb → data_frame_displayable.rb} +49 -41
  31. data/lib/red_amber/data_frame_indexable.rb +38 -0
  32. data/lib/red_amber/data_frame_observation_operation.rb +11 -0
  33. data/lib/red_amber/data_frame_selectable.rb +155 -48
  34. data/lib/red_amber/data_frame_variable_operation.rb +137 -0
  35. data/lib/red_amber/helper.rb +61 -0
  36. data/lib/red_amber/vector.rb +69 -16
  37. data/lib/red_amber/vector_functions.rb +80 -45
  38. data/lib/red_amber/vector_selectable.rb +124 -0
  39. data/lib/red_amber/vector_updatable.rb +104 -0
  40. data/lib/red_amber/version.rb +1 -1
  41. data/lib/red_amber.rb +1 -16
  42. data/red_amber.gemspec +3 -6
  43. metadata +38 -9
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
4
+ # reference: https://arrow.apache.org/docs/cpp/compute.html
5
+
6
+ module RedAmber
7
+ # mix-ins for class Vector
8
+ # Functions to update data (especially missing values), returning a new Vector.
9
+ module VectorUpdatable
10
+ # Replace data
11
+ # @param args [Array, Vector, Arrow::Array] index specifier
12
+ # @param replacer [Array, Vector, Arrow::Array] new data to replace with.
13
+ # @return [Vector] Replaced new Vector
14
+ def replace(args, replacer)
15
+ args = args.is_a?(Array) ? args : Array(args)
16
+ replacer = Array(replacer)
17
+ return self if args.empty? || args[0].nil?
18
+
19
+ replacer = nil if replacer.empty?
20
+ vector = parse_to_vector(args)
21
+ booleans =
22
+ if vector.boolean?
23
+ vector
24
+ elsif vector.numeric?
25
+ Vector.new(indices).is_in(vector)
26
+ else
27
+ raise VectorArgumentError, "Invalid data type #{args}"
28
+ end
29
+ replace_with(booleans, replacer)
30
+ end
31
+
32
+ # (related functions)
33
+ # fill_null_backward, fill_null_forward
34
+
35
+ # [Ternary element-wise]: boolean_vector.func(if_true, else) => vector
36
+ def if_else(true_choice, false_choice)
37
+ true_choice = true_choice.data if true_choice.is_a? Vector
38
+ false_choice = false_choice.data if false_choice.is_a? Vector
39
+ raise VectorTypeError, 'Reciever must be a boolean' unless boolean?
40
+
41
+ datum = find(:if_else).execute([data, true_choice, false_choice])
42
+ Vector.new(datum.value)
43
+ end
44
+
45
+ # Same behavior as Ruby's `!` operator (nil is treated as false)
46
+ # ![true, false, nil] #=> [false, true, true]
47
+ def primitive_invert
48
+ raise VectorTypeError, "Not a boolean Vector: #{self}" unless boolean?
49
+
50
+ is_nil.if_else(false, self).invert
51
+ end
52
+
53
+ private
54
+
55
+ # [Ternary]: replace_with(booleans, replacements) => vector
56
+ # Replace items selected with a boolean mask
57
+ #
58
+ # (from Arrow C++ inline doc.)
59
+ # Given an array and a boolean mask (either scalar or of equal length),
60
+ # along with replacement values (either scalar or array),
61
+ # each element of the array for which the corresponding mask element is
62
+ # true will be replaced by the next value from the replacements,
63
+ # or with null if the mask is null.
64
+ # Hence, for replacement arrays, len(replacements) == sum(mask == true).
65
+
66
+ def replace_with(booleans, replacer = nil)
67
+ specifier =
68
+ if booleans.is_a?(Arrow::BooleanArray)
69
+ booleans
70
+ elsif booleans.is_a?(Vector) && booleans.boolean?
71
+ booleans.data
72
+ elsif booleans.is_a?(Array) && booleans?(booleans)
73
+ Arrow::BooleanArray.new(booleans)
74
+ else
75
+ raise VectorTypeError, 'Not a valid type'
76
+ end
77
+ raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
78
+ raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
79
+
80
+ r = Array(replacer) # scalar to [scalar]
81
+ r = [nil] if r.empty?
82
+
83
+ replacer =
84
+ if r.size == 1
85
+ case replacer
86
+ when Arrow::Array then replacer
87
+ when Vector then replacer.data
88
+ else
89
+ Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast
90
+ end
91
+ else
92
+ Arrow::Array.new(r)
93
+ end
94
+ replacer = data.class.new(replacer) if replacer.uniq == [nil]
95
+
96
+ raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
97
+
98
+ values = replacer.class.new(data)
99
+
100
+ datum = find('replace_with_mask').execute([values, specifier, replacer])
101
+ Vector.new(datum.value)
102
+ end
103
+ end
104
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.6'
5
5
  end
data/lib/red_amber.rb CHANGED
@@ -1,18 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'arrow'
4
- require 'rover-df'
5
-
6
- require_relative 'red_amber/data_frame_output'
7
- require_relative 'red_amber/data_frame_selectable'
8
- require_relative 'red_amber/data_frame'
9
- require_relative 'red_amber/vector_functions'
10
- require_relative 'red_amber/vector'
11
- require_relative 'red_amber/version'
12
-
13
- module RedAmber
14
- class Error < StandardError; end
15
-
16
- class DataFrameArgumentError < ArgumentError; end
17
- class DataFrameTypeError < TypeError; end
18
- end
3
+ require_relative 'red-amber'
data/red_amber.gemspec CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ['heronshoes877@gmail.com']
10
10
 
11
11
  spec.summary = 'Simple dataframe library for Ruby'
12
- spec.description = 'RedAmber is a simple dataframe library powered by Red Arrow with API similar to Rover-df.'
12
+ spec.description = 'RedAmber is a simple dataframe library inspired by Rover-df and powered by Red Arrow.'
13
13
  spec.homepage = 'https://github.com/heronshoes/red_amber'
14
14
  spec.license = 'MIT'
15
15
  spec.required_ruby_version = '>= 2.7'
@@ -30,14 +30,11 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ['lib']
32
32
 
33
- spec.add_dependency 'red-arrow', '>= 7.0.0'
34
- spec.add_dependency 'red-parquet', '>= 7.0.0'
33
+ spec.add_dependency 'red-arrow', '>= 8.0.0'
34
+ spec.add_dependency 'red-parquet', '>= 8.0.0'
35
35
  spec.add_dependency 'rover-df', '~> 0.3.0'
36
36
 
37
37
  # Development dependency has gone to the Gemfile (rubygems/bundler#7237)
38
38
 
39
39
  spec.metadata['rubygems_mfa_required'] = 'true'
40
-
41
- # For more information and examples about making a new gem, check out our
42
- # guide at: https://bundler.io/guides/creating_gem.html
43
40
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-05-14 00:00:00.000000000 Z
11
+ date: 2022-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 7.0.0
19
+ version: 8.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 7.0.0
26
+ version: 8.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: red-parquet
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 7.0.0
33
+ version: 8.0.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: 7.0.0
40
+ version: 8.0.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rover-df
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -52,8 +52,8 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.3.0
55
- description: RedAmber is a simple dataframe library powered by Red Arrow with API
56
- similar to Rover-df.
55
+ description: RedAmber is a simple dataframe library inspired by Rover-df and powered
56
+ by Red Arrow.
57
57
  email:
58
58
  - heronshoes877@gmail.com
59
59
  executables: []
@@ -67,13 +67,42 @@ files:
67
67
  - LICENSE
68
68
  - README.md
69
69
  - Rakefile
70
+ - benchmark/csv_load_penguins.yml
71
+ - benchmark/drop_nil.yml
70
72
  - doc/CODE_OF_CONDUCT.md
73
+ - doc/DataFrame.md
74
+ - doc/Vector.md
75
+ - doc/image/arrow_table_new.png
76
+ - doc/image/dataframe/assign.png
77
+ - doc/image/dataframe/drop.png
78
+ - doc/image/dataframe/pick.png
79
+ - doc/image/dataframe/remove.png
80
+ - doc/image/dataframe/rename.png
81
+ - doc/image/dataframe/slice.png
82
+ - doc/image/dataframe_model.png
83
+ - doc/image/example_in_red_arrow.png
84
+ - doc/image/tdr.png
85
+ - doc/image/tdr_and_table.png
86
+ - doc/image/tidy_data_in_TDR.png
87
+ - doc/image/vector/binary_element_wise.png
88
+ - doc/image/vector/unary_aggregation.png
89
+ - doc/image/vector/unary_aggregation_w_option.png
90
+ - doc/image/vector/unary_element_wise.png
91
+ - doc/tdr.md
92
+ - doc/tdr_ja.md
93
+ - lib/red-amber.rb
71
94
  - lib/red_amber.rb
72
95
  - lib/red_amber/data_frame.rb
73
- - lib/red_amber/data_frame_output.rb
96
+ - lib/red_amber/data_frame_displayable.rb
97
+ - lib/red_amber/data_frame_indexable.rb
98
+ - lib/red_amber/data_frame_observation_operation.rb
74
99
  - lib/red_amber/data_frame_selectable.rb
100
+ - lib/red_amber/data_frame_variable_operation.rb
101
+ - lib/red_amber/helper.rb
75
102
  - lib/red_amber/vector.rb
76
103
  - lib/red_amber/vector_functions.rb
104
+ - lib/red_amber/vector_selectable.rb
105
+ - lib/red_amber/vector_updatable.rb
77
106
  - lib/red_amber/version.rb
78
107
  - red_amber.gemspec
79
108
  - sig/red_amber.rbs