red_amber 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +15 -0
  3. data/CHANGELOG.md +170 -20
  4. data/Gemfile +4 -2
  5. data/README.md +121 -302
  6. data/benchmark/basic.yml +79 -0
  7. data/benchmark/combine.yml +63 -0
  8. data/benchmark/drop_nil.yml +15 -3
  9. data/benchmark/group.yml +33 -0
  10. data/benchmark/reshape.yml +27 -0
  11. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  12. data/benchmark/rover/flights.yml +23 -0
  13. data/benchmark/rover/penguins.yml +23 -0
  14. data/benchmark/rover/planes.yml +23 -0
  15. data/benchmark/rover/weather.yml +23 -0
  16. data/doc/DataFrame.md +611 -318
  17. data/doc/Vector.md +31 -36
  18. data/doc/image/basic_verbs.png +0 -0
  19. data/doc/image/dataframe/assign.png +0 -0
  20. data/doc/image/dataframe/assign_operation.png +0 -0
  21. data/doc/image/dataframe/drop.png +0 -0
  22. data/doc/image/dataframe/join.png +0 -0
  23. data/doc/image/dataframe/pick.png +0 -0
  24. data/doc/image/dataframe/pick_operation.png +0 -0
  25. data/doc/image/dataframe/remove.png +0 -0
  26. data/doc/image/dataframe/rename.png +0 -0
  27. data/doc/image/dataframe/rename_operation.png +0 -0
  28. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  29. data/doc/image/dataframe/set_and_bind.png +0 -0
  30. data/doc/image/dataframe/slice.png +0 -0
  31. data/doc/image/dataframe/slice_operation.png +0 -0
  32. data/doc/image/dataframe_model.png +0 -0
  33. data/doc/image/group_operation.png +0 -0
  34. data/doc/image/replace-if_then.png +0 -0
  35. data/doc/image/reshaping_dataframe.png +0 -0
  36. data/doc/image/screenshot.png +0 -0
  37. data/doc/image/vector/binary_element_wise.png +0 -0
  38. data/doc/image/vector/unary_aggregation.png +0 -0
  39. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  40. data/doc/image/vector/unary_element_wise.png +0 -0
  41. data/lib/red_amber/data_frame.rb +16 -42
  42. data/lib/red_amber/data_frame_combinable.rb +283 -0
  43. data/lib/red_amber/data_frame_displayable.rb +58 -3
  44. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  45. data/lib/red_amber/data_frame_reshaping.rb +8 -6
  46. data/lib/red_amber/data_frame_selectable.rb +9 -9
  47. data/lib/red_amber/data_frame_variable_operation.rb +27 -21
  48. data/lib/red_amber/group.rb +100 -17
  49. data/lib/red_amber/helper.rb +20 -30
  50. data/lib/red_amber/vector.rb +56 -30
  51. data/lib/red_amber/vector_functions.rb +0 -8
  52. data/lib/red_amber/vector_selectable.rb +9 -1
  53. data/lib/red_amber/vector_updatable.rb +61 -63
  54. data/lib/red_amber/version.rb +1 -1
  55. data/lib/red_amber.rb +2 -0
  56. data/red_amber.gemspec +1 -1
  57. metadata +32 -11
  58. data/doc/examples_of_red_amber.ipynb +0 -8979
@@ -25,7 +25,13 @@ module RedAmber
25
25
  end
26
26
 
27
27
  # TODO: support for option {null_selection_behavior: :drop}
28
- def filter(*booleans)
28
+ def filter(*booleans, &block)
29
+ if block
30
+ raise VectorArgumentError, 'Must not specify both arguments and block.' unless booleans.empty?
31
+
32
+ booleans = [yield]
33
+ end
34
+
29
35
  booleans.flatten!
30
36
  return Vector.new([]) if booleans.empty?
31
37
 
@@ -46,6 +52,8 @@ module RedAmber
46
52
 
47
53
  filter_by_array(boolean_array) # returns sub Vector
48
54
  end
55
+ alias_method :select, :filter
56
+ alias_method :find_all, :filter
49
57
 
50
58
  # @param indices
51
59
  # @param booleans
@@ -4,38 +4,47 @@
4
4
  # reference: https://arrow.apache.org/docs/cpp/compute.html
5
5
 
6
6
  module RedAmber
7
- # mix-ins for class Vector
7
+ # mix-in for class Vector
8
8
  # Functions to make up some data (especially missing) for new data.
9
9
  module VectorUpdatable
10
10
  # Replace data
11
- # @param arg [Array, Vector, Arrow::Array] index specifier
12
- # @param replacer [Array, Vector, Arrow::Array] new data to replace for.
13
- # @return [Vector] Replaced new Vector
14
- def replace(args, replacer)
15
- args =
16
- case args
17
- when Array
18
- args
19
- when Range
20
- normalize_element(args)
21
- else
22
- Array(args)
23
- end
24
- replacer = Array(replacer)
25
- return self if args.empty? || args[0].nil?
11
+ # @param specifier [Array, Vector, Arrow::Array] index or booleans.
12
+ # @param replacer [Scalar, Array, Vector, Arrow::Array] new data to replace for.
13
+ # @return [Vector] Replaced new Vector.
14
+ # If specifier has no true, return self.
15
+ #
16
+ def replace(specifier, replacer)
17
+ vector = parse_to_vector(Array(specifier))
18
+ return self if vector.empty? || empty?
26
19
 
27
- replacer = nil if replacer.empty?
28
- vector = parse_to_vector(args)
29
20
  booleans =
30
21
  if vector.boolean?
31
22
  vector
32
23
  elsif vector.numeric?
33
- replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
34
24
  Vector.new(indices).is_in(vector)
35
25
  else
36
- raise VectorArgumentError, "Invalid data type #{args}"
26
+ raise VectorArgumentError, "Invalid data type #{specifier}"
27
+ end
28
+ return self if booleans.sum.zero?
29
+
30
+ replacer_array =
31
+ case replacer
32
+ in []
33
+ return self
34
+ in nil | [nil]
35
+ return replace_to_nil(booleans.data)
36
+ in Arrow::Array
37
+ # nop
38
+ in Vector
39
+ replacer.data
40
+ in Array
41
+ Arrow::Array.new(replacer)
42
+ else # Broadcast scalar to Array
43
+ Arrow::Array.new(Array(replacer) * booleans.to_a.count(true))
37
44
  end
38
- replace_with(booleans, replacer)
45
+ raise VectorArgumentError, 'Replacements size unmatch' if booleans.sum != replacer_array.length
46
+
47
+ replace_with(booleans.data, replacer_array)
39
48
  end
40
49
 
41
50
  # (related functions)
@@ -73,52 +82,41 @@ module RedAmber
73
82
 
74
83
  private
75
84
 
76
- # [Ternary]: replace_with(booleans, replacements) => vector
77
- # Replace items selected with a boolean mask
85
+ # Replace elements selected with a boolean mask
78
86
  #
79
- # (from Arrow C++ inline doc.)
80
- # Given an array and a boolean mask (either scalar or of equal length),
81
- # along with replacement values (either scalar or array),
82
- # each element of the array for which the corresponding mask element is
83
- # true will be replaced by the next value from the replacements,
84
- # or with null if the mask is null.
85
- # Hence, for replacement arrays, len(replacements) == sum(mask == true).
86
-
87
- def replace_with(booleans, replacer = nil)
88
- specifier =
89
- if booleans.is_a?(Arrow::BooleanArray)
90
- booleans
91
- elsif booleans.is_a?(Vector) && booleans.boolean?
92
- booleans.data
93
- elsif booleans.is_a?(Array) && booleans?(booleans)
94
- Arrow::BooleanArray.new(booleans)
95
- else
96
- raise VectorTypeError, 'Not a valid type'
97
- end
98
- raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
99
- raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
100
-
101
- r = Array(replacer) # scalar to [scalar]
102
- r = [nil] if r.empty?
103
-
104
- replacer =
105
- if r.size == 1
106
- case replacer
107
- when Arrow::Array then replacer
108
- when Vector then replacer.data
109
- else
110
- Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast
111
- end
112
- else
113
- Arrow::Array.new(r)
114
- end
115
- replacer = data.class.new(replacer) if replacer.uniq == [nil]
87
+ # @param boolean_mask [Arrow::BooleanArray] Boolean mask which indicates the position to be replaced.
88
+ # - Position with true will be replaced.
89
+ # - Position with nil will be nil.
90
+ #
91
+ # @param replacer [Arrow::Array] Values after replaced
92
+ # (either scalar or array). If Array is given, original values are replaced by
93
+ # each element of the array at the corresponding position of mask element.
94
+ # - `replacer.size` must be equal to `mask.count(true)`.
95
+ # - Types of self and replacer must be same
96
+ #
97
+ # @return [Vector] Replaced vector.
98
+ # Type of returned Vector is upcasted if needed.
99
+ #
100
+ def replace_with(boolean_mask, replacer)
101
+ raise VectorArgumentError, 'Booleans size unmatch' if boolean_mask.length != size
102
+ raise VectorArgumentError, 'Booleans not have any `true`' unless boolean_mask.any?
116
103
 
117
- raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
104
+ values = replacer.class.new(data) # Upcast
118
105
 
119
- values = replacer.class.new(data)
106
+ datum = find(:replace_with_mask).execute([values, boolean_mask, replacer])
107
+ Vector.new(datum.value)
108
+ end
120
109
 
121
- datum = find('replace_with_mask').execute([values, specifier, replacer])
110
+ # Replace elements selected with a boolean mask by nil
111
+ #
112
+ # @param boolean_mask [Arrow::BooleanArray] Boolean mask which indicates the position to be replaced.
113
+ # - Position with true will be replaced by nil
114
+ # - Position with nil will remain as nil.
115
+ # @return [Vector] Replaced vector.
116
+ #
117
+ def replace_to_nil(boolean_mask)
118
+ nil_array = data.class.new([nil] * size) # Casted nil Array
119
+ datum = find(:if_else).execute([boolean_mask, nil_array, data])
122
120
  Vector.new(datum.value)
123
121
  end
124
122
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.2.1'
4
+ VERSION = '0.2.3'
5
5
  end
data/lib/red_amber.rb CHANGED
@@ -3,8 +3,10 @@
3
3
  require 'arrow'
4
4
 
5
5
  require_relative 'red_amber/helper'
6
+ require_relative 'red_amber/data_frame_combinable'
6
7
  require_relative 'red_amber/data_frame_displayable'
7
8
  require_relative 'red_amber/data_frame_indexable'
9
+ require_relative 'red_amber/data_frame_loadsave'
8
10
  require_relative 'red_amber/data_frame_reshaping'
9
11
  require_relative 'red_amber/data_frame_selectable'
10
12
  require_relative 'red_amber/data_frame_variable_operation'
data/red_amber.gemspec CHANGED
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ['lib']
32
32
 
33
- spec.add_dependency 'red-arrow', '>= 9.0.0'
33
+ spec.add_dependency 'red-arrow', '~> 10.0.0'
34
34
 
35
35
  # Development dependency has gone to the Gemfile (rubygems/bundler#7237)
36
36
 
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-09-07 00:00:00.000000000 Z
11
+ date: 2022-11-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 9.0.0
19
+ version: 10.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 9.0.0
26
+ version: 10.0.0
27
27
  description: RedAmber is a simple dataframe library inspired by Rover-df and powered
28
28
  by Red Arrow.
29
29
  email:
@@ -40,21 +40,40 @@ files:
40
40
  - LICENSE
41
41
  - README.md
42
42
  - Rakefile
43
- - benchmark/csv_load_penguins.yml
43
+ - benchmark/basic.yml
44
+ - benchmark/combine.yml
44
45
  - benchmark/drop_nil.yml
46
+ - benchmark/group.yml
47
+ - benchmark/reshape.yml
48
+ - benchmark/rover/csv_load_penguins.yml
49
+ - benchmark/rover/flights.yml
50
+ - benchmark/rover/penguins.yml
51
+ - benchmark/rover/planes.yml
52
+ - benchmark/rover/weather.yml
45
53
  - doc/CODE_OF_CONDUCT.md
46
54
  - doc/DataFrame.md
47
55
  - doc/Vector.md
48
- - doc/examples_of_red_amber.ipynb
49
56
  - doc/image/arrow_table_new.png
57
+ - doc/image/basic_verbs.png
50
58
  - doc/image/dataframe/assign.png
59
+ - doc/image/dataframe/assign_operation.png
51
60
  - doc/image/dataframe/drop.png
61
+ - doc/image/dataframe/join.png
52
62
  - doc/image/dataframe/pick.png
63
+ - doc/image/dataframe/pick_operation.png
53
64
  - doc/image/dataframe/remove.png
54
65
  - doc/image/dataframe/rename.png
66
+ - doc/image/dataframe/rename_operation.png
67
+ - doc/image/dataframe/reshaping_DataFrames.png
68
+ - doc/image/dataframe/set_and_bind.png
55
69
  - doc/image/dataframe/slice.png
70
+ - doc/image/dataframe/slice_operation.png
56
71
  - doc/image/dataframe_model.png
57
72
  - doc/image/example_in_red_arrow.png
73
+ - doc/image/group_operation.png
74
+ - doc/image/replace-if_then.png
75
+ - doc/image/reshaping_dataframe.png
76
+ - doc/image/screenshot.png
58
77
  - doc/image/tdr.png
59
78
  - doc/image/tdr_and_table.png
60
79
  - doc/image/tidy_data_in_TDR.png
@@ -67,8 +86,10 @@ files:
67
86
  - lib/red-amber.rb
68
87
  - lib/red_amber.rb
69
88
  - lib/red_amber/data_frame.rb
89
+ - lib/red_amber/data_frame_combinable.rb
70
90
  - lib/red_amber/data_frame_displayable.rb
71
91
  - lib/red_amber/data_frame_indexable.rb
92
+ - lib/red_amber/data_frame_loadsave.rb
72
93
  - lib/red_amber/data_frame_reshaping.rb
73
94
  - lib/red_amber/data_frame_selectable.rb
74
95
  - lib/red_amber/data_frame_variable_operation.rb
@@ -89,7 +110,7 @@ metadata:
89
110
  source_code_uri: https://github.com/heronshoes/red_amber
90
111
  changelog_uri: https://github.com/heronshoes/red_amber/blob/main/CHANGELOG.md
91
112
  rubygems_mfa_required: 'true'
92
- post_install_message:
113
+ post_install_message:
93
114
  rdoc_options: []
94
115
  require_paths:
95
116
  - lib
@@ -105,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
105
126
  version: '0'
106
127
  requirements: []
107
128
  rubygems_version: 3.3.7
108
- signing_key:
129
+ signing_key:
109
130
  specification_version: 4
110
131
  summary: Simple dataframe library for Ruby
111
132
  test_files: []