red_amber 0.2.1 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +15 -0
  3. data/CHANGELOG.md +170 -20
  4. data/Gemfile +4 -2
  5. data/README.md +121 -302
  6. data/benchmark/basic.yml +79 -0
  7. data/benchmark/combine.yml +63 -0
  8. data/benchmark/drop_nil.yml +15 -3
  9. data/benchmark/group.yml +33 -0
  10. data/benchmark/reshape.yml +27 -0
  11. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  12. data/benchmark/rover/flights.yml +23 -0
  13. data/benchmark/rover/penguins.yml +23 -0
  14. data/benchmark/rover/planes.yml +23 -0
  15. data/benchmark/rover/weather.yml +23 -0
  16. data/doc/DataFrame.md +611 -318
  17. data/doc/Vector.md +31 -36
  18. data/doc/image/basic_verbs.png +0 -0
  19. data/doc/image/dataframe/assign.png +0 -0
  20. data/doc/image/dataframe/assign_operation.png +0 -0
  21. data/doc/image/dataframe/drop.png +0 -0
  22. data/doc/image/dataframe/join.png +0 -0
  23. data/doc/image/dataframe/pick.png +0 -0
  24. data/doc/image/dataframe/pick_operation.png +0 -0
  25. data/doc/image/dataframe/remove.png +0 -0
  26. data/doc/image/dataframe/rename.png +0 -0
  27. data/doc/image/dataframe/rename_operation.png +0 -0
  28. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  29. data/doc/image/dataframe/set_and_bind.png +0 -0
  30. data/doc/image/dataframe/slice.png +0 -0
  31. data/doc/image/dataframe/slice_operation.png +0 -0
  32. data/doc/image/dataframe_model.png +0 -0
  33. data/doc/image/group_operation.png +0 -0
  34. data/doc/image/replace-if_then.png +0 -0
  35. data/doc/image/reshaping_dataframe.png +0 -0
  36. data/doc/image/screenshot.png +0 -0
  37. data/doc/image/vector/binary_element_wise.png +0 -0
  38. data/doc/image/vector/unary_aggregation.png +0 -0
  39. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  40. data/doc/image/vector/unary_element_wise.png +0 -0
  41. data/lib/red_amber/data_frame.rb +16 -42
  42. data/lib/red_amber/data_frame_combinable.rb +283 -0
  43. data/lib/red_amber/data_frame_displayable.rb +58 -3
  44. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  45. data/lib/red_amber/data_frame_reshaping.rb +8 -6
  46. data/lib/red_amber/data_frame_selectable.rb +9 -9
  47. data/lib/red_amber/data_frame_variable_operation.rb +27 -21
  48. data/lib/red_amber/group.rb +100 -17
  49. data/lib/red_amber/helper.rb +20 -30
  50. data/lib/red_amber/vector.rb +56 -30
  51. data/lib/red_amber/vector_functions.rb +0 -8
  52. data/lib/red_amber/vector_selectable.rb +9 -1
  53. data/lib/red_amber/vector_updatable.rb +61 -63
  54. data/lib/red_amber/version.rb +1 -1
  55. data/lib/red_amber.rb +2 -0
  56. data/red_amber.gemspec +1 -1
  57. metadata +32 -11
  58. data/doc/examples_of_red_amber.ipynb +0 -8979
@@ -25,7 +25,13 @@ module RedAmber
25
25
  end
26
26
 
27
27
  # TODO: support for option {null_selection_behavior: :drop}
28
- def filter(*booleans)
28
+ def filter(*booleans, &block)
29
+ if block
30
+ raise VectorArgumentError, 'Must not specify both arguments and block.' unless booleans.empty?
31
+
32
+ booleans = [yield]
33
+ end
34
+
29
35
  booleans.flatten!
30
36
  return Vector.new([]) if booleans.empty?
31
37
 
@@ -46,6 +52,8 @@ module RedAmber
46
52
 
47
53
  filter_by_array(boolean_array) # returns sub Vector
48
54
  end
55
+ alias_method :select, :filter
56
+ alias_method :find_all, :filter
49
57
 
50
58
  # @param indices
51
59
  # @param booleans
@@ -4,38 +4,47 @@
4
4
  # reference: https://arrow.apache.org/docs/cpp/compute.html
5
5
 
6
6
  module RedAmber
7
- # mix-ins for class Vector
7
+ # mix-in for class Vector
8
8
  # Functions to make up some data (especially missing) for new data.
9
9
  module VectorUpdatable
10
10
  # Replace data
11
- # @param arg [Array, Vector, Arrow::Array] index specifier
12
- # @param replacer [Array, Vector, Arrow::Array] new data to replace for.
13
- # @return [Vector] Replaced new Vector
14
- def replace(args, replacer)
15
- args =
16
- case args
17
- when Array
18
- args
19
- when Range
20
- normalize_element(args)
21
- else
22
- Array(args)
23
- end
24
- replacer = Array(replacer)
25
- return self if args.empty? || args[0].nil?
11
+ # @param specifier [Array, Vector, Arrow::Array] index or booleans.
12
+ # @param replacer [Scalar, Array, Vector, Arrow::Array] new data to replace for.
13
+ # @return [Vector] Replaced new Vector.
14
+ # If specifier has no true, return self.
15
+ #
16
+ def replace(specifier, replacer)
17
+ vector = parse_to_vector(Array(specifier))
18
+ return self if vector.empty? || empty?
26
19
 
27
- replacer = nil if replacer.empty?
28
- vector = parse_to_vector(args)
29
20
  booleans =
30
21
  if vector.boolean?
31
22
  vector
32
23
  elsif vector.numeric?
33
- replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
34
24
  Vector.new(indices).is_in(vector)
35
25
  else
36
- raise VectorArgumentError, "Invalid data type #{args}"
26
+ raise VectorArgumentError, "Invalid data type #{specifier}"
27
+ end
28
+ return self if booleans.sum.zero?
29
+
30
+ replacer_array =
31
+ case replacer
32
+ in []
33
+ return self
34
+ in nil | [nil]
35
+ return replace_to_nil(booleans.data)
36
+ in Arrow::Array
37
+ # nop
38
+ in Vector
39
+ replacer.data
40
+ in Array
41
+ Arrow::Array.new(replacer)
42
+ else # Broadcast scalar to Array
43
+ Arrow::Array.new(Array(replacer) * booleans.to_a.count(true))
37
44
  end
38
- replace_with(booleans, replacer)
45
+ raise VectorArgumentError, 'Replacements size unmatch' if booleans.sum != replacer_array.length
46
+
47
+ replace_with(booleans.data, replacer_array)
39
48
  end
40
49
 
41
50
  # (related functions)
@@ -73,52 +82,41 @@ module RedAmber
73
82
 
74
83
  private
75
84
 
76
- # [Ternary]: replace_with(booleans, replacements) => vector
77
- # Replace items selected with a boolean mask
85
+ # Replace elements selected with a boolean mask
78
86
  #
79
- # (from Arrow C++ inline doc.)
80
- # Given an array and a boolean mask (either scalar or of equal length),
81
- # along with replacement values (either scalar or array),
82
- # each element of the array for which the corresponding mask element is
83
- # true will be replaced by the next value from the replacements,
84
- # or with null if the mask is null.
85
- # Hence, for replacement arrays, len(replacements) == sum(mask == true).
86
-
87
- def replace_with(booleans, replacer = nil)
88
- specifier =
89
- if booleans.is_a?(Arrow::BooleanArray)
90
- booleans
91
- elsif booleans.is_a?(Vector) && booleans.boolean?
92
- booleans.data
93
- elsif booleans.is_a?(Array) && booleans?(booleans)
94
- Arrow::BooleanArray.new(booleans)
95
- else
96
- raise VectorTypeError, 'Not a valid type'
97
- end
98
- raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
99
- raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
100
-
101
- r = Array(replacer) # scalar to [scalar]
102
- r = [nil] if r.empty?
103
-
104
- replacer =
105
- if r.size == 1
106
- case replacer
107
- when Arrow::Array then replacer
108
- when Vector then replacer.data
109
- else
110
- Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast
111
- end
112
- else
113
- Arrow::Array.new(r)
114
- end
115
- replacer = data.class.new(replacer) if replacer.uniq == [nil]
87
+ # @param boolean_mask [Arrow::BooleanArray] Boolean mask which indicates the position to be replaced.
88
+ # - Position with true will be replaced.
89
+ # - Position with nil will be nil.
90
+ #
91
+ # @param replacer [Arrow::Array] Values after replaced
92
+ # (either scalar or array). If Array is given, original values are replaced by
93
+ # each element of the array at the corresponding position of mask element.
94
+ # - `replacer.size` must be equal to `mask.count(true)`.
95
+ # - Types of self and replacer must be same
96
+ #
97
+ # @return [Vector] Replaced vector.
98
+ # Type of returned Vector is upcasted if needed.
99
+ #
100
+ def replace_with(boolean_mask, replacer)
101
+ raise VectorArgumentError, 'Booleans size unmatch' if boolean_mask.length != size
102
+ raise VectorArgumentError, 'Booleans not have any `true`' unless boolean_mask.any?
116
103
 
117
- raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
104
+ values = replacer.class.new(data) # Upcast
118
105
 
119
- values = replacer.class.new(data)
106
+ datum = find(:replace_with_mask).execute([values, boolean_mask, replacer])
107
+ Vector.new(datum.value)
108
+ end
120
109
 
121
- datum = find('replace_with_mask').execute([values, specifier, replacer])
110
+ # Replace elements selected with a boolean mask by nil
111
+ #
112
+ # @param boolean_mask [Arrow::BooleanArray] Boolean mask which indicates the position to be replaced.
113
+ # - Position with true will be replaced by nil
114
+ # - Position with nil will remain as nil.
115
+ # @return [Vector] Replaced vector.
116
+ #
117
+ def replace_to_nil(boolean_mask)
118
+ nil_array = data.class.new([nil] * size) # Casted nil Array
119
+ datum = find(:if_else).execute([boolean_mask, nil_array, data])
122
120
  Vector.new(datum.value)
123
121
  end
124
122
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedAmber
4
- VERSION = '0.2.1'
4
+ VERSION = '0.2.3'
5
5
  end
data/lib/red_amber.rb CHANGED
@@ -3,8 +3,10 @@
3
3
  require 'arrow'
4
4
 
5
5
  require_relative 'red_amber/helper'
6
+ require_relative 'red_amber/data_frame_combinable'
6
7
  require_relative 'red_amber/data_frame_displayable'
7
8
  require_relative 'red_amber/data_frame_indexable'
9
+ require_relative 'red_amber/data_frame_loadsave'
8
10
  require_relative 'red_amber/data_frame_reshaping'
9
11
  require_relative 'red_amber/data_frame_selectable'
10
12
  require_relative 'red_amber/data_frame_variable_operation'
data/red_amber.gemspec CHANGED
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ['lib']
32
32
 
33
- spec.add_dependency 'red-arrow', '>= 9.0.0'
33
+ spec.add_dependency 'red-arrow', '~> 10.0.0'
34
34
 
35
35
  # Development dependency has gone to the Gemfile (rubygems/bundler#7237)
36
36
 
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red_amber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hirokazu SUZUKI (heronshoes)
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-09-07 00:00:00.000000000 Z
11
+ date: 2022-11-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: red-arrow
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 9.0.0
19
+ version: 10.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 9.0.0
26
+ version: 10.0.0
27
27
  description: RedAmber is a simple dataframe library inspired by Rover-df and powered
28
28
  by Red Arrow.
29
29
  email:
@@ -40,21 +40,40 @@ files:
40
40
  - LICENSE
41
41
  - README.md
42
42
  - Rakefile
43
- - benchmark/csv_load_penguins.yml
43
+ - benchmark/basic.yml
44
+ - benchmark/combine.yml
44
45
  - benchmark/drop_nil.yml
46
+ - benchmark/group.yml
47
+ - benchmark/reshape.yml
48
+ - benchmark/rover/csv_load_penguins.yml
49
+ - benchmark/rover/flights.yml
50
+ - benchmark/rover/penguins.yml
51
+ - benchmark/rover/planes.yml
52
+ - benchmark/rover/weather.yml
45
53
  - doc/CODE_OF_CONDUCT.md
46
54
  - doc/DataFrame.md
47
55
  - doc/Vector.md
48
- - doc/examples_of_red_amber.ipynb
49
56
  - doc/image/arrow_table_new.png
57
+ - doc/image/basic_verbs.png
50
58
  - doc/image/dataframe/assign.png
59
+ - doc/image/dataframe/assign_operation.png
51
60
  - doc/image/dataframe/drop.png
61
+ - doc/image/dataframe/join.png
52
62
  - doc/image/dataframe/pick.png
63
+ - doc/image/dataframe/pick_operation.png
53
64
  - doc/image/dataframe/remove.png
54
65
  - doc/image/dataframe/rename.png
66
+ - doc/image/dataframe/rename_operation.png
67
+ - doc/image/dataframe/reshaping_DataFrames.png
68
+ - doc/image/dataframe/set_and_bind.png
55
69
  - doc/image/dataframe/slice.png
70
+ - doc/image/dataframe/slice_operation.png
56
71
  - doc/image/dataframe_model.png
57
72
  - doc/image/example_in_red_arrow.png
73
+ - doc/image/group_operation.png
74
+ - doc/image/replace-if_then.png
75
+ - doc/image/reshaping_dataframe.png
76
+ - doc/image/screenshot.png
58
77
  - doc/image/tdr.png
59
78
  - doc/image/tdr_and_table.png
60
79
  - doc/image/tidy_data_in_TDR.png
@@ -67,8 +86,10 @@ files:
67
86
  - lib/red-amber.rb
68
87
  - lib/red_amber.rb
69
88
  - lib/red_amber/data_frame.rb
89
+ - lib/red_amber/data_frame_combinable.rb
70
90
  - lib/red_amber/data_frame_displayable.rb
71
91
  - lib/red_amber/data_frame_indexable.rb
92
+ - lib/red_amber/data_frame_loadsave.rb
72
93
  - lib/red_amber/data_frame_reshaping.rb
73
94
  - lib/red_amber/data_frame_selectable.rb
74
95
  - lib/red_amber/data_frame_variable_operation.rb
@@ -89,7 +110,7 @@ metadata:
89
110
  source_code_uri: https://github.com/heronshoes/red_amber
90
111
  changelog_uri: https://github.com/heronshoes/red_amber/blob/main/CHANGELOG.md
91
112
  rubygems_mfa_required: 'true'
92
- post_install_message:
113
+ post_install_message:
93
114
  rdoc_options: []
94
115
  require_paths:
95
116
  - lib
@@ -105,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
105
126
  version: '0'
106
127
  requirements: []
107
128
  rubygems_version: 3.3.7
108
- signing_key:
129
+ signing_key:
109
130
  specification_version: 4
110
131
  summary: Simple dataframe library for Ruby
111
132
  test_files: []