red_amber 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 78fa72064f9494f0f756f15cf1daaacb3640535e899ba71ab080730c0d61b0b2
4
- data.tar.gz: 3f2de4a449c38eb995ebcc0394a1a93633f097e533696edfa91267a596dcb580
3
+ metadata.gz: 264e7637475fd01946900335751a1592a3859e9bfa772ecc0800ab05c4d852f0
4
+ data.tar.gz: a57400445419698a66d6b5c94e15fa8c040f2f3930f9fbf75603ffb6e18bd9cf
5
5
  SHA512:
6
- metadata.gz: 45a7c37cc746c606e8d4d2a43005da8154b60df21bf2cf6b2bafa9f7ad5f962a3e3c8e2f931e6543b20b8f6cd8c8a447b99b7f0127854d3bb716ea763ab3cae5
7
- data.tar.gz: b3ac4479df1e30b75e7ccfcc48b09f709cea536c98072bfe937ae283c0cc1d203ab97388cf6f57c39fd31c6beceadcb850c3f14e8e07e5e196cc0c862634f36d
6
+ metadata.gz: 0fdbcdb732e36bb866a8251800ab3fa1a714fa075234bf8cd516f2542ab6704ebfa429a7177da2bd8cd6fa6eb1158efb0d68f46f43d1dc088a9a0f0debdc5c54
7
+ data.tar.gz: f9c1dffaa157ecf34b0b4fec6c1d7972b4773bbf7a11101a345172d621753cd9fc3818753b329dd2906a506af294d6a96c0180a0fb4dc84c2b54bceef6b520f5
data/.rubocop.yml CHANGED
@@ -31,16 +31,28 @@ Style/TrailingCommaInHashLiteral:
31
31
 
32
32
  # To let you know the possibility of refactoring ===
33
33
 
34
- # avoid unused variable asignment
35
- Rubycw/Rubycw:
34
+ # Max: 120
35
+ # This cop supports safe autocorrection (--autocorrect).
36
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
37
+ # URISchemes: http, https
38
+ Layout/LineLength:
39
+ Max: 90
36
40
  Exclude:
37
41
  - 'test/**/*'
38
42
 
43
+ # EnforcedStyle: aligned
44
+ Layout/MultilineMethodCallIndentation:
45
+ EnforcedStyle: indented_relative_to_receiver
46
+
39
47
  # Disabled to define Vector operators
40
48
  # Offense count: 38
41
49
  Lint/BinaryOperatorWithIdenticalOperands:
42
50
  Exclude:
43
- - 'test/test_vector_function.rb'
51
+ - 'test/test_vector_binary_element_wise.rb'
52
+
53
+ Lint/Debugger:
54
+ Exclude:
55
+ - 'bin/example'
44
56
 
45
57
  # Need for test with empty block
46
58
  # Offense count: 1
@@ -55,15 +67,6 @@ Lint/UselessAssignment:
55
67
  Exclude:
56
68
  - 'test/**/*'
57
69
 
58
- # Max: 120
59
- # This cop supports safe autocorrection (--autocorrect).
60
- # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
61
- # URISchemes: http, https
62
- Layout/LineLength:
63
- Max: 90
64
- Exclude:
65
- - 'test/**/*'
66
-
67
70
  # <= 17 satisfactory
68
71
  # 18..30 unsatisfactory
69
72
  # > 30 dangerous
@@ -83,6 +86,7 @@ Metrics/AbcSize:
83
86
  'drop', # 31.42
84
87
  '[]', # 33.76
85
88
  'split', # 37.35
89
+ 'aggregate', # 38.13
86
90
  ]
87
91
 
88
92
  # Max: 25
@@ -104,7 +108,9 @@ Metrics/ClassLength:
104
108
  - 'test/**/*'
105
109
  - 'lib/red_amber/data_frame.rb' # 162
106
110
  - 'lib/red_amber/group.rb' # 105
111
+ - 'lib/red_amber/subframes.rb' # 110
107
112
  - 'lib/red_amber/vector.rb' # 152
113
+ - 'lib/red_amber/vector_binary_element_wise.rb' # 109
108
114
 
109
115
  # Only for monitoring. I will measure by PerceivedComplexity.
110
116
  # Max: 7
@@ -113,14 +119,17 @@ Metrics/ClassLength:
113
119
  Metrics/CyclomaticComplexity:
114
120
  Max: 12
115
121
  AllowedMethods: [
116
- 'join', # 14
122
+ 'split', # 33
117
123
  'format_table', # 21
124
+ 'normalize_element', # 17
118
125
  'slice_by', # 16
126
+ 'assign_update', # 14
127
+ 'join', # 14
128
+ 'parse_range', # 14
119
129
  'remove', # 14
120
- 'normalize_element', # 17
121
130
  '[]', # 13
122
- 'parse_range', # 14
123
- 'split', # 33
131
+ 'drop', # 13
132
+ 'aggregate', # 13
124
133
  ]
125
134
 
126
135
  # Max: 10
@@ -134,6 +143,8 @@ Metrics/MethodLength:
134
143
  'format_table', # 53
135
144
  'slice_by', # 38
136
145
  'assign_update', # 35
146
+ 'drop', # 32
147
+ 'aggregate', # 31
137
148
  ]
138
149
 
139
150
  # Max: 100
@@ -163,17 +174,24 @@ Metrics/ParameterLists:
163
174
  Metrics/PerceivedComplexity:
164
175
  Max: 10
165
176
  AllowedMethods: [
166
- 'join', # 14
167
- 'dataframe_info', # 13
168
177
  'format_table', # 22
169
178
  'slice_by', # 20
170
- 'remove', # 14
171
- 'drop', # 12
172
- 'filters', # 11
173
179
  'normalize_element', # 17
174
- '[]', # 11
180
+ 'assign_update', # 15
175
181
  'parse_range', # 15
182
+ 'join', # 14
183
+ 'remove', # 14
176
184
  'split', # 14
185
+ 'dataframe_info', # 13
186
+ 'replace', # 13
187
+ 'drop', # 12
188
+ 'initialize', # 12
189
+ 'aggregate', # 12
190
+ '[]', # 11
191
+ 'filters', # 11
192
+ 'html_table', # 11
193
+ 'slice', # 11
194
+ 'pick', # 11
177
195
  ]
178
196
 
179
197
  # Offense count: 1
@@ -197,6 +215,18 @@ Naming/PredicateName:
197
215
  - 'lib/red_amber/vector_functions.rb'
198
216
  - 'lib/red_amber/vector_selectable.rb'
199
217
 
218
+ # avoid unused variable assignment
219
+ Rubycw/Rubycw:
220
+ Exclude:
221
+ - 'test/**/*'
222
+ - 'bin/example'
223
+
224
+ # Offense count: 16
225
+ # This cop supports safe autocorrection (--autocorrect).
226
+ Style/OperatorMethodCall:
227
+ Exclude:
228
+ - 'test/test_vector_binary_element_wise.rb'
229
+
200
230
  # Necessary to test when range.end == -1
201
231
  # Offense count: 2
202
232
  # This cop supports unsafe autocorrection (--autocorrect-all).
@@ -204,6 +234,10 @@ Style/SlicingWithRange:
204
234
  Exclude:
205
235
  - 'test/test_data_frame_selectable.rb'
206
236
 
237
+ Style/MixinUsage:
238
+ Exclude:
239
+ - 'bin/example'
240
+
207
241
  # Necessary to Vector < 0 element-wise comparison
208
242
  # Offense count: 5
209
243
  # This cop supports unsafe autocorrection (--autocorrect-all).
data/.yardopts CHANGED
@@ -1 +1,3 @@
1
1
  --output-dir doc/yard
2
+ --template-path doc/yard-templates
3
+ --use-cache
data/CHANGELOG.md CHANGED
@@ -1,3 +1,181 @@
1
+ ## [0.4.1] - 2023-03-11
2
+
3
+ - Breaking change
4
+ - Remove Vector.aggregate? method (#200)
5
+
6
+ - Bug fixes
7
+ - Return self in DataFrame#drop when dropper is empty (reverts 746ac263) (#193)
8
+ - Return self in DataFrame#rename when renaming to same name (#193)
9
+ - Return self in DataFrame#pick when pick itself (#199)
10
+ - Fix column width for non-ascii elements in DataFrame#to_s (#193)
11
+ - This change uses String#width.
12
+ - Fix DataFrame#to_iruby when data is date32 type (#193)
13
+ - Fix DataFrame#shorthand to show temporal type data simply (#193)
14
+ - Fix Vector#rank when data is ChunkedArray (#198)
15
+ - Fix Vector element-wise functions with nil as scalar (#198)
16
+ - Support :force_order for all methods of join family (#199)
17
+ - Support the :force_order option to force sorting after join for the whole #join family.
18
+ - This will be valuable in some cases, such as with large dataframes.
19
+ - Ensure baseframe's schema for SubFrames (#200)
20
+
21
+ - New features and improvements
22
+ - Add Vector#first, #last method (#198)
23
+ - This method will be used in SubFrames feature.
24
+ - Add Vector#modulo method (#198)
25
+ - The divmod function in Arrow C++ is still in draft state.
26
+ This method was created by combining existing functions
27
+ - Add Vector#quotient method (#198)
28
+ - Add aliases #div, #mod, #mul, #pow, #quo and #sub for Vector (#198)
29
+ - Add Vector#*_checked functions (#198)
30
+ - These functions will check numeric range overflow.
31
+ - Add 'tdra' and 'plain' in display mode (#193)
32
+ - The plain mode and default inspect will show up to 128 rows and 128 columns.
33
+ - Add String#width method in refinements (#193)
34
+ - This will be used to update DataFrame#to_s.
35
+ - Introduce pre-loaded REPL environment (#199)
36
+ - This commit adds bin/example, which starts an irb environment
37
+ with commonly used datasets such as penguins, diamonds, etc. preloaded.
38
+ - Upgrade SubFrames#aggregate to accept block (#200)
39
+
40
+ - Refactoring
41
+ - Use symbolized keys in refinements of Table#keys, #key? (#193)
42
+ - This allows Tables and DataFrames to be treated in the same manner.
43
+ - Use key_name.succ in suffix of DataFrame#join (#193)
44
+ - This makes it simple to get a name candidate.
45
+ - Use ||= to memorize instance variables (#193)
46
+ - Refine vector projection to use #variables (#193)
47
+ - #variables is fastest when picking Vectors.
48
+ - Refine Vector#is_in to avoid #pack (#198)
49
+ - Refine Vector#index (#198)
50
+
51
+ - Improve in tests/CI
52
+ - Tests
53
+ - Update benchmarks to test from older version (#193)
54
+ - Refine test of Vector function with scalar (#198)
55
+ - Refine test subframes and test_vector_selectable (#200)
56
+
57
+ - Cops
58
+ - CI
59
+
60
+ - Documentation
61
+ - Update documents(small fix) (#201)
62
+
63
+ - GitHub site
64
+
65
+ - Thanks
66
+
67
+ ## [0.4.0] - 2023-02-25
68
+
69
+ - Breaking change
70
+ - Upgrade dependency to Arrow 11.0.0 (#188)
71
+
72
+ - Bug fixes
73
+ - Add :force_order option for DataFrame#join (#174)
74
+ - Return error for empty DataFrame in DataFrame#filter (#172)
75
+ - Accept ChunkedArray in DataFrame#filter (#172)
76
+ - Fix Vector#replace to accept Arrow::Array as a replacer (#179)
77
+ - Fix Vector#round_to_multiple to accept Float or Integer (#180)
78
+ - Change Vector atan2 to a class method (#180)
79
+ - Fix Vector#shift when boolean Vector (#184)
80
+ - Fix processing empty SubFrames (#183)
81
+ - Do not check object id in DataFrame#rename, #drop for self (#188)
82
+
83
+ - New features and improvements
84
+ - Accept a block in DataFrame#filter (#172)
85
+ - Add Vector.aggregate? method (#175)
86
+ - Introduce Vector#propagate method (#175)
87
+ - Add Vector#rank methods (#176)
88
+ - Add Vector#sample method (#176)
89
+ - Add Vector#sort method (#176)
90
+ - Promote DataFrame#shape_str to public (#184)
91
+ - Introduce Vector#concatenate (#184)
92
+ - Add #numeric? in refinements of Array (#184)
93
+ - Add Vector#cumulative_sum_checked and #cumsum (#184)
94
+ - Add Vector#resolve method (#184)
95
+ - Add DataFrame#tdra method (#184)
96
+ - Add #expand as an alias for Vector#propagate (#184)
97
+ - Add #glimpse as an alias for DataFrame#tdr (#184)
98
+ - New class SubFrames (#183)
99
+ - Introduce class SubFrames
100
+ - Memorize dataframes in SubFrames
101
+ - Add @frames to memorize sub DataFrames
102
+ - Accept filters in SubFrames.new
103
+ - Accept block in SubFrames.new
104
+ - Add SubFrames.by_filter
105
+ - Introduce methods creating SubFrames from DataFrame
106
+ - Introduce SubFrames#each method
107
+ - Add SubFrames#to_s method
108
+ - Add SubFrames#concatenate method
109
+ - Add SubFrames#offset_indices method
110
+ - SubFrames#aggregate method
111
+ - Redefine SubFrames#map to return SubFrames
112
+ - Define SubFrame#map dynamically
113
+ - Add SubFrames#assign method
114
+ - Redefine SubFrames#select to return SubFrames
115
+ - Add SubFrames#reject method
116
+ - Add SubFrames#filter_map method
117
+ - Refine DataFrame#indices memorizing @indices
118
+ - Rename SubFrames#universal_frame as #baseframe
119
+ - Set Group iteration feature to @api private
120
+
121
+ - Refactoring
122
+ - Generate Vector functions in class method (#177)
123
+ - Set Constant visibility to private (#179)
124
+ - Separate test_vector_function (#179)
125
+ - Relocate methods in DataFrameIndexable (#179)
126
+ - Rename Array refinements to the same name as Vector (#184)
127
+
128
+ - Improve in tests/CI
129
+ - Tests
130
+ - Update benchmarks to set 0.3.0 as a reference (#167)
131
+ - Move test of Vector#logb to proper location (#180)
132
+
133
+ - Cops
134
+ - Update .rubocop.yml to align with latest cops (#174)
135
+ - Unify style of MethodCallIndentation as relative to receiver (#184)
136
+
137
+ - CI
138
+ - Fix setting up Arrow by homebrew in CI (#167)
139
+ - Fix CI error on homebrew deleting python link (#167)
140
+ - Set cache-version to get new C extensions in CI (#173)
141
+ - Thanks to @kou for suggestion.
142
+
143
+ - Documentation
144
+ - Update DataFrame.md about loading csv without headers (#165)
145
+ - Thanks to kojix2
146
+ - Update YARD in DataFrame combinable (#168)
147
+ - Update comment for Ruby 2.7 support in README.md
148
+ - Update license year
149
+ - Update README (#172)
150
+ - Update Vector.md and yardoc in #propagate (#175)
151
+ - Use customized style sheet for YARD (#179)
152
+ - Add examples for the doc of #pick and #drop (#179)
153
+ - Add examples to YARD in DataFrame reshaping methods (#179)
154
+ - Update documents in DataFrameDisplayable (#179)
155
+ - Update documents in DataFrameVariableOperation (#179)
156
+ - Update document for dynamically generated methods (#179)
157
+ - Unify style in document (#179)
158
+ - Update documents in DataFrameSelectable (#179)
159
+ - Update documents of basic Vector methods (#179)
160
+ - Update document in VectorUpdatable (#179)
161
+ - Update document of Group (#179)
162
+ - Update document of DataFrameLoadSave (#180)
163
+ - Add examples for document of ArrowFunction (#180)
164
+ - Update document of Vector_unary_aggregation (#180)
165
+ - Update document of Vector_unary_element_wise (#180)
166
+ - Update document of Vector_binary_element_wise (#180)
167
+ - Add documentation to give comparison of dataframes (#169)
168
+ - Thanks to Benson Muite
169
+ - Update documents for consistency of method indentation (#189)
170
+ - Update CHANGELOG (#189)
171
+ - Update README for 0.4.0 (#189)
172
+
173
+ - GitHub site
174
+
175
+ - Thanks
176
+ - kojix2
177
+ - Benson Muite
178
+
1
179
  ## [0.3.0] - 2022-12-18
2
180
 
3
181
  - Breaking change
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ gemspec
7
7
  group :test do
8
8
  gem 'rake'
9
9
 
10
- gem 'red-parquet', '~> 10.0.0'
10
+ gem 'red-parquet', '~> 11.0.0'
11
11
  gem 'rover-df', '~> 0.3.0'
12
12
 
13
13
  gem 'rubocop'
data/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2022 Hirokazu SUZUKI (heronshoes)
3
+ Copyright (c) 2022-2023 Hirokazu SUZUKI (heronshoes)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # RedAmber
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/red_amber.svg)](https://badge.fury.io/rb/red_amber)
3
+ [![Gem Version](https://img.shields.io/gem/v/red_amber?color=brightgreen)](https://rubygems.org/gems/red_amber)
4
4
  [![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
5
5
  [![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
6
6
  [![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
@@ -10,34 +10,32 @@
10
10
  A simple dataframe library for Ruby.
11
11
 
12
12
  - Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
13
- [![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en)
13
+ [![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en) [![Gem Version](https://img.shields.io/gem/v/red-arrow?color=brightgreen)](https://rubygems.org/gems/red-arrow)
14
14
  - Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
15
15
 
16
16
  ![screenshot from jupyterlab](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/screenshot.png)
17
17
 
18
18
  ## Requirements
19
-
19
+ ### Ruby
20
20
  Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
21
+ - I decided to remove support for Ruby 2.7 without waiting for its EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
21
22
 
22
- - I decided to remove Ruby 2.7 without waiting for EOL because it cannot solve the problem of simultaneous use of Hash and keyword arguments when implementing DataFrame#join.
23
-
23
+ ### Libraries
24
24
  ```ruby
25
- # Libraries required
26
- gem 'red-arrow', '~> 10.0.0' # Requires Apache Arrow (see installation below)
27
-
28
- gem 'red-parquet', '~> 10.0.0' # Optional, if you use IO from/to parquet
25
+ gem 'red-arrow', '~> 11.0.0' # Requires Apache Arrow (see installation below)
26
+ gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
29
27
  gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
30
28
  ```
31
29
 
32
30
  ## Installation
33
31
 
34
- Install requirements before you install Red Amber.
32
+ Install requirements before you install RedAmber.
35
33
 
36
- - Apache Arrow (~> 10.0.0)
37
- - Apache Arrow GLib (~> 10.0.0)
38
- - Apache Parquet GLib (~> 10.0.0) # If you use IO from/to parquet
34
+ - Apache Arrow (~> 11.0.0)
35
+ - Apache Arrow GLib (~> 11.0.0)
36
+ - Apache Parquet GLib (~> 11.0.0) # If you use IO from/to parquet
39
37
 
40
- See [Apache Arrow install document](https://arrow.apache.org/install/).
38
+ See [Apache Arrow install document](https://arrow.apache.org/install/).
41
39
 
42
40
  - Minimum installation example for the latest Ubuntu:
43
41
 
@@ -58,43 +56,44 @@ Install requirements before you install Red Amber.
58
56
  sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
59
57
  ```
60
58
 
61
- - On macOS, you can install Apache Arrow C++ library using Homebrew:
59
+ - On macOS, using Homebrew:
62
60
 
63
61
  ```
64
62
  brew install apache-arrow
65
- ```
66
-
67
- and GLib (C) package with:
68
-
69
- ```
70
63
  brew install apache-arrow-glib
71
64
  ```
72
65
 
73
66
  If you prepared Apache Arrow, add these lines to your Gemfile:
74
67
 
75
68
  ```ruby
76
- gem 'red-arrow', '~> 10.0.0'
69
+ gem 'red-arrow', '~> 11.0.0'
77
70
  gem 'red_amber'
78
- gem 'red-parquet', '~> 10.0.0' # Optional, if you use IO from/to parquet
71
+ gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
79
72
  gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
80
73
  gem 'red-datasets-arrow' # Optional, recommended if you use Red Datasets
81
74
  gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
82
75
  ```
83
76
 
84
- And then execute `bundle install` or install it yourself as `gem install red_amber`.
77
+ And then execute `bundle install`, or install the gems yourself, e.g. `gem install red_amber`.
85
78
 
86
79
  ## Docker image and Jupyter Notebook
87
80
 
88
- [RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to @mrkn).
81
+ [RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
89
82
 
90
83
  Also you can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
91
84
  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
92
85
 
86
+ ## Comparison of DataFrames
87
+
88
+ Comparison of basic features of RedAmber with Python
89
+ [pandas](https://pandas.pydata.org/),
90
+ R [Tidyverse](https://www.tidyverse.org/) and
91
+ Julia [Dataframes](https://dataframes.juliadata.org/stable/) is in [DataFrame_Comparison.md](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
93
92
 
94
93
  ## Data frame in `RedAmber`
95
94
 
96
95
  Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
97
- The entity is a Red Arrow's Table object.
96
+ Its entity is a Red Arrow's Table object.
98
97
 
99
98
  ![dataframe model of RedAmber](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/dataframe_model.png)
100
99
 
@@ -116,7 +115,7 @@ then
116
115
  require 'datasets-arrow' # to load sample data
117
116
 
118
117
  dataset = Datasets::Diamonds.new
119
- diamonds = DataFrame.new(dataset) # from v0.2.2, should be `dataset.to_arrow` if older.
118
+ diamonds = DataFrame.new(dataset) # before v0.2.3, should be `dataset.to_arrow`
120
119
 
121
120
  # =>
122
121
  #<RedAmber::DataFrame : 53940 x 10 Vectors, 0x000000000000f668>
@@ -137,7 +136,7 @@ For example, we can compute mean prices per cut for the data larger than 1 carat
137
136
 
138
137
  ```ruby
139
138
  df = diamonds
140
- .slice { carat > 1 }
139
+ .slice { carat > 1 } # or use #filter instead of #slice
141
140
  .group(:cut)
142
141
  .mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.
143
142
  .sort('-mean(price)')
@@ -175,7 +174,7 @@ df.rename('mean(price)': :mean_price_USD)
175
174
 
176
175
  ### Example: starwars dataset
177
176
 
178
- Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data cleansing.
177
+ The next example reads the `starwars` dataset from a downloaded CSV file, followed by minimal data cleaning.
179
178
 
180
179
  ```ruby
181
180
  uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
@@ -186,7 +185,7 @@ starwars
186
185
  .drop(0) # delete unnecessary index column
187
186
  .remove { species == "NA" } # delete unnecessary rows
188
187
  .group(:species) { [count(:species), mean(:height, :mass)] }
189
- .slice { count > 1 }
188
+ .slice { count > 1 } # or use #filter instead of slice
190
189
 
191
190
  # =>
192
191
  #<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000000f848>
@@ -213,7 +212,7 @@ See [Vector.md](doc/Vector.md) for details.
213
212
 
214
213
  ## Jupyter notebook
215
214
 
216
- [89 Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
215
+ [Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
217
216
  ([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) shows more examples in jupyter notebook.
218
217
 
219
218
  You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
data/benchmark/basic.yml CHANGED
@@ -1,18 +1,18 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
4
+ - name: 0.1.5
8
5
  gems:
9
- red_amber: 0.2.3
6
+ red_amber: 0.1.5
10
7
  - name: 0.2.0
11
8
  gems:
12
9
  red_amber: 0.2.0
13
- - name: 0.1.5
10
+ - name: 0.3.0
14
11
  gems:
15
- red_amber: 0.1.5
12
+ red_amber: 0.3.0
13
+ - name: HEAD
14
+ prelude: |
15
+ $LOAD_PATH.unshift(File.expand_path('lib'))
16
16
 
17
17
  prelude: |
18
18
  require 'red_amber'
@@ -1,12 +1,12 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
+ - name: 0.3.0
5
+ gems:
6
+ red_amber: 0.3.0
4
7
  - name: HEAD
5
8
  prelude: |
6
9
  $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
8
- gems:
9
- red_amber: 0.2.3
10
10
 
11
11
  prelude: |
12
12
  require 'red_amber'
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
8
- gems:
9
- red_amber: 0.2.3
10
4
  - name: 0.2.0
11
5
  gems:
12
6
  red_amber: 0.2.0
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
@@ -19,8 +19,14 @@ prelude: |
19
19
 
20
20
  starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
21
21
 
22
- uri = URI("https://raw.githubusercontent.com/heronshoes/red_amber/master/test/entity/import_cars.tsv")
23
- import_cars = RedAmber::DataFrame.load(uri)
22
+ import_cars = RedAmber::DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
23
+ Year Audi BMW BMW_MINI Mercedes-Benz VW
24
+ 2017 28336 52527 25427 68221 49040
25
+ 2018 26473 50982 25984 67554 51961
26
+ 2019 24222 46814 23813 66553 46794
27
+ 2020 22304 35712 20196 57041 36576
28
+ 2021 22535 35905 18211 51722 35215
29
+ TSV
24
30
 
25
31
  ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
26
32
  simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
@@ -43,7 +49,7 @@ benchmark:
43
49
  .group(:species) { [count(:species), mean(:height, :mass)] }
44
50
  .slice { v(:count) > 1 }
45
51
 
46
- 'D03: Inport cars test': |
52
+ 'D03: Import cars test': |
47
53
  import_cars
48
54
  .to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
49
55
  .to_wide(name: :Manufacturer, value: :Num_of_imported)
data/benchmark/group.yml CHANGED
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
8
- gems:
9
- red_amber: 0.2.3
10
4
  - name: 0.2.2
11
5
  gems:
12
6
  red_amber: 0.2.2
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
@@ -1,15 +1,15 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
8
- gems:
9
- red_amber: 0.2.3
10
4
  - name: 0.2.2
11
5
  gems:
12
6
  red_amber: 0.2.2
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
data/benchmark/vector.yml CHANGED
@@ -1,12 +1,15 @@
1
1
  loop_count: 10
2
2
 
3
3
  contexts:
4
- - name: HEAD
5
- prelude: |
6
- $LOAD_PATH.unshift(File.expand_path('lib'))
7
4
  - name: 0.2.0
8
5
  gems:
9
6
  red_amber: 0.2.0
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
10
+ - name: HEAD
11
+ prelude: |
12
+ $LOAD_PATH.unshift(File.expand_path('lib'))
10
13
 
11
14
  prelude: |
12
15
  require 'red_amber'