red_amber 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6f70451aad21c0750fb2a0bfe165baf5862ac3837541135cf9d58df4ecd732ac
4
- data.tar.gz: c31f143278bf3792bc03e0e727e12df72fca5a001b3d6c098c3f028df456e2f0
3
+ metadata.gz: 78fa72064f9494f0f756f15cf1daaacb3640535e899ba71ab080730c0d61b0b2
4
+ data.tar.gz: 3f2de4a449c38eb995ebcc0394a1a93633f097e533696edfa91267a596dcb580
5
5
  SHA512:
6
- metadata.gz: c5026422e8f0c0b1b1b25f6baa97b540287937a3c0d224a16ce13c17c16a11ccd54682682f17fdf5e176190f7db40bfda7bbe5659e89a212eed8f1bf9b3567e9
7
- data.tar.gz: 82b500a1570b8fc8925a7c988bd7d3f3db677588537ee9f8d75f965ca98f5d730a0a55be5fa6637e3beeb4aed11b55a9afd3e4dfd52aee687e123f390f0d8d2b
6
+ metadata.gz: 45a7c37cc746c606e8d4d2a43005da8154b60df21bf2cf6b2bafa9f7ad5f962a3e3c8e2f931e6543b20b8f6cd8c8a447b99b7f0127854d3bb716ea763ab3cae5
7
+ data.tar.gz: b3ac4479df1e30b75e7ccfcc48b09f709cea536c98072bfe937ae283c0cc1d203ab97388cf6f57c39fd31c6beceadcb850c3f14e8e07e5e196cc0c862634f36d
data/.rubocop.yml CHANGED
@@ -8,12 +8,13 @@ require:
8
8
  - rubocop-rake
9
9
 
10
10
  AllCops:
11
- # drop support for < 2.7
12
- TargetRubyVersion: 2.7
11
+ # drop support for 2.7 (since 0.3.0)
12
+ TargetRubyVersion: 3.0
13
13
  # accept new cops if any
14
14
  NewCops: enable
15
15
 
16
16
  # ===
17
+ # Globally I change these 3 cops from default
17
18
 
18
19
  # alias is hard to see separately
19
20
  Style/Alias:
@@ -29,123 +30,185 @@ Style/TrailingCommaInHashLiteral:
29
30
  # ===
30
31
 
31
32
  # To let you know the possibility of refactoring ===
32
- #
33
+
33
34
  # avoid unused variable assignment
34
35
  Rubycw/Rubycw:
35
36
  Exclude:
36
37
  - 'test/**/*'
37
- Lint/UselessAssignment:
38
- Exclude:
39
- - 'test/**/*'
40
38
 
41
39
  # Disabled to define Vector operators
40
+ # Offense count: 38
42
41
  Lint/BinaryOperatorWithIdenticalOperands:
43
42
  Exclude:
44
43
  - 'test/test_vector_function.rb'
45
44
 
46
45
  # Need for test with empty block
46
+ # Offense count: 1
47
+ # Configuration parameters: AllowComments, AllowEmptyLambdas.
47
48
  Lint/EmptyBlock:
48
49
  Exclude:
49
50
  - 'test/test_group.rb'
50
51
 
52
+ # avoid unused variable assignment
53
+ # Offense count: 6
54
+ Lint/UselessAssignment:
55
+ Exclude:
56
+ - 'test/**/*'
57
+
51
58
  # Max: 120
59
+ # This cop supports safe autocorrection (--autocorrect).
60
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
61
+ # URISchemes: http, https
52
62
  Layout/LineLength:
53
- Max: 118
63
+ Max: 90
54
64
  Exclude:
55
65
  - 'test/**/*'
56
66
 
57
67
  # <= 17 satisfactory
58
68
  # 18..30 unsatisfactory
59
69
  # > 30 dangerous
70
+ # Offense count: 28
71
+ # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes.
60
72
  Metrics/AbcSize:
61
73
  Max: 30
62
- Exclude:
63
- - 'lib/red_amber/data_frame_combinable.rb' # Max: 43
64
- - 'lib/red_amber/data_frame_displayable.rb' # Max: 55
65
- - 'lib/red_amber/data_frame_reshaping.rb' # Max 40.91
66
- - 'lib/red_amber/data_frame_selectable.rb' # Max: 51
67
- - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 30.15
68
- - 'lib/red_amber/vector_updatable.rb' # Max: 36
69
- - 'lib/red_amber/vector_selectable.rb' # Max: 33
74
+ CountRepeatedAttributes: false
75
+ AllowedMethods: [
76
+ 'join', # 51.87
77
+ 'dataframe_info', # 46.5
78
+ 'format_table', # 84.62
79
+ 'to_long', # 33.66
80
+ 'to_wide', # 38.22
81
+ 'slice_by', # 38.29
82
+ 'remove', # 44.42
83
+ 'drop', # 31.42
84
+ '[]', # 33.76
85
+ 'split', # 37.35
86
+ ]
70
87
 
71
88
  # Max: 25
89
+ # Offense count: 57
90
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
91
+ # AllowedMethods: refine
72
92
  Metrics/BlockLength:
73
93
  Max: 25
74
94
  Exclude:
75
95
  - 'test/**/*'
96
+ - 'lib/red_amber/helper.rb' # 32
76
97
 
98
+ # It's ok if class is big
77
99
  # Max: 100
100
+ # Offense count: 15
101
+ # Configuration parameters: CountComments, CountAsOne.
78
102
  Metrics/ClassLength:
79
- Max: 100
80
103
  Exclude:
81
104
  - 'test/**/*'
82
- - 'lib/red_amber/data_frame.rb' #Max: 131
83
- - 'lib/red_amber/vector.rb' #Max: 102
84
- - 'lib/red_amber/group.rb' #Max: 103
105
+ - 'lib/red_amber/data_frame.rb' # 162
106
+ - 'lib/red_amber/group.rb' # 105
107
+ - 'lib/red_amber/vector.rb' # 152
85
108
 
109
+ # Only for monitoring. I will measure by PerceivedComplexity.
86
110
  # Max: 7
111
+ # Offense count: 16
112
+ # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
87
113
  Metrics/CyclomaticComplexity:
88
114
  Max: 12
89
- Exclude:
90
- - 'lib/red_amber/data_frame_combinable.rb' # Max: 15
91
- - 'lib/red_amber/data_frame_displayable.rb' # Max: 18
92
- - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
93
- - 'lib/red_amber/helper.rb' # Max: 15
94
- - 'lib/red_amber/vector_selectable.rb' # Max: 13
95
- - 'lib/red_amber/vector_updatable.rb' # Max: 14
115
+ AllowedMethods: [
116
+ 'join', # 14
117
+ 'format_table', # 21
118
+ 'slice_by', # 16
119
+ 'remove', # 14
120
+ 'normalize_element', # 17
121
+ '[]', # 13
122
+ 'parse_range', # 14
123
+ 'split', # 33
124
+ ]
96
125
 
97
126
  # Max: 10
127
+ # Offense count: 34
128
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
98
129
  Metrics/MethodLength:
99
130
  Max: 30
100
- Exclude:
101
- - 'lib/red_amber/data_frame_combinable.rb' # Max: 38
102
- - 'lib/red_amber/data_frame_displayable.rb' # Max: 33
103
- - 'lib/red_amber/data_frame_selectable.rb' # Max: 38
104
- - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 35
131
+ AllowedMethods: [
132
+ 'join', # 47
133
+ 'dataframe_info', # 33
134
+ 'format_table', # 53
135
+ 'slice_by', # 38
136
+ 'assign_update', # 35
137
+ ]
105
138
 
106
139
  # Max: 100
140
+ # Offense count: 5
141
+ # Configuration parameters: CountComments, CountAsOne.
107
142
  Metrics/ModuleLength:
108
143
  Max: 100
109
144
  Exclude:
110
- - 'lib/red_amber/data_frame_combinable.rb' # Max: 108
111
- - 'lib/red_amber/data_frame_displayable.rb' # Max: 132
112
- - 'lib/red_amber/data_frame_selectable.rb' # Max: 141
113
- - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 110
114
- - 'lib/red_amber/vector_functions.rb' # Max: 114
145
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 149
146
+ - 'lib/red_amber/data_frame_displayable.rb' # Max: 226
147
+ - 'lib/red_amber/data_frame_selectable.rb' # Max: 175
148
+ - 'lib/red_amber/data_frame_variable_operation.rb' # Max: 171
149
+ - 'lib/red_amber/vector_functions.rb' # Max: 165
150
+ - 'lib/red_amber/vector_selectable.rb' # Max: 104
151
+ - 'lib/red_amber/vector_updatable.rb' # Max: 103
115
152
 
116
153
  # Max: 5
154
+ # Offense count: 1
155
+ # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
117
156
  Metrics/ParameterLists:
118
- Max: 6
119
- # Exclude:
120
- # - 'lib/red_amber/data_frame_combinable.rb' # Max: 6
157
+ Exclude:
158
+ - 'lib/red_amber/data_frame_combinable.rb' # Max: 6, at 'join'
121
159
 
122
160
  # Max: 8
161
+ # Offense count: 15
162
+ # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
123
163
  Metrics/PerceivedComplexity:
124
- Max: 13
125
- Exclude:
126
- - 'lib/red_amber/data_frame_combinable.rb' # Max: 14
127
- - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
128
- - 'lib/red_amber/helper.rb' # Max: 15
129
- - 'lib/red_amber/vector_updatable.rb' # Max: 15
130
- - 'lib/red_amber/data_frame_displayable.rb' # Max: 19
131
-
164
+ Max: 10
165
+ AllowedMethods: [
166
+ 'join', # 14
167
+ 'dataframe_info', # 13
168
+ 'format_table', # 22
169
+ 'slice_by', # 20
170
+ 'remove', # 14
171
+ 'drop', # 12
172
+ 'filters', # 11
173
+ 'normalize_element', # 17
174
+ '[]', # 11
175
+ 'parse_range', # 15
176
+ 'split', # 14
177
+ ]
178
+
179
+ # Offense count: 1
180
+ # Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
181
+ # CheckDefinitionPathHierarchyRoots: lib, spec, test, src
182
+ # AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
132
183
  Naming/FileName:
133
184
  Exclude:
134
185
  - 'lib/red-amber.rb'
135
186
 
136
187
  # Necessary to define is_na, is_in, etc.
188
+ # Offense count: 3
189
+ # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
190
+ # NamePrefix: is_, has_, have_
191
+ # ForbiddenPrefixes: is_, has_, have_
192
+ # AllowedMethods: is_a?
193
+ # MethodDefinitionMacros: define_method, define_singleton_method
137
194
  Naming/PredicateName:
138
195
  Exclude:
139
- - 'lib/red_amber/vector_functions.rb'
140
196
  - 'lib/red_amber/vector.rb'
197
+ - 'lib/red_amber/vector_functions.rb'
141
198
  - 'lib/red_amber/vector_selectable.rb'
142
199
 
143
200
  # Necessary to test when range.end == -1
201
+ # Offense count: 2
202
+ # This cop supports unsafe autocorrection (--autocorrect-all).
144
203
  Style/SlicingWithRange:
145
204
  Exclude:
146
205
  - 'test/test_data_frame_selectable.rb'
147
206
 
148
207
  # Necessary to Vector < 0 element-wise comparison
208
+ # Offense count: 5
209
+ # This cop supports unsafe autocorrection (--autocorrect-all).
210
+ # Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns, IgnoredMethods.
211
+ # SupportedStyles: predicate, comparison
149
212
  Style/NumericPredicate:
150
213
  Exclude:
151
214
  - 'lib/red_amber/data_frame_selectable.rb'
data/CHANGELOG.md CHANGED
@@ -1,4 +1,93 @@
1
- ## [0.2.4] - 2022-12-25 (unleleased)
1
+ ## [0.3.0] - 2022-12-18
2
+
3
+ - Breaking change
4
+ - Supported Ruby version has changed from 2.7 to 3.0
5
+ - Upgrade minimum supported/required version of Ruby from 2.7 to 3.0 (#159, #160)
6
+
7
+ - Bug fixes
8
+ - Add check with #key? in DataFrame#method_missing (#140)
9
+ - Delete unnecessary backslash to suppress warning in unary functions (#140)
10
+ - Fix syntax in code_climate.yml (#144)
11
+ - Temporary disable simplecov test report (#149)
12
+ - Change Vector#[] to return Array or scalar (#148)
13
+ - Add missing simplecov HTML formatter (#148)
14
+ - Change return value of DataFrame#save to self (#160)
15
+ - Originally reported by kojix2.
16
+
17
+ - New features and improvements
18
+ - Update Vector#take to accept block (#148)
19
+ - Add properties of list Vectors (#148)
20
+ - Add Vector#split, #split_to_column, #split_to_row (#148)
21
+ - Add Vector#merge (#148)
22
+
23
+ - Refactoring
24
+ - Refactor code (#140)
25
+ - Add DataFrame.create as a faster constructor
26
+ - Refactor DataFrame.new using refinements and duck typing
27
+ - Refactor Vector.new using refinements and duck typing
28
+ - Add Vector.create as a faster constructor
29
+ - Refactor Group
30
+ - Refactor DataFrame#pick/#drop by refining Array
31
+ - Refactor DataFrame#pick/#drop
32
+ - Refactor nil treatment in pick/drop
33
+ - Refactor DataFrame#pick/#drop using new parser
34
+ - Refactor DataFrame#[]
35
+ - Refactor Vector#[], #take, #filter by updating parser
36
+ - Add for_keys option to parse_args
37
+ - Refactor Vector properties by refinements for Arrow::Array
38
+ - Refactor DataFrame selectable using Arrow::Array refinements instead of Vector methods
39
+ - Refactor DataFrame#assign
40
+ - Refine error message in DataFrame#to_long/to_wide (#143)
41
+ - Refactor Vector#take/filter returns arrow array (#148)
42
+ - Change LineLength in cop from 120 to 90 (#152)
43
+ - Refine DataFrame combinable (join) operations (#159)
44
+ - Refine DataFrame#join effectively using outputs options
45
+ - Simplify DataFrame set operations
46
+
47
+ - Improve in tests/CI
48
+ - Tests
49
+ - Update benchmark using 0.2.3 (#138)
50
+ - Update benchmark basic#02/pick by [] (#140)
51
+ - Update benchmark contexts and loop_count (#140)
52
+ - Add benchmark for vector (#140)
53
+ - Add tests for refinements (#140)
54
+ - Add benchmark for the series of DataFrame operations (#140)
55
+ - Add missing test for tdr and dictionary (#140)
56
+ - Add missing test for group#method with foreign key (#152)
57
+ - Add missing test for set operations and natural join (#152)
58
+ - Add missing test for DataFrame#[] with selecting by Array of illegal type (#152)
59
+ - Add missing test for DataFrame#assign when assigner size is mismatch (#152)
60
+ - Accept Hash as join keys in DataFrame join methods (#159)
61
+
62
+ - Cops
63
+ - Refactor/clean rubocop.yml (#138)
64
+
65
+ - CI
66
+ - Support Ruby 3.2 in CI test (#141)
67
+ - Send test coverage report to Code Climate (#144)
68
+ - Add test on Fedora (#151)
69
+ - Thanks to Benson Muite.
70
+
71
+ - Add workflow to generate document (#153)
72
+ - Thanks to kojix2.
73
+
74
+ - Support Code Climate test coverage report in CI (#155)
75
+
76
+ - Documentation
77
+ - Add YARD in data_frame.rb (#140)
78
+ - Fix YARD document in the code (#140)
79
+ - Add Code Climate badges of maintainability and coverage (#144)
80
+ - Add installation for Fedora in README (#147)
81
+ - Thanks to Benson Muite.
82
+
83
+ - Add Vector#split/merge in Vector.md (#148)
84
+ - Fix codeclimate badges in README (#155)
85
+ - Update YARD in DataFrame join methods (#159)
86
+ - Update jupyter notebook '89 examples of Redamber' (#160)
87
+
88
+ - Thanks
89
+ - Benson Muite
90
+ - kojix2
2
91
 
3
92
  ## [0.2.3] - 2022-11-16
4
93
 
data/Gemfile CHANGED
@@ -24,4 +24,5 @@ group :test do
24
24
  gem 'red-arrow-numo-narray'
25
25
  gem 'red-datasets-arrow'
26
26
  gem 'simplecov'
27
+ gem 'simplecov-json'
27
28
  end
data/README.md CHANGED
@@ -1,22 +1,25 @@
1
1
  # RedAmber
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/red_amber.svg)](https://badge.fury.io/rb/red_amber)
4
- [![Ruby](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
4
+ [![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
5
+ [![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
6
+ [![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
7
+ [![Doc](https://img.shields.io/badge/docs-latest-blue)](https://heronshoes.github.io/red_amber/)
5
8
  [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/heronshoes/red_amber/discussions)
6
9
 
7
10
  A simple dataframe library for Ruby.
8
11
 
9
- - Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow) [![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en)
12
+ - Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
13
+ [![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en)
10
14
  - Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
11
15
 
12
- ![screenshot from jupyterlab](doc/image/screenshot.png)
16
+ ![screenshot from jupyterlab](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/screenshot.png)
13
17
 
14
18
  ## Requirements
15
19
 
16
- Supported Ruby version is >= 2.7.
20
+ Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
17
21
 
18
- Since v0.2.0, this library uses pattern matching which is an experimental feature in 2.7 . It is usable but a warning message will be shown in 2.7 .
19
- I recommend Ruby 3 for performance.
22
+ - I decided to drop support for Ruby 2.7 without waiting for its EOL, because the problem of using a Hash and keyword arguments simultaneously cannot be solved on Ruby 2.7 when implementing DataFrame#join.
20
23
 
21
24
  ```ruby
22
25
  # Libraries required
@@ -37,26 +40,35 @@ Install requirements before you install Red Amber.
37
40
  See [Apache Arrow install document](https://arrow.apache.org/install/).
38
41
 
39
42
  - Minimum installation example for the latest Ubuntu:
40
- ```
41
- sudo apt update
42
- sudo apt install -y -V ca-certificates lsb-release wget
43
- wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
44
- sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
45
- sudo apt update
46
- sudo apt install -y -V libarrow-dev
47
- sudo apt install -y -V libarrow-glib-dev
48
- ```
49
- - On macOS, you can install Apache Arrow C++ library using Homebrew:
50
-
51
- ```
52
- brew install apache-arrow
53
- ```
43
+
44
+ ```
45
+ sudo apt update
46
+ sudo apt install -y -V ca-certificates lsb-release wget
47
+ wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
48
+ sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
49
+ sudo apt update
50
+ sudo apt install -y -V libarrow-dev
51
+ sudo apt install -y -V libarrow-glib-dev
52
+ ```
53
+
54
+ - On Fedora 38 (Rawhide):
55
+
56
+ ```
57
+ sudo dnf update
58
+ sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
59
+ ```
60
+
61
+ - On macOS, you can install Apache Arrow C++ library using Homebrew:
62
+
63
+ ```
64
+ brew install apache-arrow
65
+ ```
54
66
 
55
67
  and GLib (C) package with:
56
68
 
57
- ```
58
- brew install apache-arrow-glib
59
- ```
69
+ ```
70
+ brew install apache-arrow-glib
71
+ ```
60
72
 
61
73
  If you prepared Apache Arrow, add these lines to your Gemfile:
62
74
 
@@ -84,7 +96,7 @@ Also you can try the contents of this README interactively by [Binder](https://m
84
96
  Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
85
97
  The entity is a Red Arrow's Table object.
86
98
 
87
- ![dataframe model of RedAmber](doc/image/dataframe_model.png)
99
+ ![dataframe model of RedAmber](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/dataframe_model.png)
88
100
 
89
101
  Let's load the library and try some examples.
90
102
 
@@ -95,6 +107,11 @@ include RedAmber
95
107
 
96
108
  ### Example: diamonds dataset
97
109
 
110
+ First do (if you have not installed it yet) `
111
+ gem install red-datasets-arrow
112
+ `
113
+ then
114
+
98
115
  ```ruby
99
116
  require 'datasets-arrow' # to load sample data
100
117
 
@@ -196,7 +213,7 @@ See [Vector.md](doc/Vector.md) for details.
196
213
 
197
214
  ## Jupyter notebook
198
215
 
199
- [83 Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
216
+ [89 Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
200
217
  ([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) shows more examples in jupyter notebook.
201
218
 
202
219
  You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
data/benchmark/basic.yml CHANGED
@@ -1,10 +1,17 @@
1
+ loop_count: 3
2
+
1
3
  contexts:
2
4
  - name: HEAD
3
5
  prelude: |
4
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
5
- - gems:
7
+ - name: 0.2.3
8
+ gems:
9
+ red_amber: 0.2.3
10
+ - name: 0.2.0
11
+ gems:
6
12
  red_amber: 0.2.0
7
- - gems:
13
+ - name: 0.1.5
14
+ gems:
8
15
  red_amber: 0.1.5
9
16
 
10
17
  prelude: |
@@ -21,8 +28,8 @@ benchmark:
21
28
  'B01: Pick([]) by a key name': |
22
29
  df[:flight]
23
30
 
24
- 'B02: Pick by index': |
25
- df[df.keys[9]]
31
+ 'B02a: Pick([]) by key names': |
32
+ df[:carrier, :flight]
26
33
 
27
34
  'B03: Pick by key names': |
28
35
  df.pick(:carrier, :flight)
@@ -1,13 +1,12 @@
1
- # --repeat-count 3
2
-
3
1
  loop_count: 3
4
2
 
5
3
  contexts:
6
4
  - name: HEAD
7
5
  prelude: |
8
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
9
- # - gems:
10
- # red_amber: 0.2.3
7
+ - name: 0.2.3
8
+ gems:
9
+ red_amber: 0.2.3
11
10
 
12
11
  prelude: |
13
12
  require 'red_amber'
@@ -0,0 +1,62 @@
1
+ loop_count: 3
2
+
3
+ contexts:
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ - name: 0.2.3
8
+ gems:
9
+ red_amber: 0.2.3
10
+ - name: 0.2.0
11
+ gems:
12
+ red_amber: 0.2.0
13
+
14
+ prelude: |
15
+ require 'red_amber'
16
+ require 'datasets-arrow'
17
+
18
+ diamonds = RedAmber::DataFrame.new(Datasets::Diamonds.new.to_arrow)
19
+
20
+ starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
21
+
22
+ uri = URI("https://raw.githubusercontent.com/heronshoes/red_amber/master/test/entity/import_cars.tsv")
23
+ import_cars = RedAmber::DataFrame.load(uri)
24
+
25
+ ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
26
+ simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
27
+
28
+ benchmark:
29
+ 'D01: Diamonds test': |
30
+ diamonds
31
+ .slice { v(:carat) > 1 }
32
+ .pick(:cut, :price)
33
+ .group(:cut)
34
+ .mean
35
+ .sort('-mean(price)')
36
+ .rename('mean(price)': :mean_price_USD)
37
+ .assign { [:mean_price_JPY, v(:mean_price_USD) * 110.0] }
38
+
39
+ 'D02: Starwars test': |
40
+ starwars
41
+ .drop { keys.select { |key| key.end_with?('color') } }
42
+ .remove { v(:species) == 'NA' }
43
+ .group(:species) { [count(:species), mean(:height, :mass)] }
44
+ .slice { v(:count) > 1 }
45
+
46
+ 'D03: Inport cars test': |
47
+ import_cars
48
+ .to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
49
+ .to_wide(name: :Manufacturer, value: :Num_of_imported)
50
+ .transpose
51
+
52
+ 'D04: Simpsons paradox test': |
53
+ simpsons_paradox_covid[simpsons_paradox_covid[:age_group] == 'under 50']
54
+ .group(:vaccine_status, :outcome)
55
+ .count
56
+ .then { |df| df.to_wide(name: :vaccine_status, value: df.keys[-1]) }
57
+ .assign do
58
+ [
59
+ [:'vaccinated_%', (100.0 * v(:vaccinated) / v(:vaccinated).sum)],
60
+ [:'unvaccinated_%', (100.0 * v(:unvaccinated) / v(:unvaccinated).sum)]
61
+ ]
62
+ end
data/benchmark/group.yml CHANGED
@@ -1,8 +1,14 @@
1
+ loop_count: 3
2
+
1
3
  contexts:
2
4
  - name: HEAD
3
5
  prelude: |
4
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
5
- - gems:
7
+ - name: 0.2.3
8
+ gems:
9
+ red_amber: 0.2.3
10
+ - name: 0.2.2
11
+ gems:
6
12
  red_amber: 0.2.2
7
13
 
8
14
  prelude: |
@@ -1,10 +1,14 @@
1
- # --repeat-count 3
1
+ loop_count: 3
2
2
 
3
3
  contexts:
4
4
  - name: HEAD
5
5
  prelude: |
6
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - gems:
7
+ - name: 0.2.3
8
+ gems:
9
+ red_amber: 0.2.3
10
+ - name: 0.2.2
11
+ gems:
8
12
  red_amber: 0.2.2
9
13
 
10
14
  prelude: |