red_amber 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78fa72064f9494f0f756f15cf1daaacb3640535e899ba71ab080730c0d61b0b2
|
4
|
+
data.tar.gz: 3f2de4a449c38eb995ebcc0394a1a93633f097e533696edfa91267a596dcb580
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45a7c37cc746c606e8d4d2a43005da8154b60df21bf2cf6b2bafa9f7ad5f962a3e3c8e2f931e6543b20b8f6cd8c8a447b99b7f0127854d3bb716ea763ab3cae5
|
7
|
+
data.tar.gz: b3ac4479df1e30b75e7ccfcc48b09f709cea536c98072bfe937ae283c0cc1d203ab97388cf6f57c39fd31c6beceadcb850c3f14e8e07e5e196cc0c862634f36d
|
data/.rubocop.yml
CHANGED
@@ -8,12 +8,13 @@ require:
|
|
8
8
|
- rubocop-rake
|
9
9
|
|
10
10
|
AllCops:
|
11
|
-
# drop support for
|
12
|
-
TargetRubyVersion:
|
11
|
+
# drop support for 2.7 (since 0.3.0)
|
12
|
+
TargetRubyVersion: 3.0
|
13
13
|
# accept new cops if any
|
14
14
|
NewCops: enable
|
15
15
|
|
16
16
|
# ===
|
17
|
+
# Globally I change these 3 cops from default
|
17
18
|
|
18
19
|
# alias is hard to see separately
|
19
20
|
Style/Alias:
|
@@ -29,123 +30,185 @@ Style/TrailingCommaInHashLiteral:
|
|
29
30
|
# ===
|
30
31
|
|
31
32
|
# To let you know the possibility of refactoring ===
|
32
|
-
|
33
|
+
|
33
34
|
# avoid unused variable assignment
|
34
35
|
Rubycw/Rubycw:
|
35
36
|
Exclude:
|
36
37
|
- 'test/**/*'
|
37
|
-
Lint/UselessAssignment:
|
38
|
-
Exclude:
|
39
|
-
- 'test/**/*'
|
40
38
|
|
41
39
|
# Disabled to define Vector operators
|
40
|
+
# Offense count: 38
|
42
41
|
Lint/BinaryOperatorWithIdenticalOperands:
|
43
42
|
Exclude:
|
44
43
|
- 'test/test_vector_function.rb'
|
45
44
|
|
46
45
|
# Need for test with empty block
|
46
|
+
# Offense count: 1
|
47
|
+
# Configuration parameters: AllowComments, AllowEmptyLambdas.
|
47
48
|
Lint/EmptyBlock:
|
48
49
|
Exclude:
|
49
50
|
- 'test/test_group.rb'
|
50
51
|
|
52
|
+
# avoid unused variable assignment
|
53
|
+
# Offense count: 6
|
54
|
+
Lint/UselessAssignment:
|
55
|
+
Exclude:
|
56
|
+
- 'test/**/*'
|
57
|
+
|
51
58
|
# Max: 120
|
59
|
+
# This cop supports safe autocorrection (--autocorrect).
|
60
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
61
|
+
# URISchemes: http, https
|
52
62
|
Layout/LineLength:
|
53
|
-
Max:
|
63
|
+
Max: 90
|
54
64
|
Exclude:
|
55
65
|
- 'test/**/*'
|
56
66
|
|
57
67
|
# <= 17 satisfactory
|
58
68
|
# 18..30 unsatisfactory
|
59
69
|
# > 30 dangerous
|
70
|
+
# Offense count: 28
|
71
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes.
|
60
72
|
Metrics/AbcSize:
|
61
73
|
Max: 30
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
74
|
+
CountRepeatedAttributes: false
|
75
|
+
AllowedMethods: [
|
76
|
+
'join', # 51.87
|
77
|
+
'dataframe_info', # 46.5
|
78
|
+
'format_table', # 84.62
|
79
|
+
'to_long', # 33.66
|
80
|
+
'to_wide', #38.22
|
81
|
+
'slice_by', # 38.29
|
82
|
+
'remove', # 44.42
|
83
|
+
'drop', # 31.42
|
84
|
+
'[]', # 33.76
|
85
|
+
'split', # 37.35
|
86
|
+
]
|
70
87
|
|
71
88
|
# Max: 25
|
89
|
+
# Offense count: 57
|
90
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
|
91
|
+
# AllowedMethods: refine
|
72
92
|
Metrics/BlockLength:
|
73
93
|
Max: 25
|
74
94
|
Exclude:
|
75
95
|
- 'test/**/*'
|
96
|
+
- 'lib/red_amber/helper.rb' # 32
|
76
97
|
|
98
|
+
# It's ok if class is big
|
77
99
|
# Max: 100
|
100
|
+
# Offense count: 15
|
101
|
+
# Configuration parameters: CountComments, CountAsOne.
|
78
102
|
Metrics/ClassLength:
|
79
|
-
Max: 100
|
80
103
|
Exclude:
|
81
104
|
- 'test/**/*'
|
82
|
-
- 'lib/red_amber/data_frame.rb' #
|
83
|
-
- 'lib/red_amber/
|
84
|
-
- 'lib/red_amber/
|
105
|
+
- 'lib/red_amber/data_frame.rb' # 162
|
106
|
+
- 'lib/red_amber/group.rb' # 105
|
107
|
+
- 'lib/red_amber/vector.rb' # 152
|
85
108
|
|
109
|
+
# Only for monitoring. I will measure by PerceivedComplexity.
|
86
110
|
# Max: 7
|
111
|
+
# Offense count: 16
|
112
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
|
87
113
|
Metrics/CyclomaticComplexity:
|
88
114
|
Max: 12
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
115
|
+
AllowedMethods: [
|
116
|
+
'join', # 14
|
117
|
+
'format_table', # 21
|
118
|
+
'slice_by', # 16
|
119
|
+
'remove', # 14
|
120
|
+
'normalize_element', # 17
|
121
|
+
'[]', # 13
|
122
|
+
'parse_range', # 14
|
123
|
+
'split', # 33
|
124
|
+
]
|
96
125
|
|
97
126
|
# Max: 10
|
127
|
+
# Offense count: 34
|
128
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
|
98
129
|
Metrics/MethodLength:
|
99
130
|
Max: 30
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
131
|
+
AllowedMethods: [
|
132
|
+
'join', # 47
|
133
|
+
'dataframe_info', # 33
|
134
|
+
'format_table', # 53
|
135
|
+
'slice_by', # 38
|
136
|
+
'assign_update', # 35
|
137
|
+
]
|
105
138
|
|
106
139
|
# Max: 100
|
140
|
+
# Offense count: 5
|
141
|
+
# Configuration parameters: CountComments, CountAsOne.
|
107
142
|
Metrics/ModuleLength:
|
108
143
|
Max: 100
|
109
144
|
Exclude:
|
110
|
-
- 'lib/red_amber/data_frame_combinable.rb' # Max:
|
111
|
-
- 'lib/red_amber/data_frame_displayable.rb' # Max:
|
112
|
-
- 'lib/red_amber/data_frame_selectable.rb' # Max:
|
113
|
-
- 'lib/red_amber/data_frame_variable_operation.rb' # Max:
|
114
|
-
- 'lib/red_amber/vector_functions.rb' # Max:
|
145
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 149
|
146
|
+
- 'lib/red_amber/data_frame_displayable.rb' # Max: 226
|
147
|
+
- 'lib/red_amber/data_frame_selectable.rb' # Max: 175
|
148
|
+
- 'lib/red_amber/data_frame_variable_operation.rb' # Max: 171
|
149
|
+
- 'lib/red_amber/vector_functions.rb' # Max: 165
|
150
|
+
- 'lib/red_amber/vector_selectable.rb' # Max: 104
|
151
|
+
- 'lib/red_amber/vector_updatable.rb' # Max: 103
|
115
152
|
|
116
153
|
# Max: 5
|
154
|
+
# Offense count: 1
|
155
|
+
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
117
156
|
Metrics/ParameterLists:
|
118
|
-
|
119
|
-
|
120
|
-
# - 'lib/red_amber/data_frame_combinable.rb' # Max: 6
|
157
|
+
Exclude:
|
158
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 6, at 'join'
|
121
159
|
|
122
160
|
# Max: 8
|
161
|
+
# Offense count: 15
|
162
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
|
123
163
|
Metrics/PerceivedComplexity:
|
124
|
-
Max:
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
164
|
+
Max: 10
|
165
|
+
AllowedMethods: [
|
166
|
+
'join', # 14
|
167
|
+
'dataframe_info', # 13
|
168
|
+
'format_table', # 22
|
169
|
+
'slice_by', # 20
|
170
|
+
'remove', # 14
|
171
|
+
'drop', # 12
|
172
|
+
'filters', # 11
|
173
|
+
'normalize_element', # 17
|
174
|
+
'[]', # 11
|
175
|
+
'parse_range', # 15
|
176
|
+
'split', # 14
|
177
|
+
]
|
178
|
+
|
179
|
+
# Offense count: 1
|
180
|
+
# Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
181
|
+
# CheckDefinitionPathHierarchyRoots: lib, spec, test, src
|
182
|
+
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
132
183
|
Naming/FileName:
|
133
184
|
Exclude:
|
134
185
|
- 'lib/red-amber.rb'
|
135
186
|
|
136
187
|
# Necessary to define is_na, is_in, etc.
|
188
|
+
# Offense count: 3
|
189
|
+
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
|
190
|
+
# NamePrefix: is_, has_, have_
|
191
|
+
# ForbiddenPrefixes: is_, has_, have_
|
192
|
+
# AllowedMethods: is_a?
|
193
|
+
# MethodDefinitionMacros: define_method, define_singleton_method
|
137
194
|
Naming/PredicateName:
|
138
195
|
Exclude:
|
139
|
-
- 'lib/red_amber/vector_functions.rb'
|
140
196
|
- 'lib/red_amber/vector.rb'
|
197
|
+
- 'lib/red_amber/vector_functions.rb'
|
141
198
|
- 'lib/red_amber/vector_selectable.rb'
|
142
199
|
|
143
200
|
# Necessary to test when range.end == -1
|
201
|
+
# Offense count: 2
|
202
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
144
203
|
Style/SlicingWithRange:
|
145
204
|
Exclude:
|
146
205
|
- 'test/test_data_frame_selectable.rb'
|
147
206
|
|
148
207
|
# Necessary to Vector < 0 element-wise comparison
|
208
|
+
# Offense count: 5
|
209
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
210
|
+
# Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns, IgnoredMethods.
|
211
|
+
# SupportedStyles: predicate, comparison
|
149
212
|
Style/NumericPredicate:
|
150
213
|
Exclude:
|
151
214
|
- 'lib/red_amber/data_frame_selectable.rb'
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,93 @@
|
|
1
|
-
## [0.
|
1
|
+
## [0.3.0] - 2022-12-18
|
2
|
+
|
3
|
+
- Breaking change
|
4
|
+
- Supported Ruby version has changed from 2.7 to 3.0
|
5
|
+
- Upgrade minimum supported/required version of Ruby from 2.7 to 3.0 (#159, #160)
|
6
|
+
|
7
|
+
- Bug fixes
|
8
|
+
- Add check with #key? in DataFrame#method_missing (#140)
|
9
|
+
- Delete unnecessary backslash to suppress warning in unary functions (#140)
|
10
|
+
- Fix syntax in code_climate.yml (#144)
|
11
|
+
- Temporarily disable simplecov test report (#149)
|
12
|
+
- Change Vector#[] to return Array or scalar (#148)
|
13
|
+
- Add missing simplecov HTML formatter (#148)
|
14
|
+
- Change return value of DataFrame#save to self (#160)
|
15
|
+
- Originally reported by kojix2.
|
16
|
+
|
17
|
+
- New features and improvements
|
18
|
+
- Update Vector#take to accept block (#148)
|
19
|
+
- Add properties of list Vectors (#148)
|
20
|
+
- Add Vector#split, #split_to_column, #split_to_row (#148)
|
21
|
+
- Add Vector#merge (#148)
|
22
|
+
|
23
|
+
- Refactoring
|
24
|
+
- Refactor code (#140)
|
25
|
+
- Add DataFrame.create as a faster constructor
|
26
|
+
- Refactor DataFrame.new using refinements and duck typing
|
27
|
+
- Refactor Vector.new using refinements and duck typing
|
28
|
+
- Add Vector.create as a faster constructor
|
29
|
+
- Refactor Group
|
30
|
+
- Refactor DataFrame#pick/#drop by refining Array
|
31
|
+
- Refactor DataFrame#pick/#drop
|
32
|
+
- Refactor nil treatment in pick/drop
|
33
|
+
- Refactor DataFrame#pick/#drop using new parser
|
34
|
+
- Refactor DataFrame#[]
|
35
|
+
- Refactor Vector#[], #take, #filter by updating parser
|
36
|
+
- Add for_keys option to parse_args
|
37
|
+
- Refactor Vector properties by refinements for Arrow::Array
|
38
|
+
- Refactor DataFrame selectable using Arrow::Array refinements instead of Vector methods
|
39
|
+
- Refactor DataFrame#assign
|
40
|
+
- Refine error message in DataFrame#to_long/to_wide (#143)
|
41
|
+
- Refactor Vector#take/filter returns arrow array (#148)
|
42
|
+
- Change LineLength in cop from 120 to 90 (#152)
|
43
|
+
- Refine DataFrame combinable (join) operations (#159)
|
44
|
+
- Refine DataFrame#join effectively using outputs options
|
45
|
+
- Simplify DataFrame set operations
|
46
|
+
|
47
|
+
- Improve in tests/CI
|
48
|
+
- Tests
|
49
|
+
- Update benchmark using 0.2.3 (#138)
|
50
|
+
- Update benchmark basic#02/pick by [] (#140)
|
51
|
+
- Update benchmark contexts and loop_count (#140)
|
52
|
+
- Add benchmark for vector (#140)
|
53
|
+
- Add tests for refinements (#140)
|
54
|
+
- Add benchmark for the series of DataFrame operations (#140)
|
55
|
+
- Add missing test for tdr and dictionary (#140)
|
56
|
+
- Add missing test for group#method with foreign key (#152)
|
57
|
+
- Add missing test for set operations and natural join (#152)
|
58
|
+
- Add missing test for DataFrame#[] with selecting by Array of illegal type (#152)
|
59
|
+
- Add missing test for DataFrame#assign when assigner size is mismatch (#152)
|
60
|
+
- Accept Hash as join keys in DataFrame join methods (#159)
|
61
|
+
|
62
|
+
- Cops
|
63
|
+
- Refactor/clean rubocop.yml (#138)
|
64
|
+
|
65
|
+
- CI
|
66
|
+
- Support Ruby 3.2 in CI test (#141)
|
67
|
+
- Send test coverage report to Code Climate (#144)
|
68
|
+
- Add test on Fedora (#151)
|
69
|
+
- Thanks to Benson Muite.
|
70
|
+
|
71
|
+
- Add workflow to generate document (#153)
|
72
|
+
- Thanks to kojix2.
|
73
|
+
|
74
|
+
- Support Code Climate test coverage report in CI (#155)
|
75
|
+
|
76
|
+
- Documentation
|
77
|
+
- Add YARD in data_frame.rb (#140)
|
78
|
+
- Fix YARD document in the code (#140)
|
79
|
+
- Add Code Climate badges of maintainability and coverage (#144)
|
80
|
+
- Add installation for Fedora in README (#147)
|
81
|
+
- Thanks to Benson Muite.
|
82
|
+
|
83
|
+
- Add Vector#split/merge in Vector.md (#148)
|
84
|
+
- Fix codeclimate badges in README (#155)
|
85
|
+
- Update YARD in DataFrame join methods (#159)
|
86
|
+
- Update jupyter notebook '89 examples of Redamber' (#160)
|
87
|
+
|
88
|
+
- Thanks
|
89
|
+
- Benson Muite
|
90
|
+
- kojix2
|
2
91
|
|
3
92
|
## [0.2.3] - 2022-11-16
|
4
93
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,22 +1,25 @@
|
|
1
1
|
# RedAmber
|
2
2
|
|
3
3
|
[](https://badge.fury.io/rb/red_amber)
|
4
|
-
[](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
|
5
|
+
[](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
6
|
+
[](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
7
|
+
[](https://heronshoes.github.io/red_amber/)
|
5
8
|
[](https://github.com/heronshoes/red_amber/discussions)
|
6
9
|
|
7
10
|
A simple dataframe library for Ruby.
|
8
11
|
|
9
|
-
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
12
|
+
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
13
|
+
[](https://gitter.im/red-data-tools/en)
|
10
14
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
11
15
|
|
12
|
-

|
16
|
+

|
13
17
|
|
14
18
|
## Requirements
|
15
19
|
|
16
|
-
Supported Ruby version is >=
|
20
|
+
Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
|
17
21
|
|
18
|
-
|
19
|
-
I recommend Ruby 3 for performance.
|
22
|
+
- I decided to remove Ruby 2.7 without waiting for EOL because it cannot solve the problem of simultaneous use of Hash and keyword arguments when implementing DataFrame#join.
|
20
23
|
|
21
24
|
```ruby
|
22
25
|
# Libraries required
|
@@ -37,26 +40,35 @@ Install requirements before you install Red Amber.
|
|
37
40
|
See [Apache Arrow install document](https://arrow.apache.org/install/).
|
38
41
|
|
39
42
|
- Minimum installation example for the latest Ubuntu:
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
43
|
+
|
44
|
+
```
|
45
|
+
sudo apt update
|
46
|
+
sudo apt install -y -V ca-certificates lsb-release wget
|
47
|
+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
|
48
|
+
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
|
49
|
+
sudo apt update
|
50
|
+
sudo apt install -y -V libarrow-dev
|
51
|
+
sudo apt install -y -V libarrow-glib-dev
|
52
|
+
```
|
53
|
+
|
54
|
+
- On Fedora 38 (Rawhide):
|
55
|
+
|
56
|
+
```
|
57
|
+
sudo dnf update
|
58
|
+
sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
|
59
|
+
```
|
60
|
+
|
61
|
+
- On macOS, you can install Apache Arrow C++ library using Homebrew:
|
62
|
+
|
63
|
+
```
|
64
|
+
brew install apache-arrow
|
65
|
+
```
|
54
66
|
|
55
67
|
and GLib (C) package with:
|
56
68
|
|
57
|
-
|
58
|
-
|
59
|
-
|
69
|
+
```
|
70
|
+
brew install apache-arrow-glib
|
71
|
+
```
|
60
72
|
|
61
73
|
If you prepared Apache Arrow, add these lines to your Gemfile:
|
62
74
|
|
@@ -84,7 +96,7 @@ Also you can try the contents of this README interactively by [Binder](https://m
|
|
84
96
|
Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
|
85
97
|
The entity is a Red Arrow's Table object.
|
86
98
|
|
87
|
-

|
99
|
+

|
88
100
|
|
89
101
|
Let's load the library and try some examples.
|
90
102
|
|
@@ -95,6 +107,11 @@ include RedAmber
|
|
95
107
|
|
96
108
|
### Example: diamonds dataset
|
97
109
|
|
110
|
+
First do (if you have not installed it yet) `
|
111
|
+
gem install red-datasets-arrow
|
112
|
+
`
|
113
|
+
then
|
114
|
+
|
98
115
|
```ruby
|
99
116
|
require 'datasets-arrow' # to load sample data
|
100
117
|
|
@@ -196,7 +213,7 @@ See [Vector.md](doc/Vector.md) for details.
|
|
196
213
|
|
197
214
|
## Jupyter notebook
|
198
215
|
|
199
|
-
[
|
216
|
+
[89 Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
|
200
217
|
([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) shows more examples in jupyter notebook.
|
201
218
|
|
202
219
|
You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
|
data/benchmark/basic.yml
CHANGED
@@ -1,10 +1,17 @@
|
|
1
|
+
loop_count: 3
|
2
|
+
|
1
3
|
contexts:
|
2
4
|
- name: HEAD
|
3
5
|
prelude: |
|
4
6
|
$LOAD_PATH.unshift(File.expand_path('lib'))
|
5
|
-
-
|
7
|
+
- name: 0.2.3
|
8
|
+
gems:
|
9
|
+
red_amber: 0.2.3
|
10
|
+
- name: 0.2.0
|
11
|
+
gems:
|
6
12
|
red_amber: 0.2.0
|
7
|
-
-
|
13
|
+
- name: 0.1.5
|
14
|
+
gems:
|
8
15
|
red_amber: 0.1.5
|
9
16
|
|
10
17
|
prelude: |
|
@@ -21,8 +28,8 @@ benchmark:
|
|
21
28
|
'B01: Pick([]) by a key name': |
|
22
29
|
df[:flight]
|
23
30
|
|
24
|
-
'
|
25
|
-
df[
|
31
|
+
'B02a: Pick([]) by key names': |
|
32
|
+
df[:carrier, :flight]
|
26
33
|
|
27
34
|
'B03: Pick by key names': |
|
28
35
|
df.pick(:carrier, :flight)
|
data/benchmark/combine.yml
CHANGED
@@ -0,0 +1,62 @@
|
|
1
|
+
loop_count: 3
|
2
|
+
|
3
|
+
contexts:
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
- name: 0.2.3
|
8
|
+
gems:
|
9
|
+
red_amber: 0.2.3
|
10
|
+
- name: 0.2.0
|
11
|
+
gems:
|
12
|
+
red_amber: 0.2.0
|
13
|
+
|
14
|
+
prelude: |
|
15
|
+
require 'red_amber'
|
16
|
+
require 'datasets-arrow'
|
17
|
+
|
18
|
+
diamonds = RedAmber::DataFrame.new(Datasets::Diamonds.new.to_arrow)
|
19
|
+
|
20
|
+
starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
|
21
|
+
|
22
|
+
uri = URI("https://raw.githubusercontent.com/heronshoes/red_amber/master/test/entity/import_cars.tsv")
|
23
|
+
import_cars = RedAmber::DataFrame.load(uri)
|
24
|
+
|
25
|
+
ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
|
26
|
+
simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
|
27
|
+
|
28
|
+
benchmark:
|
29
|
+
'D01: Diamonds test': |
|
30
|
+
diamonds
|
31
|
+
.slice { v(:carat) > 1 }
|
32
|
+
.pick(:cut, :price)
|
33
|
+
.group(:cut)
|
34
|
+
.mean
|
35
|
+
.sort('-mean(price)')
|
36
|
+
.rename('mean(price)': :mean_price_USD)
|
37
|
+
.assign { [:mean_price_JPY, v(:mean_price_USD) * 110.0] }
|
38
|
+
|
39
|
+
'D02: Starwars test': |
|
40
|
+
starwars
|
41
|
+
.drop { keys.select { |key| key.end_with?('color') } }
|
42
|
+
.remove { v(:species) == 'NA' }
|
43
|
+
.group(:species) { [count(:species), mean(:height, :mass)] }
|
44
|
+
.slice { v(:count) > 1 }
|
45
|
+
|
46
|
+
'D03: Inport cars test': |
|
47
|
+
import_cars
|
48
|
+
.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
49
|
+
.to_wide(name: :Manufacturer, value: :Num_of_imported)
|
50
|
+
.transpose
|
51
|
+
|
52
|
+
'D04: Simpsons paradox test': |
|
53
|
+
simpsons_paradox_covid[simpsons_paradox_covid[:age_group] == 'under 50']
|
54
|
+
.group(:vaccine_status, :outcome)
|
55
|
+
.count
|
56
|
+
.then { |df| df.to_wide(name: :vaccine_status, value: df.keys[-1]) }
|
57
|
+
.assign do
|
58
|
+
[
|
59
|
+
[:'vaccinated_%', (100.0 * v(:vaccinated) / v(:vaccinated).sum)],
|
60
|
+
[:'unvaccinated_%', (100.0 * v(:unvaccinated) / v(:unvaccinated).sum)]
|
61
|
+
]
|
62
|
+
end
|
data/benchmark/group.yml
CHANGED
data/benchmark/reshape.yml
CHANGED