red_amber 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78fa72064f9494f0f756f15cf1daaacb3640535e899ba71ab080730c0d61b0b2
|
4
|
+
data.tar.gz: 3f2de4a449c38eb995ebcc0394a1a93633f097e533696edfa91267a596dcb580
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45a7c37cc746c606e8d4d2a43005da8154b60df21bf2cf6b2bafa9f7ad5f962a3e3c8e2f931e6543b20b8f6cd8c8a447b99b7f0127854d3bb716ea763ab3cae5
|
7
|
+
data.tar.gz: b3ac4479df1e30b75e7ccfcc48b09f709cea536c98072bfe937ae283c0cc1d203ab97388cf6f57c39fd31c6beceadcb850c3f14e8e07e5e196cc0c862634f36d
|
data/.rubocop.yml
CHANGED
@@ -8,12 +8,13 @@ require:
|
|
8
8
|
- rubocop-rake
|
9
9
|
|
10
10
|
AllCops:
|
11
|
-
# drop support for
|
12
|
-
TargetRubyVersion:
|
11
|
+
# drop support for 2.7 (since 0.3.0)
|
12
|
+
TargetRubyVersion: 3.0
|
13
13
|
# accept new cops if any
|
14
14
|
NewCops: enable
|
15
15
|
|
16
16
|
# ===
|
17
|
+
# Globally I change these 3 cops from default
|
17
18
|
|
18
19
|
# alias is hard to see separately
|
19
20
|
Style/Alias:
|
@@ -29,123 +30,185 @@ Style/TrailingCommaInHashLiteral:
|
|
29
30
|
# ===
|
30
31
|
|
31
32
|
# To let you know the possibility of refactoring ===
|
32
|
-
|
33
|
+
|
33
34
|
# avoid unused variable assignment
|
34
35
|
Rubycw/Rubycw:
|
35
36
|
Exclude:
|
36
37
|
- 'test/**/*'
|
37
|
-
Lint/UselessAssignment:
|
38
|
-
Exclude:
|
39
|
-
- 'test/**/*'
|
40
38
|
|
41
39
|
# Disabled to define Vector operators
|
40
|
+
# Offense count: 38
|
42
41
|
Lint/BinaryOperatorWithIdenticalOperands:
|
43
42
|
Exclude:
|
44
43
|
- 'test/test_vector_function.rb'
|
45
44
|
|
46
45
|
# Need for test with empty block
|
46
|
+
# Offense count: 1
|
47
|
+
# Configuration parameters: AllowComments, AllowEmptyLambdas.
|
47
48
|
Lint/EmptyBlock:
|
48
49
|
Exclude:
|
49
50
|
- 'test/test_group.rb'
|
50
51
|
|
52
|
+
# avoid unused variable assignment
|
53
|
+
# Offense count: 6
|
54
|
+
Lint/UselessAssignment:
|
55
|
+
Exclude:
|
56
|
+
- 'test/**/*'
|
57
|
+
|
51
58
|
# Max: 120
|
59
|
+
# This cop supports safe autocorrection (--autocorrect).
|
60
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
61
|
+
# URISchemes: http, https
|
52
62
|
Layout/LineLength:
|
53
|
-
Max:
|
63
|
+
Max: 90
|
54
64
|
Exclude:
|
55
65
|
- 'test/**/*'
|
56
66
|
|
57
67
|
# <= 17 satisfactory
|
58
68
|
# 18..30 unsatisfactory
|
59
69
|
# > 30 dangerous
|
70
|
+
# Offense count: 28
|
71
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes.
|
60
72
|
Metrics/AbcSize:
|
61
73
|
Max: 30
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
74
|
+
CountRepeatedAttributes: false
|
75
|
+
AllowedMethods: [
|
76
|
+
'join', # 51.87
|
77
|
+
'dataframe_info', # 46.5
|
78
|
+
'format_table', # 84.62
|
79
|
+
'to_long', # 33.66
|
80
|
+
'to_wide', #38.22
|
81
|
+
'slice_by', # 38.29
|
82
|
+
'remove', # 44.42
|
83
|
+
'drop', # 31.42
|
84
|
+
'[]', # 33.76
|
85
|
+
'split', # 37.35
|
86
|
+
]
|
70
87
|
|
71
88
|
# Max: 25
|
89
|
+
# Offense count: 57
|
90
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
|
91
|
+
# AllowedMethods: refine
|
72
92
|
Metrics/BlockLength:
|
73
93
|
Max: 25
|
74
94
|
Exclude:
|
75
95
|
- 'test/**/*'
|
96
|
+
- 'lib/red_amber/helper.rb' # 32
|
76
97
|
|
98
|
+
# It's ok if class is big
|
77
99
|
# Max: 100
|
100
|
+
# Offense count: 15
|
101
|
+
# Configuration parameters: CountComments, CountAsOne.
|
78
102
|
Metrics/ClassLength:
|
79
|
-
Max: 100
|
80
103
|
Exclude:
|
81
104
|
- 'test/**/*'
|
82
|
-
- 'lib/red_amber/data_frame.rb' #
|
83
|
-
- 'lib/red_amber/
|
84
|
-
- 'lib/red_amber/
|
105
|
+
- 'lib/red_amber/data_frame.rb' # 162
|
106
|
+
- 'lib/red_amber/group.rb' # 105
|
107
|
+
- 'lib/red_amber/vector.rb' # 152
|
85
108
|
|
109
|
+
# Only for monitoring. I will measure by PerceivedComplexity.
|
86
110
|
# Max: 7
|
111
|
+
# Offense count: 16
|
112
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
|
87
113
|
Metrics/CyclomaticComplexity:
|
88
114
|
Max: 12
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
115
|
+
AllowedMethods: [
|
116
|
+
'join', # 14
|
117
|
+
'format_table', # 21
|
118
|
+
'slice_by', # 16
|
119
|
+
'remove', # 14
|
120
|
+
'normalize_element', # 17
|
121
|
+
'[]', # 13
|
122
|
+
'parse_range', # 14
|
123
|
+
'split', # 33
|
124
|
+
]
|
96
125
|
|
97
126
|
# Max: 10
|
127
|
+
# Offense count: 34
|
128
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
|
98
129
|
Metrics/MethodLength:
|
99
130
|
Max: 30
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
131
|
+
AllowedMethods: [
|
132
|
+
'join', # 47
|
133
|
+
'dataframe_info', # 33
|
134
|
+
'format_table', # 53
|
135
|
+
'slice_by', # 38
|
136
|
+
'assign_update', # 35
|
137
|
+
]
|
105
138
|
|
106
139
|
# Max: 100
|
140
|
+
# Offense count: 5
|
141
|
+
# Configuration parameters: CountComments, CountAsOne.
|
107
142
|
Metrics/ModuleLength:
|
108
143
|
Max: 100
|
109
144
|
Exclude:
|
110
|
-
- 'lib/red_amber/data_frame_combinable.rb' # Max:
|
111
|
-
- 'lib/red_amber/data_frame_displayable.rb' # Max:
|
112
|
-
- 'lib/red_amber/data_frame_selectable.rb' # Max:
|
113
|
-
- 'lib/red_amber/data_frame_variable_operation.rb' # Max:
|
114
|
-
- 'lib/red_amber/vector_functions.rb' # Max:
|
145
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 149
|
146
|
+
- 'lib/red_amber/data_frame_displayable.rb' # Max: 226
|
147
|
+
- 'lib/red_amber/data_frame_selectable.rb' # Max: 175
|
148
|
+
- 'lib/red_amber/data_frame_variable_operation.rb' # Max: 171
|
149
|
+
- 'lib/red_amber/vector_functions.rb' # Max: 165
|
150
|
+
- 'lib/red_amber/vector_selectable.rb' # Max: 104
|
151
|
+
- 'lib/red_amber/vector_updatable.rb' # Max: 103
|
115
152
|
|
116
153
|
# Max: 5
|
154
|
+
# Offense count: 1
|
155
|
+
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
117
156
|
Metrics/ParameterLists:
|
118
|
-
|
119
|
-
|
120
|
-
# - 'lib/red_amber/data_frame_combinable.rb' # Max: 6
|
157
|
+
Exclude:
|
158
|
+
- 'lib/red_amber/data_frame_combinable.rb' # Max: 6, at 'join'
|
121
159
|
|
122
160
|
# Max: 8
|
161
|
+
# Offense count: 15
|
162
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
|
123
163
|
Metrics/PerceivedComplexity:
|
124
|
-
Max:
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
164
|
+
Max: 10
|
165
|
+
AllowedMethods: [
|
166
|
+
'join', # 14
|
167
|
+
'dataframe_info', # 13
|
168
|
+
'format_table', # 22
|
169
|
+
'slice_by', # 20
|
170
|
+
'remove', # 14
|
171
|
+
'drop', # 12
|
172
|
+
'filters', # 11
|
173
|
+
'normalize_element', # 17
|
174
|
+
'[]', # 11
|
175
|
+
'parse_range', # 15
|
176
|
+
'split', # 14
|
177
|
+
]
|
178
|
+
|
179
|
+
# Offense count: 1
|
180
|
+
# Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
181
|
+
# CheckDefinitionPathHierarchyRoots: lib, spec, test, src
|
182
|
+
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
132
183
|
Naming/FileName:
|
133
184
|
Exclude:
|
134
185
|
- 'lib/red-amber.rb'
|
135
186
|
|
136
187
|
# Necessary to define is_na, is_in, etc.
|
188
|
+
# Offense count: 3
|
189
|
+
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
|
190
|
+
# NamePrefix: is_, has_, have_
|
191
|
+
# ForbiddenPrefixes: is_, has_, have_
|
192
|
+
# AllowedMethods: is_a?
|
193
|
+
# MethodDefinitionMacros: define_method, define_singleton_method
|
137
194
|
Naming/PredicateName:
|
138
195
|
Exclude:
|
139
|
-
- 'lib/red_amber/vector_functions.rb'
|
140
196
|
- 'lib/red_amber/vector.rb'
|
197
|
+
- 'lib/red_amber/vector_functions.rb'
|
141
198
|
- 'lib/red_amber/vector_selectable.rb'
|
142
199
|
|
143
200
|
# Necessary to test when range.end == -1
|
201
|
+
# Offense count: 2
|
202
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
144
203
|
Style/SlicingWithRange:
|
145
204
|
Exclude:
|
146
205
|
- 'test/test_data_frame_selectable.rb'
|
147
206
|
|
148
207
|
# Necessary to Vector < 0 element-wise comparison
|
208
|
+
# Offense count: 5
|
209
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
210
|
+
# Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns, IgnoredMethods.
|
211
|
+
# SupportedStyles: predicate, comparison
|
149
212
|
Style/NumericPredicate:
|
150
213
|
Exclude:
|
151
214
|
- 'lib/red_amber/data_frame_selectable.rb'
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,93 @@
|
|
1
|
-
## [0.
|
1
|
+
## [0.3.0] - 2022-12-18
|
2
|
+
|
3
|
+
- Breaking change
|
4
|
+
- Supported Ruby version has changed from 2.7 to 3.0
|
5
|
+
- Upgrade minimum supported/required version of Ruby from 2.7 to 3.0 (#159, #160)
|
6
|
+
|
7
|
+
- Bug fixes
|
8
|
+
- Add check with #key? in DataFrame#method_missing (#140)
|
9
|
+
- Delete unnecessary backslash to suppress warning in unary functions (#140)
|
10
|
+
- Fix syntax in code_climate.yml (#144)
|
11
|
+
- Temporary disable simplecov test report (#149)
|
12
|
+
- Change Vector#[] to return Array or scalar (#148)
|
13
|
+
- Add missing simplecov HTML formatter (#148)
|
14
|
+
- Change return value of DataFrame#save to self (#160)
|
15
|
+
- Originally reported by kojix2.
|
16
|
+
|
17
|
+
- New features and improvements
|
18
|
+
- Update Vector#take to accept block (#148)
|
19
|
+
- Add properties of list Vectors (#148)
|
20
|
+
- Add Vector#split, #split_to_column, #split_to_row (#148)
|
21
|
+
- Add Vector#merge (#148)
|
22
|
+
|
23
|
+
- Refactoring
|
24
|
+
- Refactor code (#140)
|
25
|
+
- Add DataFrame.create as a faster constructor
|
26
|
+
- Refactor DataFrame.new using refinements and duck typing
|
27
|
+
- Refactor Vector.new using refinements and duck typing
|
28
|
+
- Add Vector.create as a faster constructor
|
29
|
+
- Refactor Group
|
30
|
+
- Refactor DataFrame#pick/#drop by refining Array
|
31
|
+
- Refactor DataFrame#pick/#drop
|
32
|
+
- Refactor nil treatment in pick/drop
|
33
|
+
- Refactor DataFrame#pick/#drop using new parser
|
34
|
+
- Refactor DataFrame#[]
|
35
|
+
- Refactor Vector#[], #take, #filter by updating parser
|
36
|
+
- Add for_keys option to parse_args
|
37
|
+
- Refactor Vector properties by refinements for Arrow::Array
|
38
|
+
- Refactor DataFrame selectable using Arrow::Array refinements instead of Vector methods
|
39
|
+
- Refactor DataFrame#assign
|
40
|
+
- Refine error message in DataFrame#to_long/to_wide (#143)
|
41
|
+
- Refactor Vector#take/filter returns arrow array (#148)
|
42
|
+
- Change LineLength in cop from 120 to 90 (#152)
|
43
|
+
- Refine DataFrame combinable (join) operations (#159)
|
44
|
+
- Refine DataFrame#join effectively using outputs options
|
45
|
+
- Simplify DataFrame set operations
|
46
|
+
|
47
|
+
- Improve in tests/CI
|
48
|
+
- Tests
|
49
|
+
- Update benchmark using 0.2.3 (#138)
|
50
|
+
- Update benchmark basic#02/pick by [] (#140)
|
51
|
+
- Update benchmark contexts and loop_count (#140)
|
52
|
+
- Add benchmark for vector (#140)
|
53
|
+
- Add tests for refinements (#140)
|
54
|
+
- Add benchmark for the series of DataFrame operations (#140)
|
55
|
+
- Add missing test for tdr and dictionary (#140)
|
56
|
+
- Add missing test for group#method with foreign key (#152)
|
57
|
+
- Add missing test for set operations and natural join (#152)
|
58
|
+
- Add missing test for DataFrame#[] with selecting by Array of illegal type (#152)
|
59
|
+
- Add missing test for DataFrame#assign when assigner size is mismatch (#152)
|
60
|
+
- Accept Hash as join keys in DataFrame join methods (#159)
|
61
|
+
|
62
|
+
- Cops
|
63
|
+
- Refactor/clean rubocop.yml (#138)
|
64
|
+
|
65
|
+
- CI
|
66
|
+
- Support Ruby 3.2 in CI test (#141)
|
67
|
+
- Send test coverage report to Code Climate (#144)
|
68
|
+
- Add test on Fedora (#151)
|
69
|
+
- Thanks to Benson Muite.
|
70
|
+
|
71
|
+
- Add workflow to generate document (#153)
|
72
|
+
- Thanks to kojix2.
|
73
|
+
|
74
|
+
- Support Code Climate test coverage report in CI (#155)
|
75
|
+
|
76
|
+
- Documentation
|
77
|
+
- Add YARD in data_frame.rb (#140)
|
78
|
+
- Fix YARD document in the code (#140)
|
79
|
+
- Add Code Climate badges of maintainability and coverage (#144)
|
80
|
+
- Add installation for Fedora in README (#147)
|
81
|
+
- Thanks to Benson Muite.
|
82
|
+
|
83
|
+
- Add Vector#split/merge in Vector.md (#148)
|
84
|
+
- Fix codeclimate badges in README (#155)
|
85
|
+
- Update YARD in DataFrame join methods (#159)
|
86
|
+
- Update jupyter notebook '89 examples of Redamber' (#160)
|
87
|
+
|
88
|
+
- Thanks
|
89
|
+
- Benson Muite
|
90
|
+
- kojix2
|
2
91
|
|
3
92
|
## [0.2.3] - 2022-11-16
|
4
93
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,22 +1,25 @@
|
|
1
1
|
# RedAmber
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/red_amber.svg)](https://badge.fury.io/rb/red_amber)
|
4
|
-
[![
|
4
|
+
[![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
|
5
|
+
[![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
6
|
+
[![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
7
|
+
[![Doc](https://img.shields.io/badge/docs-latest-blue)](https://heronshoes.github.io/red_amber/)
|
5
8
|
[![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/heronshoes/red_amber/discussions)
|
6
9
|
|
7
10
|
A simple dataframe library for Ruby.
|
8
11
|
|
9
|
-
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
12
|
+
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
13
|
+
[![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en)
|
10
14
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
11
15
|
|
12
|
-
![screenshot from jupyterlab](doc/image/screenshot.png)
|
16
|
+
![screenshot from jupyterlab](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/screenshot.png)
|
13
17
|
|
14
18
|
## Requirements
|
15
19
|
|
16
|
-
Supported Ruby version is >=
|
20
|
+
Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
|
17
21
|
|
18
|
-
|
19
|
-
I recommend Ruby 3 for performance.
|
22
|
+
- I decided to drop support for Ruby 2.7 without waiting for its EOL, because the problem of using a Hash argument together with keyword arguments, encountered when implementing DataFrame#join, cannot be solved in Ruby 2.7.
|
20
23
|
|
21
24
|
```ruby
|
22
25
|
# Libraries required
|
@@ -37,26 +40,35 @@ Install requirements before you install Red Amber.
|
|
37
40
|
See [Apache Arrow install document](https://arrow.apache.org/install/).
|
38
41
|
|
39
42
|
- Minimum installation example for the latest Ubuntu:
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
43
|
+
|
44
|
+
```
|
45
|
+
sudo apt update
|
46
|
+
sudo apt install -y -V ca-certificates lsb-release wget
|
47
|
+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
|
48
|
+
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
|
49
|
+
sudo apt update
|
50
|
+
sudo apt install -y -V libarrow-dev
|
51
|
+
sudo apt install -y -V libarrow-glib-dev
|
52
|
+
```
|
53
|
+
|
54
|
+
- On Fedora 38 (Rawhide):
|
55
|
+
|
56
|
+
```
|
57
|
+
sudo dnf update
|
58
|
+
sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
|
59
|
+
```
|
60
|
+
|
61
|
+
- On macOS, you can install Apache Arrow C++ library using Homebrew:
|
62
|
+
|
63
|
+
```
|
64
|
+
brew install apache-arrow
|
65
|
+
```
|
54
66
|
|
55
67
|
and GLib (C) package with:
|
56
68
|
|
57
|
-
|
58
|
-
|
59
|
-
|
69
|
+
```
|
70
|
+
brew install apache-arrow-glib
|
71
|
+
```
|
60
72
|
|
61
73
|
If you prepared Apache Arrow, add these lines to your Gemfile:
|
62
74
|
|
@@ -84,7 +96,7 @@ Also you can try the contents of this README interactively by [Binder](https://m
|
|
84
96
|
Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
|
85
97
|
The entity is a Red Arrow's Table object.
|
86
98
|
|
87
|
-
![dataframe model of RedAmber](doc/image/dataframe_model.png)
|
99
|
+
![dataframe model of RedAmber](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/dataframe_model.png)
|
88
100
|
|
89
101
|
Let's load the library and try some examples.
|
90
102
|
|
@@ -95,6 +107,11 @@ include RedAmber
|
|
95
107
|
|
96
108
|
### Example: diamonds dataset
|
97
109
|
|
110
|
+
First do (if you have not installed it yet) `
|
111
|
+
gem install red-datasets-arrow
|
112
|
+
`
|
113
|
+
then
|
114
|
+
|
98
115
|
```ruby
|
99
116
|
require 'datasets-arrow' # to load sample data
|
100
117
|
|
@@ -196,7 +213,7 @@ See [Vector.md](doc/Vector.md) for details.
|
|
196
213
|
|
197
214
|
## Jupyter notebook
|
198
215
|
|
199
|
-
[
|
216
|
+
[89 Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
|
200
217
|
([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) shows more examples in jupyter notebook.
|
201
218
|
|
202
219
|
You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
|
data/benchmark/basic.yml
CHANGED
@@ -1,10 +1,17 @@
|
|
1
|
+
loop_count: 3
|
2
|
+
|
1
3
|
contexts:
|
2
4
|
- name: HEAD
|
3
5
|
prelude: |
|
4
6
|
$LOAD_PATH.unshift(File.expand_path('lib'))
|
5
|
-
-
|
7
|
+
- name: 0.2.3
|
8
|
+
gems:
|
9
|
+
red_amber: 0.2.3
|
10
|
+
- name: 0.2.0
|
11
|
+
gems:
|
6
12
|
red_amber: 0.2.0
|
7
|
-
-
|
13
|
+
- name: 0.1.5
|
14
|
+
gems:
|
8
15
|
red_amber: 0.1.5
|
9
16
|
|
10
17
|
prelude: |
|
@@ -21,8 +28,8 @@ benchmark:
|
|
21
28
|
'B01: Pick([]) by a key name': |
|
22
29
|
df[:flight]
|
23
30
|
|
24
|
-
'
|
25
|
-
df[
|
31
|
+
'B02a: Pick([]) by key names': |
|
32
|
+
df[:carrier, :flight]
|
26
33
|
|
27
34
|
'B03: Pick by key names': |
|
28
35
|
df.pick(:carrier, :flight)
|
data/benchmark/combine.yml
CHANGED
@@ -0,0 +1,62 @@
|
|
1
|
+
loop_count: 3
|
2
|
+
|
3
|
+
contexts:
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
- name: 0.2.3
|
8
|
+
gems:
|
9
|
+
red_amber: 0.2.3
|
10
|
+
- name: 0.2.0
|
11
|
+
gems:
|
12
|
+
red_amber: 0.2.0
|
13
|
+
|
14
|
+
prelude: |
|
15
|
+
require 'red_amber'
|
16
|
+
require 'datasets-arrow'
|
17
|
+
|
18
|
+
diamonds = RedAmber::DataFrame.new(Datasets::Diamonds.new.to_arrow)
|
19
|
+
|
20
|
+
starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
|
21
|
+
|
22
|
+
uri = URI("https://raw.githubusercontent.com/heronshoes/red_amber/master/test/entity/import_cars.tsv")
|
23
|
+
import_cars = RedAmber::DataFrame.load(uri)
|
24
|
+
|
25
|
+
ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
|
26
|
+
simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
|
27
|
+
|
28
|
+
benchmark:
|
29
|
+
'D01: Diamonds test': |
|
30
|
+
diamonds
|
31
|
+
.slice { v(:carat) > 1 }
|
32
|
+
.pick(:cut, :price)
|
33
|
+
.group(:cut)
|
34
|
+
.mean
|
35
|
+
.sort('-mean(price)')
|
36
|
+
.rename('mean(price)': :mean_price_USD)
|
37
|
+
.assign { [:mean_price_JPY, v(:mean_price_USD) * 110.0] }
|
38
|
+
|
39
|
+
'D02: Starwars test': |
|
40
|
+
starwars
|
41
|
+
.drop { keys.select { |key| key.end_with?('color') } }
|
42
|
+
.remove { v(:species) == 'NA' }
|
43
|
+
.group(:species) { [count(:species), mean(:height, :mass)] }
|
44
|
+
.slice { v(:count) > 1 }
|
45
|
+
|
46
|
+
'D03: Inport cars test': |
|
47
|
+
import_cars
|
48
|
+
.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
49
|
+
.to_wide(name: :Manufacturer, value: :Num_of_imported)
|
50
|
+
.transpose
|
51
|
+
|
52
|
+
'D04: Simpsons paradox test': |
|
53
|
+
simpsons_paradox_covid[simpsons_paradox_covid[:age_group] == 'under 50']
|
54
|
+
.group(:vaccine_status, :outcome)
|
55
|
+
.count
|
56
|
+
.then { |df| df.to_wide(name: :vaccine_status, value: df.keys[-1]) }
|
57
|
+
.assign do
|
58
|
+
[
|
59
|
+
[:'vaccinated_%', (100.0 * v(:vaccinated) / v(:vaccinated).sum)],
|
60
|
+
[:'unvaccinated_%', (100.0 * v(:unvaccinated) / v(:unvaccinated).sum)]
|
61
|
+
]
|
62
|
+
end
|
data/benchmark/group.yml
CHANGED
data/benchmark/reshape.yml
CHANGED