red_amber 0.3.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 264e7637475fd01946900335751a1592a3859e9bfa772ecc0800ab05c4d852f0
|
4
|
+
data.tar.gz: a57400445419698a66d6b5c94e15fa8c040f2f3930f9fbf75603ffb6e18bd9cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0fdbcdb732e36bb866a8251800ab3fa1a714fa075234bf8cd516f2542ab6704ebfa429a7177da2bd8cd6fa6eb1158efb0d68f46f43d1dc088a9a0f0debdc5c54
|
7
|
+
data.tar.gz: f9c1dffaa157ecf34b0b4fec6c1d7972b4773bbf7a11101a345172d621753cd9fc3818753b329dd2906a506af294d6a96c0180a0fb4dc84c2b54bceef6b520f5
|
data/.rubocop.yml
CHANGED
@@ -31,16 +31,28 @@ Style/TrailingCommaInHashLiteral:
|
|
31
31
|
|
32
32
|
# To let you know the possibility of refactoring ===
|
33
33
|
|
34
|
-
#
|
35
|
-
|
34
|
+
# Max: 120
|
35
|
+
# This cop supports safe autocorrection (--autocorrect).
|
36
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
37
|
+
# URISchemes: http, https
|
38
|
+
Layout/LineLength:
|
39
|
+
Max: 90
|
36
40
|
Exclude:
|
37
41
|
- 'test/**/*'
|
38
42
|
|
43
|
+
# EnforcedStyle: aligned
|
44
|
+
Layout/MultilineMethodCallIndentation:
|
45
|
+
EnforcedStyle: indented_relative_to_receiver
|
46
|
+
|
39
47
|
# Disabled to define Vector operators
|
40
48
|
# Offense count: 38
|
41
49
|
Lint/BinaryOperatorWithIdenticalOperands:
|
42
50
|
Exclude:
|
43
|
-
- 'test/
|
51
|
+
- 'test/test_vector_binary_element_wise.rb'
|
52
|
+
|
53
|
+
Lint/Debugger:
|
54
|
+
Exclude:
|
55
|
+
- 'bin/example'
|
44
56
|
|
45
57
|
# Need for test with empty block
|
46
58
|
# Offense count: 1
|
@@ -55,15 +67,6 @@ Lint/UselessAssignment:
|
|
55
67
|
Exclude:
|
56
68
|
- 'test/**/*'
|
57
69
|
|
58
|
-
# Max: 120
|
59
|
-
# This cop supports safe autocorrection (--autocorrect).
|
60
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
61
|
-
# URISchemes: http, https
|
62
|
-
Layout/LineLength:
|
63
|
-
Max: 90
|
64
|
-
Exclude:
|
65
|
-
- 'test/**/*'
|
66
|
-
|
67
70
|
# <= 17 satisfactory
|
68
71
|
# 18..30 unsatisfactory
|
69
72
|
# > 30 dangerous
|
@@ -83,6 +86,7 @@ Metrics/AbcSize:
|
|
83
86
|
'drop', # 31.42
|
84
87
|
'[]', # 33.76
|
85
88
|
'split', # 37.35
|
89
|
+
'aggregate', # 38.13
|
86
90
|
]
|
87
91
|
|
88
92
|
# Max: 25
|
@@ -104,7 +108,9 @@ Metrics/ClassLength:
|
|
104
108
|
- 'test/**/*'
|
105
109
|
- 'lib/red_amber/data_frame.rb' # 162
|
106
110
|
- 'lib/red_amber/group.rb' # 105
|
111
|
+
- 'lib/red_amber/subframes.rb' # 110
|
107
112
|
- 'lib/red_amber/vector.rb' # 152
|
113
|
+
- 'lib/red_amber/vector_binary_element_wise.rb' # 109
|
108
114
|
|
109
115
|
# Only for monitoring. I will measure by PerceivedComplexity.
|
110
116
|
# Max: 7
|
@@ -113,14 +119,17 @@ Metrics/ClassLength:
|
|
113
119
|
Metrics/CyclomaticComplexity:
|
114
120
|
Max: 12
|
115
121
|
AllowedMethods: [
|
116
|
-
'
|
122
|
+
'split', # 33
|
117
123
|
'format_table', # 21
|
124
|
+
'normalize_element', # 17
|
118
125
|
'slice_by', # 16
|
126
|
+
'assign_update', # 14
|
127
|
+
'join', # 14
|
128
|
+
'parse_range', # 14
|
119
129
|
'remove', # 14
|
120
|
-
'normalize_element', # 17
|
121
130
|
'[]', # 13
|
122
|
-
'
|
123
|
-
'
|
131
|
+
'drop', # 13
|
132
|
+
'aggregate', # 13
|
124
133
|
]
|
125
134
|
|
126
135
|
# Max: 10
|
@@ -134,6 +143,8 @@ Metrics/MethodLength:
|
|
134
143
|
'format_table', # 53
|
135
144
|
'slice_by', # 38
|
136
145
|
'assign_update', # 35
|
146
|
+
'drop', # 32
|
147
|
+
'aggregate', # 31
|
137
148
|
]
|
138
149
|
|
139
150
|
# Max: 100
|
@@ -163,17 +174,24 @@ Metrics/ParameterLists:
|
|
163
174
|
Metrics/PerceivedComplexity:
|
164
175
|
Max: 10
|
165
176
|
AllowedMethods: [
|
166
|
-
'join', # 14
|
167
|
-
'dataframe_info', # 13
|
168
177
|
'format_table', # 22
|
169
178
|
'slice_by', # 20
|
170
|
-
'remove', # 14
|
171
|
-
'drop', # 12
|
172
|
-
'filters', # 11
|
173
179
|
'normalize_element', # 17
|
174
|
-
'
|
180
|
+
'assign_update', # 15
|
175
181
|
'parse_range', # 15
|
182
|
+
'join', # 14
|
183
|
+
'remove', # 14
|
176
184
|
'split', # 14
|
185
|
+
'dataframe_info', # 13
|
186
|
+
'replace', # 13
|
187
|
+
'drop', # 12
|
188
|
+
'initialize', # 12
|
189
|
+
'aggregate', # 12
|
190
|
+
'[]', # 11
|
191
|
+
'filters', # 11
|
192
|
+
'html_table', # 11
|
193
|
+
'slice', # 11
|
194
|
+
'pick', # 11
|
177
195
|
]
|
178
196
|
|
179
197
|
# Offense count: 1
|
@@ -197,6 +215,18 @@ Naming/PredicateName:
|
|
197
215
|
- 'lib/red_amber/vector_functions.rb'
|
198
216
|
- 'lib/red_amber/vector_selectable.rb'
|
199
217
|
|
218
|
+
# avoid unused variable assignment
|
219
|
+
Rubycw/Rubycw:
|
220
|
+
Exclude:
|
221
|
+
- 'test/**/*'
|
222
|
+
- 'bin/example'
|
223
|
+
|
224
|
+
# Offense count: 16
|
225
|
+
# This cop supports safe autocorrection (--autocorrect).
|
226
|
+
Style/OperatorMethodCall:
|
227
|
+
Exclude:
|
228
|
+
- 'test/test_vector_binary_element_wise.rb'
|
229
|
+
|
200
230
|
# Necessary to test when range.end == -1
|
201
231
|
# Offense count: 2
|
202
232
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
@@ -204,6 +234,10 @@ Style/SlicingWithRange:
|
|
204
234
|
Exclude:
|
205
235
|
- 'test/test_data_frame_selectable.rb'
|
206
236
|
|
237
|
+
Style/MixinUsage:
|
238
|
+
Exclude:
|
239
|
+
- 'bin/example'
|
240
|
+
|
207
241
|
# Necessary to Vector < 0 element-wise comparison
|
208
242
|
# Offense count: 5
|
209
243
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
data/.yardopts
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,181 @@
|
|
1
|
+
## [0.4.1] - 2023-03-11
|
2
|
+
|
3
|
+
- Breaking change
|
4
|
+
- Remove Vector.aggregate? method (#200)
|
5
|
+
|
6
|
+
- Bug fixes
|
7
|
+
- Return self in DataFrame#drop when dropper is empty (reverts 746ac263) (#193)
|
8
|
+
- Return self in DataFrame#rename when renaming to same name (#193)
|
9
|
+
- Return self in DataFrame#pick when pick itself (#199)
|
10
|
+
- Fix column width for non-ascii elements in DataFrame#to_s (#193)
|
11
|
+
- This change uses String#width.
|
12
|
+
- Fix DataFrame#to_iruby when data is date32 type (#193)
|
13
|
+
- Fix DataFrame#shorthand to show temporal type data simply (#193)
|
14
|
+
- Fix Vector#rank when data is ChunkedArray (#198)
|
15
|
+
- Fix Vector element-wise functions with nil as scalar (#198)
|
16
|
+
- Support :force_order for all methods of join family (#199)
|
17
|
+
- Supports :force_order option to force sorting after join for the whole #join family.
|
18
|
+
- This will be valuable in some cases such as large dataframes.
|
19
|
+
- Ensure baseframe's schema for SubFrames (#200)
|
20
|
+
|
21
|
+
- New features and improvements
|
22
|
+
- Add Vector#first, #last method (#198)
|
23
|
+
- This method will be used in SubFrames feature.
|
24
|
+
- Add Vector#modulo method (#198)
|
25
|
+
- The divmod function in Arrow C++ is still in draft state.
|
26
|
+
This method was created by combining existing functions
|
27
|
+
- Add Vector#quotient method (#198)
|
28
|
+
- Add aliases #div, #mod, #mul, #pow, #quo and #sub for Vector (#198)
|
29
|
+
- Add Vector#*_checked functions (#198)
|
30
|
+
- These functions will check numeric range overflow.
|
31
|
+
- Add 'tdra' and 'plain' in display mode (#193)
|
32
|
+
- The plain mode and default inspect will show up to 128 rows and 128 columns.
|
33
|
+
- Add String#width method in refinements (#193)
|
34
|
+
- This will be used to update DataFrame#to_s.
|
35
|
+
- Introduce pre-loaded REPL environment (#199)
|
36
|
+
- This commit will add bin/example and it will start irb environment
|
37
|
+
with enabled commonly used datasets such as penguins, diamonds, etc.
|
38
|
+
- Upgrade SubFrames#aggregate to accept block (#200)
|
39
|
+
|
40
|
+
- Refactoring
|
41
|
+
- Use symbolized keys in refinements of Table#keys, #key? (#193)
|
42
|
+
- This allows Tables and DataFrames to be treated in the same manner.
|
43
|
+
- Use key_name.succ in suffix of DataFrame#join (#193)
|
44
|
+
- This will make it simple to get a name candidate.
|
45
|
+
- Use ||= to memorize instance variables (#193)
|
46
|
+
- Refine vector projection to use #variables (#193)
|
47
|
+
- #variables is fastest when picking Vectors.
|
48
|
+
- Refine Vector#is_in to avoid #pack (#198)
|
49
|
+
- Refine Vector#index (#198)
|
50
|
+
|
51
|
+
- Improve in tests/CI
|
52
|
+
- Tests
|
53
|
+
- Update benchmarks to test from older version (#193)
|
54
|
+
- Refine test of Vector function with scalar (#198)
|
55
|
+
- Refine test subframes and test_vector_selectable (#200)
|
56
|
+
|
57
|
+
- Cops
|
58
|
+
- CI
|
59
|
+
|
60
|
+
- Documentation
|
61
|
+
- Update documents(small fix) (#201)
|
62
|
+
|
63
|
+
- GitHub site
|
64
|
+
|
65
|
+
- Thanks
|
66
|
+
|
67
|
+
## [0.4.0] - 2023-02-25
|
68
|
+
|
69
|
+
- Breaking change
|
70
|
+
- Upgrade dependency to Arrow 11.0.0 (#188)
|
71
|
+
|
72
|
+
- Bug fixes
|
73
|
+
- Add :force_order option for DataFrame#join (#174)
|
74
|
+
- Return error for empty DataFrame in DataFrame#filter (#172)
|
75
|
+
- Accept ChunkedArray in DataFrame#filter (#172)
|
76
|
+
- Fix Vector#replace to accept Arrow::Array as a replacer (#179)
|
77
|
+
- Fix Vector#round_to_multiple to accept Float or Integer (#180)
|
78
|
+
- Change Vector atan2 to a class method (#180)
|
79
|
+
- Fix Vector#shift when boolean Vector (#184)
|
80
|
+
- Fix processing empty SubFrames (#183)
|
81
|
+
- Do not check object id in DataFrame#rename, #drop for self (#188)
|
82
|
+
|
83
|
+
- New features and improvements
|
84
|
+
- Accept a block in DataFrame#filter (#172)
|
85
|
+
- Add Vector.aggregate? method (#175)
|
86
|
+
- Introduce Vector#propagate method (#175)
|
87
|
+
- Add Vector#rank methods (#176)
|
88
|
+
- Add Vector#sample method (#176)
|
89
|
+
- Add Vector#sort method (#176)
|
90
|
+
- Promote DataFrame#shape_str to public (#184)
|
91
|
+
- Introduce Vector#concatenate (#184)
|
92
|
+
- Add #numeric? in refinements of Array (#184)
|
93
|
+
- Add Vector#cumulative_sum_checked and #cumsum (#184)
|
94
|
+
- Add Vector#resolve method (#184)
|
95
|
+
- Add DataFrame#tdra method (#184)
|
96
|
+
- Add #expand as an alias for Vector#propagate (#184)
|
97
|
+
- Add #glimpse as an alias for DataFrame#tdr (#184)
|
98
|
+
- New class SubFrames (#183)
|
99
|
+
- Introduce class SubFrames
|
100
|
+
- Memorize dataframes in SubFrames
|
101
|
+
- Add @frames to memorize sub DataFrames
|
102
|
+
- Accept filters in SubFrames.new
|
103
|
+
- Accept block in SubFrames.new
|
104
|
+
- Add SubFrames.by_filter
|
105
|
+
- Introduce methods creating SubFrames from DataFrame
|
106
|
+
- Introduce SubFrames#each method
|
107
|
+
- Add SubFrames#to_s method
|
108
|
+
- Add SubFrames#concatenate method
|
109
|
+
- Add SubFrames#offset_indices method
|
110
|
+
- SubFrames#aggregate method
|
111
|
+
- Redefine SubFrames#map to return SubFrames
|
112
|
+
- Define SubFrame#map dynamically
|
113
|
+
- Add SubFrames#assign method
|
114
|
+
- Redefine SubFrames#select to return SubFrames
|
115
|
+
- Add SubFrames#reject method
|
116
|
+
- Add SubFrames#filter_map method
|
117
|
+
- Refine DataFrame#indices memorizing @indices
|
118
|
+
- Rename SubFrames#universal_frame as #baseframe
|
119
|
+
- Set Group iteration feature to @api private
|
120
|
+
|
121
|
+
- Refactoring
|
122
|
+
- Generate Vector functions in class method (#177)
|
123
|
+
- Set Constant visibility to private (#179)
|
124
|
+
- Separate test_vector_function (#179)
|
125
|
+
- Relocate methods in DataFrameIndexable (#179)
|
126
|
+
- Rename Array refinements to the same name as Vector (#184)
|
127
|
+
|
128
|
+
- Improve in tests/CI
|
129
|
+
- Tests
|
130
|
+
- Update benchmarks to set 0.3.0 as a reference (#167)
|
131
|
+
- Move test of Vector#logb to proper location (#180)
|
132
|
+
|
133
|
+
- Cops
|
134
|
+
- Update .rubocop.yml to align with latest cops (#174)
|
135
|
+
- Unify style of MethodCallIndentation as relative to receiver (#184)
|
136
|
+
|
137
|
+
- CI
|
138
|
+
- Fix setting up Arrow by homebrew in CI (#167)
|
139
|
+
- Fix CI error on homebrew deleting python link (#167)
|
140
|
+
- Set cache-version to get new C extensions in CI (#173)
|
141
|
+
- Thanks to @kou for suggestion.
|
142
|
+
|
143
|
+
- Documentation
|
144
|
+
- Update DataFrame.md about loading csv without headers (#165)
|
145
|
+
- Thanks to kojix2
|
146
|
+
- Update YARD in DataFrame combinable (#168)
|
147
|
+
- Update comment for Ruby 2.7 support in README.md
|
148
|
+
- Update license year
|
149
|
+
- Update README (#172)
|
150
|
+
- Update Vector.md and yardoc in #propagate (#175)
|
151
|
+
- Use customized style sheet for YARD (#179)
|
152
|
+
- Add examples for the doc of #pick and #drop (#179)
|
153
|
+
- Add examples to YARD in DataFrame reshaping methods (#179)
|
154
|
+
- Update documents in DataFrameDisplayable (#179)
|
155
|
+
- Update documents in DataFrameVariableOperation (#179)
|
156
|
+
- Update document for dynamically generated methods (#179)
|
157
|
+
- Unify style in document (#179)
|
158
|
+
- Update documents in DataFrameSelectable (#179)
|
159
|
+
- Update documents of basic Vector methods (#179)
|
160
|
+
- Update document in VectorUpdatable (#179)
|
161
|
+
- Update document of Group (#179)
|
162
|
+
- Update document of DataFrameLoadSave (#180)
|
163
|
+
- Add examples for document of ArrowFunction (#180)
|
164
|
+
- Update document of Vector_unary_aggregation (#180)
|
165
|
+
- Update document of Vector_unary_element_wise (#180)
|
166
|
+
- Update document of Vector_binary_element_wise (#180)
|
167
|
+
- Add documentation to give comparison of dataframes (#169)
|
168
|
+
- Thanks to Benson Muite
|
169
|
+
- Update documents for consistency of method indentation (#189)
|
170
|
+
- Update CHANGELOG (#189)
|
171
|
+
- Update README for 0.4.0 (#189)
|
172
|
+
|
173
|
+
- GitHub site
|
174
|
+
|
175
|
+
- Thanks
|
176
|
+
- kojix2
|
177
|
+
- Benson Muite
|
178
|
+
|
1
179
|
## [0.3.0] - 2022-12-18
|
2
180
|
|
3
181
|
- Breaking change
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c) 2022 Hirokazu SUZUKI (heronshoes)
|
3
|
+
Copyright (c) 2022-2023 Hirokazu SUZUKI (heronshoes)
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# RedAmber
|
2
2
|
|
3
|
-
[![Gem Version](https://
|
3
|
+
[![Gem Version](https://img.shields.io/gem/v/red_amber?color=brightgreen)](https://rubygems.org/gems/red_amber)
|
4
4
|
[![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
|
5
5
|
[![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
6
6
|
[![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
@@ -10,34 +10,32 @@
|
|
10
10
|
A simple dataframe library for Ruby.
|
11
11
|
|
12
12
|
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
13
|
-
[![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en)
|
13
|
+
[![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en) [![Gem Version](https://img.shields.io/gem/v/red-arrow?color=brightgreen)](https://rubygems.org/gems/red-arrow)
|
14
14
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
15
15
|
|
16
16
|
![screenshot from jupyterlab](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/screenshot.png)
|
17
17
|
|
18
18
|
## Requirements
|
19
|
-
|
19
|
+
### Ruby
|
20
20
|
Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
|
21
|
+
- I decided to remove support for Ruby 2.7 without waiting for its EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
|
21
22
|
|
22
|
-
|
23
|
-
|
23
|
+
### Libraries
|
24
24
|
```ruby
|
25
|
-
#
|
26
|
-
gem 'red-
|
27
|
-
|
28
|
-
gem 'red-parquet', '~> 10.0.0' # Optional, if you use IO from/to parquet
|
25
|
+
gem 'red-arrow', '~> 11.0.0' # Requires Apache Arrow (see installation below)
|
26
|
+
gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
|
29
27
|
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
30
28
|
```
|
31
29
|
|
32
30
|
## Installation
|
33
31
|
|
34
|
-
Install requirements before you install
|
32
|
+
Install requirements before you install RedAmber.
|
35
33
|
|
36
|
-
- Apache Arrow (~>
|
37
|
-
- Apache Arrow GLib (~>
|
38
|
-
- Apache Parquet GLib (~>
|
34
|
+
- Apache Arrow (~> 11.0.0)
|
35
|
+
- Apache Arrow GLib (~> 11.0.0)
|
36
|
+
- Apache Parquet GLib (~> 11.0.0) # If you use IO from/to parquet
|
39
37
|
|
40
|
-
|
38
|
+
See [Apache Arrow install document](https://arrow.apache.org/install/).
|
41
39
|
|
42
40
|
- Minimum installation example for the latest Ubuntu:
|
43
41
|
|
@@ -58,43 +56,44 @@ Install requirements before you install Red Amber.
|
|
58
56
|
sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
|
59
57
|
```
|
60
58
|
|
61
|
-
- On macOS,
|
59
|
+
- On macOS, using Homebrew:
|
62
60
|
|
63
61
|
```
|
64
62
|
brew install apache-arrow
|
65
|
-
```
|
66
|
-
|
67
|
-
and GLib (C) package with:
|
68
|
-
|
69
|
-
```
|
70
63
|
brew install apache-arrow-glib
|
71
64
|
```
|
72
65
|
|
73
66
|
If you prepared Apache Arrow, add these lines to your Gemfile:
|
74
67
|
|
75
68
|
```ruby
|
76
|
-
gem 'red-arrow', '~>
|
69
|
+
gem 'red-arrow', '~> 11.0.0'
|
77
70
|
gem 'red_amber'
|
78
|
-
gem 'red-parquet', '~>
|
71
|
+
gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
|
79
72
|
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
80
73
|
gem 'red-datasets-arrow' # Optional, recommended if you use Red Datasets
|
81
74
|
gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
|
82
75
|
```
|
83
76
|
|
84
|
-
And then execute `bundle install` or install
|
77
|
+
And then execute `bundle install` or install them yourself such as `gem install red_amber`.
|
85
78
|
|
86
79
|
## Docker image and Jupyter Notebook
|
87
80
|
|
88
|
-
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to
|
81
|
+
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
|
89
82
|
|
90
83
|
Also you can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
|
91
84
|
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
|
92
85
|
|
86
|
+
## Comparison of DataFrames
|
87
|
+
|
88
|
+
Comparison of basic features of RedAmber with Python
|
89
|
+
[pandas](https://pandas.pydata.org/),
|
90
|
+
R [Tidyverse](https://www.tidyverse.org/) and
|
91
|
+
Julia [Dataframes](https://dataframes.juliadata.org/stable/) is in [DataFrame_Comparison.md](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
|
93
92
|
|
94
93
|
## Data frame in `RedAmber`
|
95
94
|
|
96
95
|
Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
|
97
|
-
|
96
|
+
Its entity is a Red Arrow's Table object.
|
98
97
|
|
99
98
|
![dataframe model of RedAmber](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/dataframe_model.png)
|
100
99
|
|
@@ -116,7 +115,7 @@ then
|
|
116
115
|
require 'datasets-arrow' # to load sample data
|
117
116
|
|
118
117
|
dataset = Datasets::Diamonds.new
|
119
|
-
diamonds = DataFrame.new(dataset) #
|
118
|
+
diamonds = DataFrame.new(dataset) # before v0.2.3, should be `dataset.to_arrow`
|
120
119
|
|
121
120
|
# =>
|
122
121
|
#<RedAmber::DataFrame : 53940 x 10 Vectors, 0x000000000000f668>
|
@@ -137,7 +136,7 @@ For example, we can compute mean prices per cut for the data larger than 1 carat
|
|
137
136
|
|
138
137
|
```ruby
|
139
138
|
df = diamonds
|
140
|
-
.slice { carat > 1 }
|
139
|
+
.slice { carat > 1 } # or use #filter instead of #slice
|
141
140
|
.group(:cut)
|
142
141
|
.mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.
|
143
142
|
.sort('-mean(price)')
|
@@ -175,7 +174,7 @@ df.rename('mean(price)': :mean_price_USD)
|
|
175
174
|
|
176
175
|
### Example: starwars dataset
|
177
176
|
|
178
|
-
Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data
|
177
|
+
Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data cleaning.
|
179
178
|
|
180
179
|
```ruby
|
181
180
|
uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
|
@@ -186,7 +185,7 @@ starwars
|
|
186
185
|
.drop(0) # delete unnecessary index column
|
187
186
|
.remove { species == "NA" } # delete unnecessary rows
|
188
187
|
.group(:species) { [count(:species), mean(:height, :mass)] }
|
189
|
-
.slice { count > 1 }
|
188
|
+
.slice { count > 1 } # or use #filter instead of slice
|
190
189
|
|
191
190
|
# =>
|
192
191
|
#<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000000f848>
|
@@ -213,7 +212,7 @@ See [Vector.md](doc/Vector.md) for details.
|
|
213
212
|
|
214
213
|
## Jupyter notebook
|
215
214
|
|
216
|
-
[
|
215
|
+
[Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
|
217
216
|
([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) shows more examples in jupyter notebook.
|
218
217
|
|
219
218
|
You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
|
data/benchmark/basic.yml
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name:
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.2.3
|
4
|
+
- name: 0.1.5
|
8
5
|
gems:
|
9
|
-
red_amber: 0.
|
6
|
+
red_amber: 0.1.5
|
10
7
|
- name: 0.2.0
|
11
8
|
gems:
|
12
9
|
red_amber: 0.2.0
|
13
|
-
- name: 0.
|
10
|
+
- name: 0.3.0
|
14
11
|
gems:
|
15
|
-
red_amber: 0.
|
12
|
+
red_amber: 0.3.0
|
13
|
+
- name: HEAD
|
14
|
+
prelude: |
|
15
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
16
16
|
|
17
17
|
prelude: |
|
18
18
|
require 'red_amber'
|
data/benchmark/combine.yml
CHANGED
data/benchmark/dataframe.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.2.3
|
8
|
-
gems:
|
9
|
-
red_amber: 0.2.3
|
10
4
|
- name: 0.2.0
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.0
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
@@ -19,8 +19,14 @@ prelude: |
|
|
19
19
|
|
20
20
|
starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
|
21
21
|
|
22
|
-
|
23
|
-
|
22
|
+
import_cars = RedAmber::DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
|
23
|
+
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
24
|
+
2017 28336 52527 25427 68221 49040
|
25
|
+
2018 26473 50982 25984 67554 51961
|
26
|
+
2019 24222 46814 23813 66553 46794
|
27
|
+
2020 22304 35712 20196 57041 36576
|
28
|
+
2021 22535 35905 18211 51722 35215
|
29
|
+
TSV
|
24
30
|
|
25
31
|
ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
|
26
32
|
simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
|
@@ -43,7 +49,7 @@ benchmark:
|
|
43
49
|
.group(:species) { [count(:species), mean(:height, :mass)] }
|
44
50
|
.slice { v(:count) > 1 }
|
45
51
|
|
46
|
-
'D03:
|
52
|
+
'D03: Import cars test': |
|
47
53
|
import_cars
|
48
54
|
.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
49
55
|
.to_wide(name: :Manufacturer, value: :Num_of_imported)
|
data/benchmark/group.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.2.3
|
8
|
-
gems:
|
9
|
-
red_amber: 0.2.3
|
10
4
|
- name: 0.2.2
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.2
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
data/benchmark/reshape.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.2.3
|
8
|
-
gems:
|
9
|
-
red_amber: 0.2.3
|
10
4
|
- name: 0.2.2
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.2
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
data/benchmark/vector.yml
CHANGED
@@ -1,12 +1,15 @@
|
|
1
1
|
loop_count: 10
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
4
|
- name: 0.2.0
|
8
5
|
gems:
|
9
6
|
red_amber: 0.2.0
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
10
13
|
|
11
14
|
prelude: |
|
12
15
|
require 'red_amber'
|