red_amber 0.4.0 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -5
- data/CHANGELOG.md +104 -4
- data/README.md +18 -16
- data/benchmark/basic.yml +8 -8
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -6
- data/doc/CODE_OF_CONDUCT.md +1 -1
- data/docker/.env +4 -0
- data/docker/Dockerfile +66 -0
- data/docker/Gemfile +21 -0
- data/docker/Gemfile.lock +80 -0
- data/docker/docker-compose.yml +21 -0
- data/docker/example +74 -0
- data/docker/notebook/examples_of_red_amber.ipynb +8562 -0
- data/docker/notebook/red-amber.ipynb +188 -0
- data/docker/readme.md +118 -0
- data/lib/red_amber/data_frame.rb +25 -10
- data/lib/red_amber/data_frame_combinable.rb +117 -73
- data/lib/red_amber/data_frame_displayable.rb +100 -51
- data/lib/red_amber/data_frame_indexable.rb +4 -4
- data/lib/red_amber/data_frame_reshaping.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +1 -4
- data/lib/red_amber/data_frame_variable_operation.rb +7 -2
- data/lib/red_amber/group.rb +17 -18
- data/lib/red_amber/helper.rb +4 -4
- data/lib/red_amber/refinements.rb +15 -2
- data/lib/red_amber/subframes.rb +319 -191
- data/lib/red_amber/vector.rb +7 -30
- data/lib/red_amber/vector_binary_element_wise.rb +149 -1
- data/lib/red_amber/vector_selectable.rb +49 -12
- data/lib/red_amber/vector_unary_element_wise.rb +93 -0
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +3 -3
- metadata +16 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 498f580bc6fc79e36b199cc3ec62c38638e4df903e956a7a78947d9091748d0c
|
4
|
+
data.tar.gz: 24c3f25ff47b6bba0af26d1a1c77a80a56c2115e0bd76d26ba3f769f71d3557b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '085caa83703f4b9be0a3baae3318f0bcb606bbf264347ad3db431b29fb59c87e05b142ab1fd7e8a0af0a3d64b0b81055f1e85d82414f773d01456cf5a386df25'
|
7
|
+
data.tar.gz: 901e7eda6560eb2a9ab378d672b39d3ba08ae47012d8465ce541a465d744ce45d93a451ef2bff8fdf838fc1040b12facc58c87d95c6f6bedb89c5c138b95005c
|
data/.rubocop.yml
CHANGED
@@ -44,17 +44,16 @@ Layout/LineLength:
|
|
44
44
|
Layout/MultilineMethodCallIndentation:
|
45
45
|
EnforcedStyle: indented_relative_to_receiver
|
46
46
|
|
47
|
-
# avoid unused variable assignment
|
48
|
-
Rubycw/Rubycw:
|
49
|
-
Exclude:
|
50
|
-
- 'test/**/*'
|
51
|
-
|
52
47
|
# Disabled to define Vector operators
|
53
48
|
# Offense count: 38
|
54
49
|
Lint/BinaryOperatorWithIdenticalOperands:
|
55
50
|
Exclude:
|
56
51
|
- 'test/test_vector_binary_element_wise.rb'
|
57
52
|
|
53
|
+
Lint/Debugger:
|
54
|
+
Exclude:
|
55
|
+
- 'docker/example'
|
56
|
+
|
58
57
|
# Need for test with empty block
|
59
58
|
# Offense count: 1
|
60
59
|
# Configuration parameters: AllowComments, AllowEmptyLambdas.
|
@@ -87,6 +86,7 @@ Metrics/AbcSize:
|
|
87
86
|
'drop', # 31.42
|
88
87
|
'[]', # 33.76
|
89
88
|
'split', # 37.35
|
89
|
+
'aggregate', # 38.13
|
90
90
|
]
|
91
91
|
|
92
92
|
# Max: 25
|
@@ -110,6 +110,7 @@ Metrics/ClassLength:
|
|
110
110
|
- 'lib/red_amber/group.rb' # 105
|
111
111
|
- 'lib/red_amber/subframes.rb' # 110
|
112
112
|
- 'lib/red_amber/vector.rb' # 152
|
113
|
+
- 'lib/red_amber/vector_binary_element_wise.rb' # 109
|
113
114
|
|
114
115
|
# Only for monitoring. I will measure by PerceivedComplexity.
|
115
116
|
# Max: 7
|
@@ -127,6 +128,8 @@ Metrics/CyclomaticComplexity:
|
|
127
128
|
'parse_range', # 14
|
128
129
|
'remove', # 14
|
129
130
|
'[]', # 13
|
131
|
+
'drop', # 13
|
132
|
+
'aggregate', # 13
|
130
133
|
]
|
131
134
|
|
132
135
|
# Max: 10
|
@@ -140,6 +143,7 @@ Metrics/MethodLength:
|
|
140
143
|
'format_table', # 53
|
141
144
|
'slice_by', # 38
|
142
145
|
'assign_update', # 35
|
146
|
+
'drop', # 32
|
143
147
|
'aggregate', # 31
|
144
148
|
]
|
145
149
|
|
@@ -187,6 +191,7 @@ Metrics/PerceivedComplexity:
|
|
187
191
|
'filters', # 11
|
188
192
|
'html_table', # 11
|
189
193
|
'slice', # 11
|
194
|
+
'pick', # 11
|
190
195
|
]
|
191
196
|
|
192
197
|
# Offense count: 1
|
@@ -210,6 +215,12 @@ Naming/PredicateName:
|
|
210
215
|
- 'lib/red_amber/vector_functions.rb'
|
211
216
|
- 'lib/red_amber/vector_selectable.rb'
|
212
217
|
|
218
|
+
# avoid unused variable assignment
|
219
|
+
Rubycw/Rubycw:
|
220
|
+
Exclude:
|
221
|
+
- 'test/**/*'
|
222
|
+
- 'docker/example'
|
223
|
+
|
213
224
|
# Offense count: 16
|
214
225
|
# This cop supports safe autocorrection (--autocorrect).
|
215
226
|
Style/OperatorMethodCall:
|
@@ -223,6 +234,10 @@ Style/SlicingWithRange:
|
|
223
234
|
Exclude:
|
224
235
|
- 'test/test_data_frame_selectable.rb'
|
225
236
|
|
237
|
+
Style/MixinUsage:
|
238
|
+
Exclude:
|
239
|
+
- 'docker/example'
|
240
|
+
|
226
241
|
# Necessary to Vector < 0 element-wise comparison
|
227
242
|
# Offense count: 5
|
228
243
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,105 @@
|
|
1
|
-
## [0.4.
|
1
|
+
## [0.4.2] - 2023-04-02
|
2
|
+
|
3
|
+
- Breaking change
|
4
|
+
|
5
|
+
- Bug fixes
|
6
|
+
- Fix Vector#modulo, #fdiv, #remainder (#203)
|
7
|
+
|
8
|
+
- New features and improvements
|
9
|
+
- Update SubFrames#take to return SubFrames (#212)
|
10
|
+
|
11
|
+
- Refactoring
|
12
|
+
- Refine SubFrames to support partial retrieval (#207)
|
13
|
+
- Upgrade SubFrames#frames and promote to public (#207)
|
14
|
+
- Use faster count in Group#inspect (#207)
|
15
|
+
|
16
|
+
- Improve in tests/CI
|
17
|
+
|
18
|
+
- Documentation and Example
|
19
|
+
- Introduce minimum docker environment (#205)
|
20
|
+
- Move example REPL to docker (#205)
|
21
|
+
- Add readme.md in docker (#205)
|
22
|
+
- Add examples_of_red_amber.ipynb (#205)
|
23
|
+
- Use smaller dataset in irb example
|
24
|
+
- Fix docker/example
|
25
|
+
- Updated link to red-data-tools (#213)
|
26
|
+
- Thanks to Soumya Kushwaha
|
27
|
+
|
28
|
+
- GitHub site
|
29
|
+
- Migrated to [Red Data Tools](https://github.com/red-data-tools)
|
30
|
+
- Thanks to Sutou Kouhei
|
31
|
+
|
32
|
+
- Thanks
|
33
|
+
- Sutou Kouhei
|
34
|
+
- Soumya Kushwaha
|
35
|
+
|
36
|
+
## [0.4.1] - 2023-03-11
|
37
|
+
|
38
|
+
- Breaking change
|
39
|
+
- Remove Vector.aggregate? method (#200)
|
2
40
|
|
3
|
-
|
41
|
+
- Bug fixes
|
42
|
+
- Return self in DataFrame#drop when dropper is empty (reverts 746ac263) (#193)
|
43
|
+
- Return self in DataFrame#rename when renaming to same name (#193)
|
44
|
+
- Return self in DataFrame#pick when pick itself (#199)
|
45
|
+
- Fix column width for non-ascii elements in DataFrame#to_s (#193)
|
46
|
+
- This change uses String#width.
|
47
|
+
- Fix DataFrame#to_iruby when data is date32 type (#193)
|
48
|
+
- Fix DataFrame#shorthand to show temporal type data simply (#193)
|
49
|
+
- Fix Vector#rank when data is ChunkedArray (#198)
|
50
|
+
- Fix Vector element-wise functions with nil as scalar (#198)
|
51
|
+
- Support :force_order for all methods of join family (#199)
|
52
|
+
- Supports the :force_order option to force sorting after join for the whole #join family.
|
53
|
+
- This will be valuable in some cases, such as with large dataframes.
|
54
|
+
- Ensure baseframe's schema for SubFrames (#200)
|
55
|
+
|
56
|
+
- New features and improvements
|
57
|
+
- Add Vector#first, #last method (#198)
|
58
|
+
- This method will be used in SubFrames feature.
|
59
|
+
- Add Vector#modulo method (#198)
|
60
|
+
- The divmod function in Arrow C++ is still in draft state.
|
61
|
+
This method was created by combining existing functions
|
62
|
+
- Add Vector#quotient method (#198)
|
63
|
+
- Add aliases #div, #mod, #mul, #pow, #quo and #sub for Vector (#198)
|
64
|
+
- Add Vector#*_checked functions (#198)
|
65
|
+
- These functions will check for numeric range overflow.
|
66
|
+
- Add 'tdra' and 'plain' in display mode (#193)
|
67
|
+
- The plain mode and default inspect will show up to 128 rows and 128 columns.
|
68
|
+
- Add String#width method in refinements (#193)
|
69
|
+
- This will be used to update DataFrame#to_s.
|
70
|
+
- Introduce pre-loaded REPL environment (#199)
|
71
|
+
- This commit will add bin/example and it will start irb environment
|
72
|
+
with enabled commonly used datasets such as penguins, diamonds, etc.
|
73
|
+
- Upgrade SubFrames#aggregate to accept block (#200)
|
74
|
+
|
75
|
+
- Refactoring
|
76
|
+
- Use symbolized keys in refinements of Table#keys, #key? (#193)
|
77
|
+
- This allows Tables and DataFrames to be treated in the same manner.
|
78
|
+
- Use key_name.succ in suffix of DataFrame#join (#193)
|
79
|
+
- This makes it simple to get a name candidate.
|
80
|
+
- Use ||= to memorize instance variables (#193)
|
81
|
+
- Refine vector projection to use #variables (#193)
|
82
|
+
- #variables is fastest when picking Vectors.
|
83
|
+
- Refine Vector#is_in to avoid #pack (#198)
|
84
|
+
- Refine Vector#index (#198)
|
85
|
+
|
86
|
+
- Improve in tests/CI
|
87
|
+
- Tests
|
88
|
+
- Update benchmarks to test from older version (#193)
|
89
|
+
- Refine test of Vector function with scalar (#198)
|
90
|
+
- Refine test subframes and test_vector_selectable (#200)
|
91
|
+
|
92
|
+
- Cops
|
93
|
+
- CI
|
94
|
+
|
95
|
+
- Documentation
|
96
|
+
- Update documents(small fix) (#201)
|
97
|
+
|
98
|
+
- GitHub site
|
99
|
+
|
100
|
+
- Thanks
|
101
|
+
|
102
|
+
## [0.4.0] - 2023-02-25
|
4
103
|
|
5
104
|
- Breaking change
|
6
105
|
- Upgrade dependency to Arrow 11.0.0 (#188)
|
@@ -73,7 +172,8 @@
|
|
73
172
|
- CI
|
74
173
|
- Fix setting up Arrow by homebrew in CI (#167)
|
75
174
|
- Fix CI error on homebrew deleting python link (#167)
|
76
|
-
- Set cache-version to get new C extensions in CI (#173)
|
175
|
+
- Set cache-version to get new C extensions in CI (#173)
|
176
|
+
- Thanks to @kou for suggestion.
|
77
177
|
|
78
178
|
- Documentation
|
79
179
|
- Update DataFrame.md about loading csv without headers (#165)
|
@@ -611,7 +711,7 @@
|
|
611
711
|
- Improve `DataFrame#[]`, `#slice`, `#remove` by a new engine
|
612
712
|
- It parses arguments to Vector internally.
|
613
713
|
- Used Kernel#Array to simplify code (#16) .
|
614
|
-
-
|
714
|
+
- Move `DataFrame#slice`, `#remove` to Selectable
|
615
715
|
- Refine `DataFrame#take`, `#filter` (undocumented)
|
616
716
|
|
617
717
|
- Introduce coerce in Vector (#35)
|
data/README.md
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# RedAmber
|
2
2
|
|
3
3
|
[![Gem Version](https://img.shields.io/gem/v/red_amber?color=brightgreen)](https://rubygems.org/gems/red_amber)
|
4
|
-
[![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/
|
4
|
+
[![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/red-data-tools/red_amber/actions/workflows/ci.yml)
|
5
5
|
[![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
6
6
|
[![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
7
7
|
[![Doc](https://img.shields.io/badge/docs-latest-blue)](https://heronshoes.github.io/red_amber/)
|
8
|
-
[![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/
|
8
|
+
[![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/red-data-tools/red_amber/discussions)
|
9
9
|
|
10
10
|
A simple dataframe library for Ruby.
|
11
11
|
|
@@ -13,12 +13,12 @@ A simple dataframe library for Ruby.
|
|
13
13
|
[![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en) [![Gem Version](https://img.shields.io/gem/v/red-arrow?color=brightgreen)](https://rubygems.org/gems/red-arrow)
|
14
14
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
15
15
|
|
16
|
-
![screenshot from jupyterlab](https://raw.githubusercontent.com/
|
16
|
+
![screenshot from jupyterlab](https://raw.githubusercontent.com/red-data-tools/red_amber/main/doc/image/screenshot.png)
|
17
17
|
|
18
18
|
## Requirements
|
19
19
|
### Ruby
|
20
20
|
Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
|
21
|
-
- I decided to remove Ruby 2.7 without waiting for EOL. See [Release note for v0.3.0](https://github.com/
|
21
|
+
- I decided to remove support for Ruby 2.7 without waiting for its EOL. See [Release note for v0.3.0](https://github.com/red-data-tools/red_amber/discussions/162) for details.
|
22
22
|
|
23
23
|
### Libraries
|
24
24
|
```ruby
|
@@ -29,7 +29,7 @@ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
|
29
29
|
|
30
30
|
## Installation
|
31
31
|
|
32
|
-
Install requirements before you install
|
32
|
+
Install requirements before you install RedAmber.
|
33
33
|
|
34
34
|
- Apache Arrow (~> 11.0.0)
|
35
35
|
- Apache Arrow GLib (~> 11.0.0)
|
@@ -78,24 +78,26 @@ And then execute `bundle install` or install them yourself such as `gem install
|
|
78
78
|
|
79
79
|
## Docker image and Jupyter Notebook
|
80
80
|
|
81
|
-
|
81
|
+
A Docker image is available from the docker folder. See the [readme](docker/readme.md) for instructions. An integrated Jupyter notebook is in the docker/notebook folder.
|
82
82
|
|
83
|
-
|
83
|
+
You can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
|
84
84
|
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
|
85
85
|
|
86
|
+
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
|
87
|
+
|
86
88
|
## Comparison of DataFrames
|
87
89
|
|
88
90
|
Comparison of basic features of RedAmber with Python
|
89
91
|
[pandas](https://pandas.pydata.org/),
|
90
92
|
R [Tidyverse](https://www.tidyverse.org/) and
|
91
|
-
Julia [Dataframes](https://dataframes.juliadata.org/stable/) is [
|
93
|
+
Julia [Dataframes](https://dataframes.juliadata.org/stable/) is in [DataFrame_Comparison.md](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
|
92
94
|
|
93
95
|
## Data frame in `RedAmber`
|
94
96
|
|
95
97
|
Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
|
96
|
-
|
98
|
+
Its entity is a Red Arrow's Table object.
|
97
99
|
|
98
|
-
![dataframe model of RedAmber](https://raw.githubusercontent.com/
|
100
|
+
![dataframe model of RedAmber](https://raw.githubusercontent.com/red-data-tools/red_amber/main/doc/image/dataframe_model.png)
|
99
101
|
|
100
102
|
Let's load the library and try some examples.
|
101
103
|
|
@@ -115,7 +117,7 @@ then
|
|
115
117
|
require 'datasets-arrow' # to load sample data
|
116
118
|
|
117
119
|
dataset = Datasets::Diamonds.new
|
118
|
-
diamonds = DataFrame.new(dataset) #
|
120
|
+
diamonds = DataFrame.new(dataset) # before v0.2.3, should be `dataset.to_arrow`
|
119
121
|
|
120
122
|
# =>
|
121
123
|
#<RedAmber::DataFrame : 53940 x 10 Vectors, 0x000000000000f668>
|
@@ -174,7 +176,7 @@ df.rename('mean(price)': :mean_price_USD)
|
|
174
176
|
|
175
177
|
### Example: starwars dataset
|
176
178
|
|
177
|
-
Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data
|
179
|
+
The next example reads the `starwars` dataset from a downloaded CSV file, followed by minimal data cleaning.
|
178
180
|
|
179
181
|
```ruby
|
180
182
|
uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
|
@@ -222,7 +224,7 @@ You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/dock
|
|
222
224
|
## Development
|
223
225
|
|
224
226
|
```shell
|
225
|
-
git clone https://github.com/
|
227
|
+
git clone https://github.com/red-data-tools/red_amber.git
|
226
228
|
cd red_amber
|
227
229
|
bundle install
|
228
230
|
bundle exec rake test
|
@@ -232,12 +234,12 @@ bundle exec rake test
|
|
232
234
|
|
233
235
|
I would appreciate it if you could help improve this project. Here are a few ways you can help:
|
234
236
|
|
235
|
-
- Let's talk in the [discussions](https://github.com/heronshoes/red_amber/discussions). [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/
|
237
|
+
- Let's talk in the [discussions](https://github.com/heronshoes/red_amber/discussions). [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/red-data-tools/red_amber/discussions)
|
236
238
|
- Browse Q and A, how to use, tips, etc.
|
237
239
|
- Ask questions you’re wondering about.
|
238
240
|
- Share ideas. The idea may be promoted to issues or pull requests.
|
239
|
-
- [Report bugs or suggest new features](https://github.com/
|
240
|
-
- Fix bugs and [submit pull requests](https://github.com/
|
241
|
+
- [Report bugs or suggest new features](https://github.com/red-data-tools/red_amber/issues)
|
242
|
+
- Fix bugs and [submit pull requests](https://github.com/red-data-tools/red_amber/pulls)
|
241
243
|
- Write, clarify, or fix documentation
|
242
244
|
|
243
245
|
## License
|
data/benchmark/basic.yml
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name:
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
4
|
+
- name: 0.1.5
|
8
5
|
gems:
|
9
|
-
red_amber: 0.
|
6
|
+
red_amber: 0.1.5
|
10
7
|
- name: 0.2.0
|
11
8
|
gems:
|
12
9
|
red_amber: 0.2.0
|
13
|
-
- name: 0.
|
10
|
+
- name: 0.3.0
|
14
11
|
gems:
|
15
|
-
red_amber: 0.
|
12
|
+
red_amber: 0.3.0
|
13
|
+
- name: HEAD
|
14
|
+
prelude: |
|
15
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
16
16
|
|
17
17
|
prelude: |
|
18
18
|
require 'red_amber'
|
@@ -43,7 +43,7 @@ benchmark:
|
|
43
43
|
'B06: Pick by a block': |
|
44
44
|
df.pick { keys.map { |key| key.end_with?('time') } }
|
45
45
|
|
46
|
-
'B07: Slice([]) by
|
46
|
+
'B07: Slice([]) by an index': |
|
47
47
|
df[877]
|
48
48
|
|
49
49
|
'B08: Slice by indeces': |
|
data/benchmark/combine.yml
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
4
|
- name: 0.3.0
|
8
5
|
gems:
|
9
6
|
red_amber: 0.3.0
|
7
|
+
- name: HEAD
|
8
|
+
prelude: |
|
9
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
10
10
|
|
11
11
|
prelude: |
|
12
12
|
require 'red_amber'
|
data/benchmark/dataframe.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
8
|
-
gems:
|
9
|
-
red_amber: 0.3.0
|
10
4
|
- name: 0.2.0
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.0
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
@@ -19,8 +19,14 @@ prelude: |
|
|
19
19
|
|
20
20
|
starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
|
21
21
|
|
22
|
-
|
23
|
-
|
22
|
+
import_cars = RedAmber::DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
|
23
|
+
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
24
|
+
2017 28336 52527 25427 68221 49040
|
25
|
+
2018 26473 50982 25984 67554 51961
|
26
|
+
2019 24222 46814 23813 66553 46794
|
27
|
+
2020 22304 35712 20196 57041 36576
|
28
|
+
2021 22535 35905 18211 51722 35215
|
29
|
+
TSV
|
24
30
|
|
25
31
|
ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
|
26
32
|
simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
|
@@ -43,7 +49,7 @@ benchmark:
|
|
43
49
|
.group(:species) { [count(:species), mean(:height, :mass)] }
|
44
50
|
.slice { v(:count) > 1 }
|
45
51
|
|
46
|
-
'D03:
|
52
|
+
'D03: Import cars test': |
|
47
53
|
import_cars
|
48
54
|
.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
49
55
|
.to_wide(name: :Manufacturer, value: :Num_of_imported)
|
data/benchmark/group.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
8
|
-
gems:
|
9
|
-
red_amber: 0.3.0
|
10
4
|
- name: 0.2.2
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.2
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
data/benchmark/reshape.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
8
|
-
gems:
|
9
|
-
red_amber: 0.3.0
|
10
4
|
- name: 0.2.2
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.2
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
data/benchmark/vector.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 10
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
8
|
-
gems:
|
9
|
-
red_amber: 0.3.0
|
10
4
|
- name: 0.2.0
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.0
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
data/doc/CODE_OF_CONDUCT.md
CHANGED
@@ -39,7 +39,7 @@ This Code of Conduct applies within all community spaces, and also applies when
|
|
39
39
|
|
40
40
|
## Enforcement
|
41
41
|
|
42
|
-
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at
|
42
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at heronshoes877@gmail.com. All complaints will be reviewed and investigated promptly and fairly.
|
43
43
|
|
44
44
|
All community leaders are obligated to respect the privacy and security of the reporter of any incident.
|
45
45
|
|
data/docker/.env
ADDED
data/docker/Dockerfile
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# x86-64/Ubuntu-22.04/python-3.10.9/lab-3.6.1/notebook-6.5.3/2023-03-13
|
2
|
+
ARG BASE_IMAGE_TAG=295612d3ade4
|
3
|
+
|
4
|
+
FROM jupyter/minimal-notebook:$BASE_IMAGE_TAG
|
5
|
+
|
6
|
+
USER root
|
7
|
+
|
8
|
+
RUN set -eux; \
|
9
|
+
apt update; \
|
10
|
+
apt install -y --no-install-recommends \
|
11
|
+
curl \
|
12
|
+
\
|
13
|
+
# For Apache Arrow
|
14
|
+
ca-certificates \
|
15
|
+
lsb-release \
|
16
|
+
\
|
17
|
+
# Ruby 3.0.2
|
18
|
+
ruby-dev \
|
19
|
+
\
|
20
|
+
# For iruby
|
21
|
+
libczmq-dev \
|
22
|
+
libzmq3-dev
|
23
|
+
|
24
|
+
# Apache Arrow
|
25
|
+
ARG ARROW_DEB_TMP=/tmp/apache-arrow-apt-source-latest.deb
|
26
|
+
|
27
|
+
RUN set -eux; \
|
28
|
+
apt update; \
|
29
|
+
curl -sfSL -o ${ARROW_DEB_TMP} \
|
30
|
+
https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb; \
|
31
|
+
apt install -y --no-install-recommends \
|
32
|
+
${ARROW_DEB_TMP}; \
|
33
|
+
rm -f ${ARROW_DEB_TMP}; \
|
34
|
+
apt update; \
|
35
|
+
apt install -y \
|
36
|
+
libarrow-dev \
|
37
|
+
libarrow-glib-dev \
|
38
|
+
libparquet-dev \
|
39
|
+
libparquet-glib-dev \
|
40
|
+
libgirepository1.0-dev; \
|
41
|
+
rm -rf /var/lib/apt/lists/*
|
42
|
+
|
43
|
+
USER $NB_UID
|
44
|
+
|
45
|
+
WORKDIR $HOME
|
46
|
+
|
47
|
+
ENV PATH $HOME/.local/share/gem/bin:$HOME/.local/share/gem/ruby/3.0.0/bin:$PATH
|
48
|
+
|
49
|
+
# IRuby
|
50
|
+
RUN set -eux; \
|
51
|
+
gem install --user-install \
|
52
|
+
rake \
|
53
|
+
bundler \
|
54
|
+
iruby; \
|
55
|
+
iruby register --force
|
56
|
+
|
57
|
+
COPY --chown=$NB_UID:$NB_GID Gemfile $HOME
|
58
|
+
COPY --chown=$NB_UID:$NB_GID Gemfile.lock $HOME
|
59
|
+
|
60
|
+
ENV GEM_HOME=$HOME/.local/share/gem
|
61
|
+
ENV BUNDLE_PATH=$HOME/.local/share/gem
|
62
|
+
ENV BUNDLE_BIN=$HOME/.local/share/gem/bin
|
63
|
+
|
64
|
+
RUN bundle install --jobs=4
|
65
|
+
|
66
|
+
COPY --chown=$NB_UID:$NB_GID example $HOME
|
data/docker/Gemfile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
|
5
|
+
gem 'irb'
|
6
|
+
|
7
|
+
gem 'numo-narray'
|
8
|
+
gem 'red-arrow', '~> 11.0.0'
|
9
|
+
gem 'red-arrow-numo-narray'
|
10
|
+
gem 'red-parquet', '~> 11.0.0'
|
11
|
+
|
12
|
+
gem 'red_amber', '>=0.4.0'
|
13
|
+
gem 'red-amber-view'
|
14
|
+
gem 'rover-df'
|
15
|
+
|
16
|
+
gem 'red-datasets'
|
17
|
+
gem 'red-datasets-arrow'
|
18
|
+
|
19
|
+
gem 'benchmark_driver'
|
20
|
+
gem 'benchmark-ips'
|
21
|
+
gem 'faker'
|