red_amber 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -5
- data/CHANGELOG.md +104 -4
- data/README.md +18 -16
- data/benchmark/basic.yml +8 -8
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -6
- data/doc/CODE_OF_CONDUCT.md +1 -1
- data/docker/.env +4 -0
- data/docker/Dockerfile +66 -0
- data/docker/Gemfile +21 -0
- data/docker/Gemfile.lock +80 -0
- data/docker/docker-compose.yml +21 -0
- data/docker/example +74 -0
- data/docker/notebook/examples_of_red_amber.ipynb +8562 -0
- data/docker/notebook/red-amber.ipynb +188 -0
- data/docker/readme.md +118 -0
- data/lib/red_amber/data_frame.rb +25 -10
- data/lib/red_amber/data_frame_combinable.rb +117 -73
- data/lib/red_amber/data_frame_displayable.rb +100 -51
- data/lib/red_amber/data_frame_indexable.rb +4 -4
- data/lib/red_amber/data_frame_reshaping.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +1 -4
- data/lib/red_amber/data_frame_variable_operation.rb +7 -2
- data/lib/red_amber/group.rb +17 -18
- data/lib/red_amber/helper.rb +4 -4
- data/lib/red_amber/refinements.rb +15 -2
- data/lib/red_amber/subframes.rb +319 -191
- data/lib/red_amber/vector.rb +7 -30
- data/lib/red_amber/vector_binary_element_wise.rb +149 -1
- data/lib/red_amber/vector_selectable.rb +49 -12
- data/lib/red_amber/vector_unary_element_wise.rb +93 -0
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +3 -3
- metadata +16 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 498f580bc6fc79e36b199cc3ec62c38638e4df903e956a7a78947d9091748d0c
|
4
|
+
data.tar.gz: 24c3f25ff47b6bba0af26d1a1c77a80a56c2115e0bd76d26ba3f769f71d3557b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '085caa83703f4b9be0a3baae3318f0bcb606bbf264347ad3db431b29fb59c87e05b142ab1fd7e8a0af0a3d64b0b81055f1e85d82414f773d01456cf5a386df25'
|
7
|
+
data.tar.gz: 901e7eda6560eb2a9ab378d672b39d3ba08ae47012d8465ce541a465d744ce45d93a451ef2bff8fdf838fc1040b12facc58c87d95c6f6bedb89c5c138b95005c
|
data/.rubocop.yml
CHANGED
@@ -44,17 +44,16 @@ Layout/LineLength:
|
|
44
44
|
Layout/MultilineMethodCallIndentation:
|
45
45
|
EnforcedStyle: indented_relative_to_receiver
|
46
46
|
|
47
|
-
# avoid unused variable asignment
|
48
|
-
Rubycw/Rubycw:
|
49
|
-
Exclude:
|
50
|
-
- 'test/**/*'
|
51
|
-
|
52
47
|
# Disabled to define Vector operators
|
53
48
|
# Offense count: 38
|
54
49
|
Lint/BinaryOperatorWithIdenticalOperands:
|
55
50
|
Exclude:
|
56
51
|
- 'test/test_vector_binary_element_wise.rb'
|
57
52
|
|
53
|
+
Lint/Debugger:
|
54
|
+
Exclude:
|
55
|
+
- 'docker/example'
|
56
|
+
|
58
57
|
# Need for test with empty block
|
59
58
|
# Offense count: 1
|
60
59
|
# Configuration parameters: AllowComments, AllowEmptyLambdas.
|
@@ -87,6 +86,7 @@ Metrics/AbcSize:
|
|
87
86
|
'drop', # 31.42
|
88
87
|
'[]', # 33.76
|
89
88
|
'split', # 37.35
|
89
|
+
'aggregate', # 38.13
|
90
90
|
]
|
91
91
|
|
92
92
|
# Max: 25
|
@@ -110,6 +110,7 @@ Metrics/ClassLength:
|
|
110
110
|
- 'lib/red_amber/group.rb' # 105
|
111
111
|
- 'lib/red_amber/subframes.rb' # 110
|
112
112
|
- 'lib/red_amber/vector.rb' # 152
|
113
|
+
- 'lib/red_amber/vector_binary_element_wise.rb' # 109
|
113
114
|
|
114
115
|
# Only for monitoring. I will measure by PerceivedComplexity.
|
115
116
|
# Max: 7
|
@@ -127,6 +128,8 @@ Metrics/CyclomaticComplexity:
|
|
127
128
|
'parse_range', # 14
|
128
129
|
'remove', # 14
|
129
130
|
'[]', # 13
|
131
|
+
'drop', # 13
|
132
|
+
'aggregate', # 13
|
130
133
|
]
|
131
134
|
|
132
135
|
# Max: 10
|
@@ -140,6 +143,7 @@ Metrics/MethodLength:
|
|
140
143
|
'format_table', # 53
|
141
144
|
'slice_by', # 38
|
142
145
|
'assign_update', # 35
|
146
|
+
'drop', # 32
|
143
147
|
'aggregate', # 31
|
144
148
|
]
|
145
149
|
|
@@ -187,6 +191,7 @@ Metrics/PerceivedComplexity:
|
|
187
191
|
'filters', # 11
|
188
192
|
'html_table', # 11
|
189
193
|
'slice', # 11
|
194
|
+
'pick', # 11
|
190
195
|
]
|
191
196
|
|
192
197
|
# Offense count: 1
|
@@ -210,6 +215,12 @@ Naming/PredicateName:
|
|
210
215
|
- 'lib/red_amber/vector_functions.rb'
|
211
216
|
- 'lib/red_amber/vector_selectable.rb'
|
212
217
|
|
218
|
+
# avoid unused variable assignment
|
219
|
+
Rubycw/Rubycw:
|
220
|
+
Exclude:
|
221
|
+
- 'test/**/*'
|
222
|
+
- 'docker/example'
|
223
|
+
|
213
224
|
# Offense count: 16
|
214
225
|
# This cop supports safe autocorrection (--autocorrect).
|
215
226
|
Style/OperatorMethodCall:
|
@@ -223,6 +234,10 @@ Style/SlicingWithRange:
|
|
223
234
|
Exclude:
|
224
235
|
- 'test/test_data_frame_selectable.rb'
|
225
236
|
|
237
|
+
Style/MixinUsage:
|
238
|
+
Exclude:
|
239
|
+
- 'docker/example'
|
240
|
+
|
226
241
|
# Necessary to Vector < 0 element-wise comparison
|
227
242
|
# Offense count: 5
|
228
243
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,105 @@
|
|
1
|
-
## [0.4.
|
1
|
+
## [0.4.2] - 2023-04-02
|
2
|
+
|
3
|
+
- Breaking change
|
4
|
+
|
5
|
+
- Bug fixes
|
6
|
+
- Fix Vector#modulo, #fdiv, #remainder (#203)
|
7
|
+
|
8
|
+
- New features and improvements
|
9
|
+
- Update SubFrames#take to return SubFrames (#212)
|
10
|
+
|
11
|
+
- Refactoring
|
12
|
+
- Refine SubFrames to support partial retrieval (#207)
|
13
|
+
- Upgrade SubFrames#frames and promote to public (#207)
|
14
|
+
- Use faster count in Group#inspect (#207)
|
15
|
+
|
16
|
+
- Improve in tests/CI
|
17
|
+
|
18
|
+
- Documentation and Example
|
19
|
+
- Introduce minimum docker environment (#205)
|
20
|
+
- Move example REPL to docker (#205)
|
21
|
+
- Add readme.md in docker (#205)
|
22
|
+
- Add examples_of_red_amber.ipynb (#205)
|
23
|
+
- Use smaller dataset in irb example
|
24
|
+
- Fix docker/example
|
25
|
+
- Updated link to red-data-tools (#213)
|
26
|
+
- Thanks to Soumya Kushwaha
|
27
|
+
|
28
|
+
- GitHub site
|
29
|
+
- Migrated to [Red Data Tools](https://github.com/red-data-tools)
|
30
|
+
- Thanks to Sutou Kouhei
|
31
|
+
|
32
|
+
- Thanks
|
33
|
+
- Sutou Kouhei
|
34
|
+
- Soumya Kushwaha
|
35
|
+
|
36
|
+
## [0.4.1] - 2023-03-11
|
37
|
+
|
38
|
+
- Breaking change
|
39
|
+
- Remove Vector.aggregate? method (#200)
|
2
40
|
|
3
|
-
|
41
|
+
- Bug fixes
|
42
|
+
- Return self in DataFrame#drop when dropper is empty (reverts 746ac263) (#193)
|
43
|
+
- Return self in DataFrame#rename when renaming to same name (#193)
|
44
|
+
- Return self in DataFrame#pick when pick itself (#199)
|
45
|
+
- Fix column width for non-ascii elements in DataFrame#to_s (#193)
|
46
|
+
- This change uses String#width.
|
47
|
+
- Fix DataFrame#to_iruby when data is date32 type (#193)
|
48
|
+
- Fix DataFrame#shorthand to show temporal type data simply (#193)
|
49
|
+
- Fix Vector#rank when data is ChunkedArray (#198)
|
50
|
+
- Fix Vector element-wise functions with nil as scalar (#198)
|
51
|
+
- Support :force_order for all methods of join family (#199)
|
52
|
+
- Supports :force_order option to force sorting after join for all #join family.
|
53
|
+
- This will be valuable in some cases such as large dataframes.
|
54
|
+
- Ensure baseframe's schema for SubFrames (#200)
|
55
|
+
|
56
|
+
- New features and improvements
|
57
|
+
- Add Vector#first, #last method (#198)
|
58
|
+
- This method will be used in SubFrames feature.
|
59
|
+
- Add Vector#modulo method (#198)
|
60
|
+
- The divmod function in Arrow C++ is still in draft state.
|
61
|
+
This method was created by combining existing functions
|
62
|
+
- Add Vector#quotient method (#198)
|
63
|
+
- Add aliases #div, #mod, #mul, #pow, #quo and #sub for Vector (#198)
|
64
|
+
- Add Vector#*_checked functions (#198)
|
65
|
+
- These functions will check numeric range overflow.
|
66
|
+
- Add 'tdra' and 'plain' in display mode (#193)
|
67
|
+
- The plain mode and default inspect will show up to 128 rows and 128 columns.
|
68
|
+
- Add String#width method in refinements (#193)
|
69
|
+
- This will be used to update DataFrame#to_s.
|
70
|
+
- Introduce pre-loaded REPL environment (#199)
|
71
|
+
- This commit will add bin/example and it will start irb environment
|
72
|
+
with enabled commonly used datasets such as penguins, diamonds, etc.
|
73
|
+
- Upgrade SubFrames#aggregate to accept block (#200)
|
74
|
+
|
75
|
+
- Refactoring
|
76
|
+
- Use symbolized keys in refinements of Table#keys, #key? (#193)
|
77
|
+
- This allows Tables and DataFrames to be treated in the same manner.
|
78
|
+
- Use key_name.succ in suffix of DataFrame#join (#193)
|
79
|
+
- This makes it simple to get a name candidate.
|
80
|
+
- Use ||= to memorize instance variables (#193)
|
81
|
+
- Refine vector projection to use #variables (#193)
|
82
|
+
- #variables is fastest when picking Vectors.
|
83
|
+
- Refine Vector#is_in to avoid #pack (#198)
|
84
|
+
- Refine Vector#index (#198)
|
85
|
+
|
86
|
+
- Improve in tests/CI
|
87
|
+
- Tests
|
88
|
+
- Update benchmarks to test from older version (#193)
|
89
|
+
- Refine test of Vector function with scalar (#198)
|
90
|
+
- Refine test subframes and test_vector_selectable (#200)
|
91
|
+
|
92
|
+
- Cops
|
93
|
+
- CI
|
94
|
+
|
95
|
+
- Documentation
|
96
|
+
- Update documents(small fix) (#201)
|
97
|
+
|
98
|
+
- GitHub site
|
99
|
+
|
100
|
+
- Thanks
|
101
|
+
|
102
|
+
## [0.4.0] - 2023-02-25
|
4
103
|
|
5
104
|
- Breaking change
|
6
105
|
- Upgrade dependency to Arrow 11.0.0 (#188)
|
@@ -73,7 +172,8 @@
|
|
73
172
|
- CI
|
74
173
|
- Fix setting up Arrow by homebrew in CI (#167)
|
75
174
|
- Fix CI error on homebrew deleting python link (#167)
|
76
|
-
- Set cache-version to get new C extensions in CI (#173)
|
175
|
+
- Set cache-version to get new C extensions in CI (#173)
|
176
|
+
- Thanks to @kou for suggestion.
|
77
177
|
|
78
178
|
- Documentation
|
79
179
|
- Update DataFrame.md about loading csv without headers (#165)
|
@@ -611,7 +711,7 @@
|
|
611
711
|
- Improve `DataFrame#[]`, `#slice`, `#remove` by a new engine
|
612
712
|
- It parses arguments to Vector internally.
|
613
713
|
- Used Kernel#Array to simplify code (#16) .
|
614
|
-
-
|
714
|
+
- Move `DataFrame#slice`, `#remove` to Selectable
|
615
715
|
- Refine `DataFrame#take`, `#filter` (undocumented)
|
616
716
|
|
617
717
|
- Introduce coerce in Vector (#35)
|
data/README.md
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# RedAmber
|
2
2
|
|
3
3
|
[](https://rubygems.org/gems/red_amber)
|
4
|
-
[](https://github.com/
|
4
|
+
[](https://github.com/red-data-tools/red_amber/actions/workflows/ci.yml)
|
5
5
|
[](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
6
6
|
[](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
7
7
|
[](https://heronshoes.github.io/red_amber/)
|
8
|
-
[](https://github.com/
|
8
|
+
[](https://github.com/red-data-tools/red_amber/discussions)
|
9
9
|
|
10
10
|
A simple dataframe library for Ruby.
|
11
11
|
|
@@ -13,12 +13,12 @@ A simple dataframe library for Ruby.
|
|
13
13
|
[](https://gitter.im/red-data-tools/en) [](https://rubygems.org/gems/red-arrow)
|
14
14
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
15
15
|
|
16
|
-

|
17
17
|
|
18
18
|
## Requirements
|
19
19
|
### Ruby
|
20
20
|
Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
|
21
|
-
- I decided to remove Ruby 2.7 without waiting for EOL. See [Release note for v0.3.0](https://github.com/
|
21
|
+
- I decided to remove support for Ruby 2.7 without waiting for its EOL. See [Release note for v0.3.0](https://github.com/red-data-tools/red_amber/discussions/162) for details.
|
22
22
|
|
23
23
|
### Libraries
|
24
24
|
```ruby
|
@@ -29,7 +29,7 @@ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
|
29
29
|
|
30
30
|
## Installation
|
31
31
|
|
32
|
-
Install requirements before you install
|
32
|
+
Install requirements before you install RedAmber.
|
33
33
|
|
34
34
|
- Apache Arrow (~> 11.0.0)
|
35
35
|
- Apache Arrow GLib (~> 11.0.0)
|
@@ -78,24 +78,26 @@ And then execute `bundle install` or install them yourself such as `gem install
|
|
78
78
|
|
79
79
|
## Docker image and Jupyter Notebook
|
80
80
|
|
81
|
-
|
81
|
+
Docker image is available from docker folder. See [readme](docker/readme.md) for instructions. Integrated Jupyter notebook is in docker/notebook folder.
|
82
82
|
|
83
|
-
|
83
|
+
You can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
|
84
84
|
[](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
|
85
85
|
|
86
|
+
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
|
87
|
+
|
86
88
|
## Comparison of DataFrames
|
87
89
|
|
88
90
|
Comparison of basic features of RedAmber with Python
|
89
91
|
[pandas](https://pandas.pydata.org/),
|
90
92
|
R [Tidyverse](https://www.tidyverse.org/) and
|
91
|
-
Julia [Dataframes](https://dataframes.juliadata.org/stable/) is [
|
93
|
+
Julia [Dataframes](https://dataframes.juliadata.org/stable/) is in [DataFrame_Comparison.md](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
|
92
94
|
|
93
95
|
## Data frame in `RedAmber`
|
94
96
|
|
95
97
|
Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
|
96
|
-
|
98
|
+
Its entity is a Red Arrow's Table object.
|
97
99
|
|
98
|
-

|
99
101
|
|
100
102
|
Let's load the library and try some examples.
|
101
103
|
|
@@ -115,7 +117,7 @@ then
|
|
115
117
|
require 'datasets-arrow' # to load sample data
|
116
118
|
|
117
119
|
dataset = Datasets::Diamonds.new
|
118
|
-
diamonds = DataFrame.new(dataset) #
|
120
|
+
diamonds = DataFrame.new(dataset) # before v0.2.3, should be `dataset.to_arrow`
|
119
121
|
|
120
122
|
# =>
|
121
123
|
#<RedAmber::DataFrame : 53940 x 10 Vectors, 0x000000000000f668>
|
@@ -174,7 +176,7 @@ df.rename('mean(price)': :mean_price_USD)
|
|
174
176
|
|
175
177
|
### Example: starwars dataset
|
176
178
|
|
177
|
-
Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data
|
179
|
+
Next example is `starwars` dataset reading from the downloaded CSV file. Followed by minimum data cleaning.
|
178
180
|
|
179
181
|
```ruby
|
180
182
|
uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
|
@@ -222,7 +224,7 @@ You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/dock
|
|
222
224
|
## Development
|
223
225
|
|
224
226
|
```shell
|
225
|
-
git clone https://github.com/
|
227
|
+
git clone https://github.com/red-data-tools/red_amber.git
|
226
228
|
cd red_amber
|
227
229
|
bundle install
|
228
230
|
bundle exec rake test
|
@@ -232,12 +234,12 @@ bundle exec rake test
|
|
232
234
|
|
233
235
|
I would appreciate it if you could help to improve this project. Here are a few ways you can help:
|
234
236
|
|
235
|
-
- Let's talk in the [discussions](https://github.com/heronshoes/red_amber/discussions). [](https://github.com/
|
237
|
+
- Let's talk in the [discussions](https://github.com/heronshoes/red_amber/discussions). [](https://github.com/red-data-tools/red_amber/discussions)
|
236
238
|
- Browse Q and A, how to use, tips, etc.
|
237
239
|
- Ask questions you’re wondering about.
|
238
240
|
- Share ideas. The idea may be promoted to issues or pull requests.
|
239
|
-
- [Report bugs or suggest new features](https://github.com/
|
240
|
-
- Fix bugs and [submit pull requests](https://github.com/
|
241
|
+
- [Report bugs or suggest new features](https://github.com/red-data-tools/red_amber/issues)
|
242
|
+
- Fix bugs and [submit pull requests](https://github.com/red-data-tools/red_amber/pulls)
|
241
243
|
- Write, clarify, or fix documentation
|
242
244
|
|
243
245
|
## License
|
data/benchmark/basic.yml
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name:
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
4
|
+
- name: 0.1.5
|
8
5
|
gems:
|
9
|
-
red_amber: 0.
|
6
|
+
red_amber: 0.1.5
|
10
7
|
- name: 0.2.0
|
11
8
|
gems:
|
12
9
|
red_amber: 0.2.0
|
13
|
-
- name: 0.
|
10
|
+
- name: 0.3.0
|
14
11
|
gems:
|
15
|
-
red_amber: 0.
|
12
|
+
red_amber: 0.3.0
|
13
|
+
- name: HEAD
|
14
|
+
prelude: |
|
15
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
16
16
|
|
17
17
|
prelude: |
|
18
18
|
require 'red_amber'
|
@@ -43,7 +43,7 @@ benchmark:
|
|
43
43
|
'B06: Pick by a block': |
|
44
44
|
df.pick { keys.map { |key| key.end_with?('time') } }
|
45
45
|
|
46
|
-
'B07: Slice([]) by
|
46
|
+
'B07: Slice([]) by an index': |
|
47
47
|
df[877]
|
48
48
|
|
49
49
|
'B08: Slice by indeces': |
|
data/benchmark/combine.yml
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
4
|
- name: 0.3.0
|
8
5
|
gems:
|
9
6
|
red_amber: 0.3.0
|
7
|
+
- name: HEAD
|
8
|
+
prelude: |
|
9
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
10
10
|
|
11
11
|
prelude: |
|
12
12
|
require 'red_amber'
|
data/benchmark/dataframe.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
8
|
-
gems:
|
9
|
-
red_amber: 0.3.0
|
10
4
|
- name: 0.2.0
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.0
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
@@ -19,8 +19,14 @@ prelude: |
|
|
19
19
|
|
20
20
|
starwars = RedAmber::DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars').to_arrow)
|
21
21
|
|
22
|
-
|
23
|
-
|
22
|
+
import_cars = RedAmber::DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
|
23
|
+
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
24
|
+
2017 28336 52527 25427 68221 49040
|
25
|
+
2018 26473 50982 25984 67554 51961
|
26
|
+
2019 24222 46814 23813 66553 46794
|
27
|
+
2020 22304 35712 20196 57041 36576
|
28
|
+
2021 22535 35905 18211 51722 35215
|
29
|
+
TSV
|
24
30
|
|
25
31
|
ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
|
26
32
|
simpsons_paradox_covid = RedAmber::DataFrame.new(ds.to_arrow)
|
@@ -43,7 +49,7 @@ benchmark:
|
|
43
49
|
.group(:species) { [count(:species), mean(:height, :mass)] }
|
44
50
|
.slice { v(:count) > 1 }
|
45
51
|
|
46
|
-
'D03:
|
52
|
+
'D03: Import cars test': |
|
47
53
|
import_cars
|
48
54
|
.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
49
55
|
.to_wide(name: :Manufacturer, value: :Num_of_imported)
|
data/benchmark/group.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
8
|
-
gems:
|
9
|
-
red_amber: 0.3.0
|
10
4
|
- name: 0.2.2
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.2
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
data/benchmark/reshape.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
8
|
-
gems:
|
9
|
-
red_amber: 0.3.0
|
10
4
|
- name: 0.2.2
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.2
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
data/benchmark/vector.yml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
loop_count: 10
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: HEAD
|
5
|
-
prelude: |
|
6
|
-
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
-
- name: 0.3.0
|
8
|
-
gems:
|
9
|
-
red_amber: 0.3.0
|
10
4
|
- name: 0.2.0
|
11
5
|
gems:
|
12
6
|
red_amber: 0.2.0
|
7
|
+
- name: 0.3.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.3.0
|
10
|
+
- name: HEAD
|
11
|
+
prelude: |
|
12
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
data/doc/CODE_OF_CONDUCT.md
CHANGED
@@ -39,7 +39,7 @@ This Code of Conduct applies within all community spaces, and also applies when
|
|
39
39
|
|
40
40
|
## Enforcement
|
41
41
|
|
42
|
-
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at
|
42
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at heronshoes877@gmail.com. All complaints will be reviewed and investigated promptly and fairly.
|
43
43
|
|
44
44
|
All community leaders are obligated to respect the privacy and security of the reporter of any incident.
|
45
45
|
|
data/docker/.env
ADDED
data/docker/Dockerfile
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# x86-64/Ubuntu-22.04/python-3.10.9/lab-3.6.1/notebook-6.5.3/2023-03-13
|
2
|
+
ARG BASE_IMAGE_TAG=295612d3ade4
|
3
|
+
|
4
|
+
FROM jupyter/minimal-notebook:$BASE_IMAGE_TAG
|
5
|
+
|
6
|
+
USER root
|
7
|
+
|
8
|
+
RUN set -eux; \
|
9
|
+
apt update; \
|
10
|
+
apt install -y --no-install-recommends \
|
11
|
+
curl \
|
12
|
+
\
|
13
|
+
# For Apache Arrow
|
14
|
+
ca-certificates \
|
15
|
+
lsb-release \
|
16
|
+
\
|
17
|
+
# Ruby 3.0.2
|
18
|
+
ruby-dev \
|
19
|
+
\
|
20
|
+
# For iruby
|
21
|
+
libczmq-dev \
|
22
|
+
libzmq3-dev
|
23
|
+
|
24
|
+
# Apache Arrow
|
25
|
+
ARG ARROW_DEB_TMP=/tmp/apache-arrow-apt-source-latest.deb
|
26
|
+
|
27
|
+
RUN set -eux; \
|
28
|
+
apt update; \
|
29
|
+
curl -sfSL -o ${ARROW_DEB_TMP} \
|
30
|
+
https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb; \
|
31
|
+
apt install -y --no-install-recommends \
|
32
|
+
${ARROW_DEB_TMP}; \
|
33
|
+
rm -f ${ARROW_DEB_TMP}; \
|
34
|
+
apt update; \
|
35
|
+
apt install -y \
|
36
|
+
libarrow-dev \
|
37
|
+
libarrow-glib-dev \
|
38
|
+
libparquet-dev \
|
39
|
+
libparquet-glib-dev \
|
40
|
+
libgirepository1.0-dev; \
|
41
|
+
rm -rf /var/lib/apt/lists/*
|
42
|
+
|
43
|
+
USER $NB_UID
|
44
|
+
|
45
|
+
WORKDIR $HOME
|
46
|
+
|
47
|
+
ENV PATH $HOME/.local/share/gem/bin:$HOME/.local/share/gem/ruby/3.0.0/bin:$PATH
|
48
|
+
|
49
|
+
# IRuby
|
50
|
+
RUN set -eux; \
|
51
|
+
gem install --user-install \
|
52
|
+
rake \
|
53
|
+
bundler \
|
54
|
+
iruby; \
|
55
|
+
iruby register --force
|
56
|
+
|
57
|
+
COPY --chown=$NB_UID:$NB_GID Gemfile $HOME
|
58
|
+
COPY --chown=$NB_UID:$NB_GID Gemfile.lock $HOME
|
59
|
+
|
60
|
+
ENV GEM_HOME=$HOME/.local/share/gem
|
61
|
+
ENV BUNDLE_PATH=$HOME/.local/share/gem
|
62
|
+
ENV BUNDLE_BIN=$HOME/.local/share/gem/bin
|
63
|
+
|
64
|
+
RUN bundle install --jobs=4
|
65
|
+
|
66
|
+
COPY --chown=$NB_UID:$NB_GID example $HOME
|
data/docker/Gemfile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
|
5
|
+
gem 'irb'
|
6
|
+
|
7
|
+
gem 'numo-narray'
|
8
|
+
gem 'red-arrow', '~> 11.0.0'
|
9
|
+
gem 'red-arrow-numo-narray'
|
10
|
+
gem 'red-parquet', '~> 11.0.0'
|
11
|
+
|
12
|
+
gem 'red_amber', '>=0.4.0'
|
13
|
+
gem 'red-amber-view'
|
14
|
+
gem 'rover-df'
|
15
|
+
|
16
|
+
gem 'red-datasets'
|
17
|
+
gem 'red-datasets-arrow'
|
18
|
+
|
19
|
+
gem 'benchmark_driver'
|
20
|
+
gem 'benchmark-ips'
|
21
|
+
gem 'faker'
|