red_amber 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +39 -20
- data/.yardopts +2 -0
- data/CHANGELOG.md +113 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +25 -26
- data/benchmark/basic.yml +2 -2
- data/benchmark/combine.yml +2 -2
- data/benchmark/dataframe.yml +2 -2
- data/benchmark/group.yml +2 -2
- data/benchmark/reshape.yml +2 -2
- data/benchmark/vector.yml +3 -0
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +429 -75
- data/lib/red_amber/data_frame_combinable.rb +516 -66
- data/lib/red_amber/data_frame_displayable.rb +244 -14
- data/lib/red_amber/data_frame_indexable.rb +121 -18
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +622 -66
- data/lib/red_amber/data_frame_variable_operation.rb +446 -34
- data/lib/red_amber/group.rb +187 -22
- data/lib/red_amber/helper.rb +70 -10
- data/lib/red_amber/refinements.rb +12 -5
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +385 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +217 -12
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa6f3c47b47df7271d7d150a800013c7c9d8bd75ca6066f54506c922f12eea09
|
4
|
+
data.tar.gz: 763f19f54a6508648fe9f1bdd0a11f678a86f554b58b71d7bed66aa5df7df2a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 433ca52f7a62f055f327e0426426cfd86f563009e4ec4811d7cf8297152309081271b7b7625d39ffa31ecf455d352ee305d76b6d09e4d1dab0d90aa6c2bffb3e
|
7
|
+
data.tar.gz: 717d8618dd428d165c80420e7c35f1b7f870a059227a91bd5224f67b9cd3b8bdafcaed523fee170524738036cc9b43e914712fa01e88f7eb9ca1f0cc18c98dbf
|
data/.rubocop.yml
CHANGED
@@ -31,6 +31,19 @@ Style/TrailingCommaInHashLiteral:
|
|
31
31
|
|
32
32
|
# To let you know the possibility of refactoring ===
|
33
33
|
|
34
|
+
# Max: 120
|
35
|
+
# This cop supports safe autocorrection (--autocorrect).
|
36
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
37
|
+
# URISchemes: http, https
|
38
|
+
Layout/LineLength:
|
39
|
+
Max: 90
|
40
|
+
Exclude:
|
41
|
+
- 'test/**/*'
|
42
|
+
|
43
|
+
# EnforcedStyle: aligned
|
44
|
+
Layout/MultilineMethodCallIndentation:
|
45
|
+
EnforcedStyle: indented_relative_to_receiver
|
46
|
+
|
34
47
|
# avoid unused variable assignment
|
35
48
|
Rubycw/Rubycw:
|
36
49
|
Exclude:
|
@@ -40,7 +53,7 @@ Rubycw/Rubycw:
|
|
40
53
|
# Offense count: 38
|
41
54
|
Lint/BinaryOperatorWithIdenticalOperands:
|
42
55
|
Exclude:
|
43
|
-
- 'test/
|
56
|
+
- 'test/test_vector_binary_element_wise.rb'
|
44
57
|
|
45
58
|
# Need for test with empty block
|
46
59
|
# Offense count: 1
|
@@ -55,15 +68,6 @@ Lint/UselessAssignment:
|
|
55
68
|
Exclude:
|
56
69
|
- 'test/**/*'
|
57
70
|
|
58
|
-
# Max: 120
|
59
|
-
# This cop supports safe autocorrection (--autocorrect).
|
60
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
61
|
-
# URISchemes: http, https
|
62
|
-
Layout/LineLength:
|
63
|
-
Max: 90
|
64
|
-
Exclude:
|
65
|
-
- 'test/**/*'
|
66
|
-
|
67
71
|
# <= 17 satisfactory
|
68
72
|
# 18..30 unsatisfactory
|
69
73
|
# > 30 dangerous
|
@@ -104,6 +108,7 @@ Metrics/ClassLength:
|
|
104
108
|
- 'test/**/*'
|
105
109
|
- 'lib/red_amber/data_frame.rb' # 162
|
106
110
|
- 'lib/red_amber/group.rb' # 105
|
111
|
+
- 'lib/red_amber/subframes.rb' # 110
|
107
112
|
- 'lib/red_amber/vector.rb' # 152
|
108
113
|
|
109
114
|
# Only for monitoring. I will measure by PerceivedComplexity.
|
@@ -113,14 +118,15 @@ Metrics/ClassLength:
|
|
113
118
|
Metrics/CyclomaticComplexity:
|
114
119
|
Max: 12
|
115
120
|
AllowedMethods: [
|
116
|
-
'
|
121
|
+
'split', # 33
|
117
122
|
'format_table', # 21
|
123
|
+
'normalize_element', # 17
|
118
124
|
'slice_by', # 16
|
125
|
+
'assign_update', # 14
|
126
|
+
'join', # 14
|
127
|
+
'parse_range', # 14
|
119
128
|
'remove', # 14
|
120
|
-
'normalize_element', # 17
|
121
129
|
'[]', # 13
|
122
|
-
'parse_range', # 14
|
123
|
-
'split', # 33
|
124
130
|
]
|
125
131
|
|
126
132
|
# Max: 10
|
@@ -134,6 +140,7 @@ Metrics/MethodLength:
|
|
134
140
|
'format_table', # 53
|
135
141
|
'slice_by', # 38
|
136
142
|
'assign_update', # 35
|
143
|
+
'aggregate', # 31
|
137
144
|
]
|
138
145
|
|
139
146
|
# Max: 100
|
@@ -163,17 +170,23 @@ Metrics/ParameterLists:
|
|
163
170
|
Metrics/PerceivedComplexity:
|
164
171
|
Max: 10
|
165
172
|
AllowedMethods: [
|
166
|
-
'join', # 14
|
167
|
-
'dataframe_info', # 13
|
168
173
|
'format_table', # 22
|
169
174
|
'slice_by', # 20
|
170
|
-
'remove', # 14
|
171
|
-
'drop', # 12
|
172
|
-
'filters', # 11
|
173
175
|
'normalize_element', # 17
|
174
|
-
'
|
176
|
+
'assign_update', # 15
|
175
177
|
'parse_range', # 15
|
178
|
+
'join', # 14
|
179
|
+
'remove', # 14
|
176
180
|
'split', # 14
|
181
|
+
'dataframe_info', # 13
|
182
|
+
'replace', # 13
|
183
|
+
'drop', # 12
|
184
|
+
'initialize', # 12
|
185
|
+
'aggregate', # 12
|
186
|
+
'[]', # 11
|
187
|
+
'filters', # 11
|
188
|
+
'html_table', # 11
|
189
|
+
'slice', # 11
|
177
190
|
]
|
178
191
|
|
179
192
|
# Offense count: 1
|
@@ -197,6 +210,12 @@ Naming/PredicateName:
|
|
197
210
|
- 'lib/red_amber/vector_functions.rb'
|
198
211
|
- 'lib/red_amber/vector_selectable.rb'
|
199
212
|
|
213
|
+
# Offense count: 16
|
214
|
+
# This cop supports safe autocorrection (--autocorrect).
|
215
|
+
Style/OperatorMethodCall:
|
216
|
+
Exclude:
|
217
|
+
- 'test/test_vector_binary_element_wise.rb'
|
218
|
+
|
200
219
|
# Necessary to test when range.end == -1
|
201
220
|
# Offense count: 2
|
202
221
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
data/.yardopts
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,116 @@
|
|
1
|
+
## [0.4.0] - 2023-02-25
|
2
|
+
|
3
|
+
:memo: Update documents for consistency
|
4
|
+
|
5
|
+
- Breaking change
|
6
|
+
- Upgrade dependency to Arrow 11.0.0 (#188)
|
7
|
+
|
8
|
+
- Bug fixes
|
9
|
+
- Add :force_order option for DataFrame#join (#174)
|
10
|
+
- Return error for empty DataFrame in DataFrame#filter (#172)
|
11
|
+
- Accept ChunkedArray in DataFrame#filter (#172)
|
12
|
+
- Fix Vector#replace to accept Arrow::Array as a replacer (#179)
|
13
|
+
- Fix Vector#round_to_multiple to accept Float or Integer (#180)
|
14
|
+
- Change Vector atan2 to a class method (#180)
|
15
|
+
- Fix Vector#shift when boolean Vector (#184)
|
16
|
+
- Fix processing empty SubFrames (#183)
|
17
|
+
- Do not check object id in DataFrame#rename, #drop for self (#188)
|
18
|
+
|
19
|
+
- New features and improvements
|
20
|
+
- Accept a block in DataFrame#filter (#172)
|
21
|
+
- Add Vector.aggregate? method (#175)
|
22
|
+
- Introduce Vector#propagate method (#175)
|
23
|
+
- Add Vector#rank methods (#176)
|
24
|
+
- Add Vector#sample method (#176)
|
25
|
+
- Add Vector#sort method (#176)
|
26
|
+
- Promote DataFrame#shape_str to public (#184)
|
27
|
+
- Introduce Vector#concatenate (#184)
|
28
|
+
- Add #numeric? in refinements of Array (#184)
|
29
|
+
- Add Vector#cumulative_sum_checked and #cumsum (#184)
|
30
|
+
- Add Vector#resolve method (#184)
|
31
|
+
- Add DataFrame#tdra method (#184)
|
32
|
+
- Add #expand as an alias for Vector#propagate (#184)
|
33
|
+
- Add #glimpse as an alias for DataFrame#tdr (#184)
|
34
|
+
- New class SubFrames (#183)
|
35
|
+
- Introduce class SubFrames
|
36
|
+
- Memorize dataframes in SubFrames
|
37
|
+
- Add @frames to memorize sub DataFrames
|
38
|
+
- Accept filters in SubFrames.new
|
39
|
+
- Accept block in SubFrames.new
|
40
|
+
- Add SubFrames.by_filter
|
41
|
+
- Introduce methods creating SubFrames from DataFrame
|
42
|
+
- Introduce SubFrames#each method
|
43
|
+
- Add SubFrames#to_s method
|
44
|
+
- Add SubFrames#concatenate method
|
45
|
+
- Add SubFrames#offset_indices method
|
46
|
+
- SubFrames#aggregate method
|
47
|
+
- Redefine SubFrames#map to return SubFrames
|
48
|
+
- Define SubFrame#map dynamically
|
49
|
+
- Add SubFrames#assign method
|
50
|
+
- Redefine SubFrames#select to return SubFrames
|
51
|
+
- Add SubFrames#reject method
|
52
|
+
- Add SubFrames#filter_map method
|
53
|
+
- Refine DataFrame#indices memorizing @indices
|
54
|
+
- Rename SubFrames#universal_frame as #baseframe
|
55
|
+
- Set Group iteration feature to @api private
|
56
|
+
|
57
|
+
- Refactoring
|
58
|
+
- Generate Vector functions in class method (#177)
|
59
|
+
- Set Constant visibility to private (#179)
|
60
|
+
- Separate test_vector_function (#179)
|
61
|
+
- Relocate methods in DataFrameIndexable (#179)
|
62
|
+
- Rename Array refinements to the same name as Vector (#184)
|
63
|
+
|
64
|
+
- Improve in tests/CI
|
65
|
+
- Tests
|
66
|
+
- Update benchmarks to set 0.3.0 as a reference (#167)
|
67
|
+
- Move test of Vector#logb to proper location (#180)
|
68
|
+
|
69
|
+
- Cops
|
70
|
+
- Update .rubocop.yml to align with latest cops (#174)
|
71
|
+
- Unify style of MethodCallIndentation as relative to receiver (#184)
|
72
|
+
|
73
|
+
- CI
|
74
|
+
- Fix setting up Arrow by homebrew in CI (#167)
|
75
|
+
- Fix CI error on homebrew deleting python link (#167)
|
76
|
+
- Set cache-version to get new C extensions in CI (#173) Thanks to @kou for suggestion.
|
77
|
+
|
78
|
+
- Documentation
|
79
|
+
- Update DataFrame.md about loading csv without headers (#165)
|
80
|
+
- Thanks to kojix2
|
81
|
+
- Update YARD in DataFrame combinable (#168)
|
82
|
+
- Update comment for Ruby 2.7 support in README.md
|
83
|
+
- Update license year
|
84
|
+
- Update README (#172)
|
85
|
+
- Update Vector.md and yardoc in #propagate (#175)
|
86
|
+
- Use customized style sheet for YARD (#179)
|
87
|
+
- Add examples for the doc of #pick and #drop (#179)
|
88
|
+
- Add examples to YARD in DataFrame reshaping methods (#179)
|
89
|
+
- Update documents in DataFrameDisplayable (#179)
|
90
|
+
- Update documents in DataFrameVariableOperation (#179)
|
91
|
+
- Update document for dynamically generated methods (#179)
|
92
|
+
- Unify style in document (#179)
|
93
|
+
- Update documents in DataFrameSelectable (#179)
|
94
|
+
- Update documents of basic Vector methods (#179)
|
95
|
+
- Update document in VectorUpdatable (#179)
|
96
|
+
- Update document of Group (#179)
|
97
|
+
- Update document of DataFrameLoadSave (#180)
|
98
|
+
- Add examples for document of ArrowFunction (#180)
|
99
|
+
- Update document of Vector_unary_aggregation (#180)
|
100
|
+
- Update document of Vector_unary_element_wise (#180)
|
101
|
+
- Update document of Vector_binary_element_wise (#180)
|
102
|
+
- Add documentation to give comparison of dataframes (#169)
|
103
|
+
- Thanks to Benson Muite
|
104
|
+
- Update documents for consistency of method indentation (#189)
|
105
|
+
- Update CHANGELOG (#189)
|
106
|
+
- Update README for 0.4.0 (#189)
|
107
|
+
|
108
|
+
- GitHub site
|
109
|
+
|
110
|
+
- Thanks
|
111
|
+
- kojix2
|
112
|
+
- Benson Muite
|
113
|
+
|
1
114
|
## [0.3.0] - 2022-12-18
|
2
115
|
|
3
116
|
- Breaking change
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c) 2022 Hirokazu SUZUKI (heronshoes)
|
3
|
+
Copyright (c) 2022-2023 Hirokazu SUZUKI (heronshoes)
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# RedAmber
|
2
2
|
|
3
|
-
[![Gem Version](https://
|
3
|
+
[![Gem Version](https://img.shields.io/gem/v/red_amber?color=brightgreen)](https://rubygems.org/gems/red_amber)
|
4
4
|
[![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
|
5
5
|
[![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
6
6
|
[![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
@@ -10,22 +10,20 @@
|
|
10
10
|
A simple dataframe library for Ruby.
|
11
11
|
|
12
12
|
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
13
|
-
[![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en)
|
13
|
+
[![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en) [![Gem Version](https://img.shields.io/gem/v/red-arrow?color=brightgreen)](https://rubygems.org/gems/red-arrow)
|
14
14
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
15
15
|
|
16
16
|
![screenshot from jupyterlab](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/screenshot.png)
|
17
17
|
|
18
18
|
## Requirements
|
19
|
-
|
19
|
+
### Ruby
|
20
20
|
Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
|
21
|
+
- I decided to remove Ruby 2.7 without waiting for EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
|
21
22
|
|
22
|
-
|
23
|
-
|
23
|
+
### Libraries
|
24
24
|
```ruby
|
25
|
-
#
|
26
|
-
gem 'red-
|
27
|
-
|
28
|
-
gem 'red-parquet', '~> 10.0.0' # Optional, if you use IO from/to parquet
|
25
|
+
gem 'red-arrow', '~> 11.0.0' # Requires Apache Arrow (see installation below)
|
26
|
+
gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
|
29
27
|
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
30
28
|
```
|
31
29
|
|
@@ -33,11 +31,11 @@ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
|
33
31
|
|
34
32
|
Install requirements before you install Red Amber.
|
35
33
|
|
36
|
-
- Apache Arrow (~>
|
37
|
-
- Apache Arrow GLib (~>
|
38
|
-
- Apache Parquet GLib (~>
|
34
|
+
- Apache Arrow (~> 11.0.0)
|
35
|
+
- Apache Arrow GLib (~> 11.0.0)
|
36
|
+
- Apache Parquet GLib (~> 11.0.0) # If you use IO from/to parquet
|
39
37
|
|
40
|
-
|
38
|
+
See [Apache Arrow install document](https://arrow.apache.org/install/).
|
41
39
|
|
42
40
|
- Minimum installation example for the latest Ubuntu:
|
43
41
|
|
@@ -58,38 +56,39 @@ Install requirements before you install Red Amber.
|
|
58
56
|
sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
|
59
57
|
```
|
60
58
|
|
61
|
-
- On macOS,
|
59
|
+
- On macOS, using Homebrew:
|
62
60
|
|
63
61
|
```
|
64
62
|
brew install apache-arrow
|
65
|
-
```
|
66
|
-
|
67
|
-
and GLib (C) package with:
|
68
|
-
|
69
|
-
```
|
70
63
|
brew install apache-arrow-glib
|
71
64
|
```
|
72
65
|
|
73
66
|
If you prepared Apache Arrow, add these lines to your Gemfile:
|
74
67
|
|
75
68
|
```ruby
|
76
|
-
gem 'red-arrow', '~>
|
69
|
+
gem 'red-arrow', '~> 11.0.0'
|
77
70
|
gem 'red_amber'
|
78
|
-
gem 'red-parquet', '~>
|
71
|
+
gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
|
79
72
|
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
80
73
|
gem 'red-datasets-arrow' # Optional, recommended if you use Red Datasets
|
81
74
|
gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
|
82
75
|
```
|
83
76
|
|
84
|
-
And then execute `bundle install` or install
|
77
|
+
And then execute `bundle install` or install them yourself such as `gem install red_amber`.
|
85
78
|
|
86
79
|
## Docker image and Jupyter Notebook
|
87
80
|
|
88
|
-
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to
|
81
|
+
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
|
89
82
|
|
90
83
|
Also you can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
|
91
84
|
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
|
92
85
|
|
86
|
+
## Comparison of DataFrames
|
87
|
+
|
88
|
+
Comparison of basic features of RedAmber with Python
|
89
|
+
[pandas](https://pandas.pydata.org/),
|
90
|
+
R [Tidyverse](https://www.tidyverse.org/) and
|
91
|
+
Julia [Dataframes](https://dataframes.juliadata.org/stable/) is [here](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
|
93
92
|
|
94
93
|
## Data frame in `RedAmber`
|
95
94
|
|
@@ -137,7 +136,7 @@ For example, we can compute mean prices per cut for the data larger than 1 carat
|
|
137
136
|
|
138
137
|
```ruby
|
139
138
|
df = diamonds
|
140
|
-
.slice { carat > 1 }
|
139
|
+
.slice { carat > 1 } # or use #filter instead of #slice
|
141
140
|
.group(:cut)
|
142
141
|
.mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.
|
143
142
|
.sort('-mean(price)')
|
@@ -186,7 +185,7 @@ starwars
|
|
186
185
|
.drop(0) # delete unnecessary index column
|
187
186
|
.remove { species == "NA" } # delete unnecessary rows
|
188
187
|
.group(:species) { [count(:species), mean(:height, :mass)] }
|
189
|
-
.slice { count > 1 }
|
188
|
+
.slice { count > 1 } # or use #filter instead of slice
|
190
189
|
|
191
190
|
# =>
|
192
191
|
#<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000000f848>
|
@@ -213,7 +212,7 @@ See [Vector.md](doc/Vector.md) for details.
|
|
213
212
|
|
214
213
|
## Jupyter notebook
|
215
214
|
|
216
|
-
[
|
215
|
+
[Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
|
217
216
|
([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) shows more examples in jupyter notebook.
|
218
217
|
|
219
218
|
You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
|
data/benchmark/basic.yml
CHANGED
data/benchmark/combine.yml
CHANGED
data/benchmark/dataframe.yml
CHANGED
data/benchmark/group.yml
CHANGED
data/benchmark/reshape.yml
CHANGED
data/benchmark/vector.yml
CHANGED
data/doc/DataFrame.md
CHANGED
@@ -57,6 +57,10 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
57
57
|
```ruby
|
58
58
|
RedAmber::DataFrame.load("test/entity/with_header.csv")
|
59
59
|
```
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
RedAmber::DataFrame.load("test/entity/without_header.csv", headers: [:x, :y, :z])
|
63
|
+
```
|
60
64
|
|
61
65
|
- from a string buffer
|
62
66
|
|
@@ -275,6 +279,7 @@ penguins.to_rover
|
|
275
279
|
|
276
280
|
- Shows some information about self in a transposed style.
|
277
281
|
- `tdr_str` returns same info as a String.
|
282
|
+
- `glimpse` is an alias. It is similar to dplyr's (or Polars's) `glimpse()`.
|
278
283
|
|
279
284
|
```ruby
|
280
285
|
require 'red_amber'
|
@@ -568,7 +573,7 @@ penguins.to_rover
|
|
568
573
|
[1, 2, 3]
|
569
574
|
```
|
570
575
|
|
571
|
-
### `slice ` -
|
576
|
+
### `slice ` - cut into slices of records -
|
572
577
|
|
573
578
|
Slice and select records (rows) to create a sub DataFrame.
|
574
579
|
|
@@ -601,11 +606,14 @@ penguins.to_rover
|
|
601
606
|
|
602
607
|
- Booleans as an argument
|
603
608
|
|
604
|
-
`slice(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray . Booleans must be same length as `size`.
|
609
|
+
`filter(booleans)` or `slice(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray. Booleans must be same length as `size`.
|
610
|
+
|
611
|
+
note: `slice(booleans)` is acceptable for orthogonality of `slice`/`remove`.
|
605
612
|
|
606
613
|
```ruby
|
607
614
|
vector = penguins[:bill_length_mm]
|
608
|
-
penguins.
|
615
|
+
penguins.filter(vector >= 40)
|
616
|
+
# penguins.slice(vector >= 40) is also acceptable
|
609
617
|
|
610
618
|
# =>
|
611
619
|
#<RedAmber::DataFrame : 242 x 8 Vectors, 0x0000000000043d3c>
|
@@ -833,14 +841,14 @@ penguins.to_rover
|
|
833
841
|
|
834
842
|
Assign new or updated variables (columns) and create an updated DataFrame.
|
835
843
|
|
836
|
-
- Variables with new keys will append new columns from
|
844
|
+
- Variables with new keys will append new columns from right.
|
837
845
|
- Variables with existing keys will update corresponding vectors.
|
838
846
|
|
839
847
|
![assign method image](doc/../image/dataframe/assign.png)
|
840
848
|
|
841
849
|
- Variables as arguments
|
842
850
|
|
843
|
-
`assign(
|
851
|
+
`assign(key_value_pairs)` accepts pairs of key and values as parameters. `key_value_pairs` should be a Hash of `{key => array_like}` or an Array of Arrays like `[[key, array_like], ... ]`. `array_like` is either `Vector`, `Array` or `Arrow::Array`.
|
844
852
|
|
845
853
|
```ruby
|
846
854
|
df = RedAmber::DataFrame.new(
|
@@ -857,12 +865,12 @@ penguins.to_rover
|
|
857
865
|
2 Hinata 28
|
858
866
|
|
859
867
|
# update :age and add :brother
|
860
|
-
df.assign
|
868
|
+
df.assign(
|
861
869
|
{
|
862
870
|
age: age + 29,
|
863
871
|
brother: ['Santa', nil, 'Momotaro']
|
864
872
|
}
|
865
|
-
|
873
|
+
)
|
866
874
|
|
867
875
|
# =>
|
868
876
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000658b0>
|
@@ -932,7 +940,7 @@ penguins.to_rover
|
|
932
940
|
|
933
941
|
- Append from left
|
934
942
|
|
935
|
-
`assign_left` method accepts the same parameters and block as `assign`, but append new columns from
|
943
|
+
`assign_left` method accepts the same parameters and block as `assign`, but appends new columns from the left.
|
936
944
|
|
937
945
|
```ruby
|
938
946
|
df.assign_left(new_index: df.indices(1))
|
@@ -1453,6 +1461,8 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1453
1461
|
1 B 4
|
1454
1462
|
2 D 5
|
1455
1463
|
```
|
1464
|
+
##### `set_operable?(other)`
|
1465
|
+
Check if `types` of self and other are same.
|
1456
1466
|
|
1457
1467
|
##### `intersect(other)`
|
1458
1468
|
|
@@ -1498,15 +1508,23 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1498
1508
|
<string> <uint8>
|
1499
1509
|
1 B 2
|
1500
1510
|
2 C 3
|
1511
|
+
|
1512
|
+
other.difference(df)
|
1513
|
+
#=>
|
1514
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x0000000000040e0c>
|
1515
|
+
KEY1 KEY2
|
1516
|
+
<string> <uint8>
|
1517
|
+
0 B 4
|
1518
|
+
1 D 5
|
1501
1519
|
```
|
1502
1520
|
|
1503
1521
|
## Binding
|
1504
1522
|
|
1505
1523
|
### `concatenate(other)`
|
1506
1524
|
|
1507
|
-
Concatenate another DataFrame or Table onto the bottom of self. The
|
1525
|
+
Concatenate another DataFrame or Table onto the bottom of self. The types of other must be the same as self.
|
1508
1526
|
|
1509
|
-
The alias is `concat`.
|
1527
|
+
The aliases are `concat` and `bind_rows`.
|
1510
1528
|
|
1511
1529
|
An array of DataFrames or Tables is also acceptable as other.
|
1512
1530
|
|
@@ -1538,9 +1556,11 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1538
1556
|
3 4 D
|
1539
1557
|
```
|
1540
1558
|
|
1541
|
-
### `merge(other)`
|
1559
|
+
### `merge(*other)`
|
1560
|
+
|
1561
|
+
Concatenate another DataFrame or Table onto the right side of self (column-wise). The size of other must be the same as self. Self and other must not share the same key.
|
1542
1562
|
|
1543
|
-
|
1563
|
+
The alias is `bind_cols`.
|
1544
1564
|
|
1545
1565
|
```ruby
|
1546
1566
|
df
|