red_amber 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +39 -20
- data/.yardopts +2 -0
- data/CHANGELOG.md +113 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +25 -26
- data/benchmark/basic.yml +2 -2
- data/benchmark/combine.yml +2 -2
- data/benchmark/dataframe.yml +2 -2
- data/benchmark/group.yml +2 -2
- data/benchmark/reshape.yml +2 -2
- data/benchmark/vector.yml +3 -0
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +429 -75
- data/lib/red_amber/data_frame_combinable.rb +516 -66
- data/lib/red_amber/data_frame_displayable.rb +244 -14
- data/lib/red_amber/data_frame_indexable.rb +121 -18
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +622 -66
- data/lib/red_amber/data_frame_variable_operation.rb +446 -34
- data/lib/red_amber/group.rb +187 -22
- data/lib/red_amber/helper.rb +70 -10
- data/lib/red_amber/refinements.rb +12 -5
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +385 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +217 -12
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: aa6f3c47b47df7271d7d150a800013c7c9d8bd75ca6066f54506c922f12eea09
|
|
4
|
+
data.tar.gz: 763f19f54a6508648fe9f1bdd0a11f678a86f554b58b71d7bed66aa5df7df2a7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 433ca52f7a62f055f327e0426426cfd86f563009e4ec4811d7cf8297152309081271b7b7625d39ffa31ecf455d352ee305d76b6d09e4d1dab0d90aa6c2bffb3e
|
|
7
|
+
data.tar.gz: 717d8618dd428d165c80420e7c35f1b7f870a059227a91bd5224f67b9cd3b8bdafcaed523fee170524738036cc9b43e914712fa01e88f7eb9ca1f0cc18c98dbf
|
data/.rubocop.yml
CHANGED
|
@@ -31,6 +31,19 @@ Style/TrailingCommaInHashLiteral:
|
|
|
31
31
|
|
|
32
32
|
# To let you know the possibility of refactoring ===
|
|
33
33
|
|
|
34
|
+
# Max: 120
|
|
35
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
36
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
|
37
|
+
# URISchemes: http, https
|
|
38
|
+
Layout/LineLength:
|
|
39
|
+
Max: 90
|
|
40
|
+
Exclude:
|
|
41
|
+
- 'test/**/*'
|
|
42
|
+
|
|
43
|
+
# EnforcedStyle: aligned
|
|
44
|
+
Layout/MultilineMethodCallIndentation:
|
|
45
|
+
EnforcedStyle: indented_relative_to_receiver
|
|
46
|
+
|
|
34
47
|
# avoid unused variable assignment
|
|
35
48
|
Rubycw/Rubycw:
|
|
36
49
|
Exclude:
|
|
@@ -40,7 +53,7 @@ Rubycw/Rubycw:
|
|
|
40
53
|
# Offense count: 38
|
|
41
54
|
Lint/BinaryOperatorWithIdenticalOperands:
|
|
42
55
|
Exclude:
|
|
43
|
-
- 'test/
|
|
56
|
+
- 'test/test_vector_binary_element_wise.rb'
|
|
44
57
|
|
|
45
58
|
# Need for test with empty block
|
|
46
59
|
# Offense count: 1
|
|
@@ -55,15 +68,6 @@ Lint/UselessAssignment:
|
|
|
55
68
|
Exclude:
|
|
56
69
|
- 'test/**/*'
|
|
57
70
|
|
|
58
|
-
# Max: 120
|
|
59
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
60
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
|
61
|
-
# URISchemes: http, https
|
|
62
|
-
Layout/LineLength:
|
|
63
|
-
Max: 90
|
|
64
|
-
Exclude:
|
|
65
|
-
- 'test/**/*'
|
|
66
|
-
|
|
67
71
|
# <= 17 satisfactory
|
|
68
72
|
# 18..30 unsatisfactory
|
|
69
73
|
# > 30 dangerous
|
|
@@ -104,6 +108,7 @@ Metrics/ClassLength:
|
|
|
104
108
|
- 'test/**/*'
|
|
105
109
|
- 'lib/red_amber/data_frame.rb' # 162
|
|
106
110
|
- 'lib/red_amber/group.rb' # 105
|
|
111
|
+
- 'lib/red_amber/subframes.rb' # 110
|
|
107
112
|
- 'lib/red_amber/vector.rb' # 152
|
|
108
113
|
|
|
109
114
|
# Only for monitoring. I will measure by PerceivedComplexity.
|
|
@@ -113,14 +118,15 @@ Metrics/ClassLength:
|
|
|
113
118
|
Metrics/CyclomaticComplexity:
|
|
114
119
|
Max: 12
|
|
115
120
|
AllowedMethods: [
|
|
116
|
-
'
|
|
121
|
+
'split', # 33
|
|
117
122
|
'format_table', # 21
|
|
123
|
+
'normalize_element', # 17
|
|
118
124
|
'slice_by', # 16
|
|
125
|
+
'assign_update', # 14
|
|
126
|
+
'join', # 14
|
|
127
|
+
'parse_range', # 14
|
|
119
128
|
'remove', # 14
|
|
120
|
-
'normalize_element', # 17
|
|
121
129
|
'[]', # 13
|
|
122
|
-
'parse_range', # 14
|
|
123
|
-
'split', # 33
|
|
124
130
|
]
|
|
125
131
|
|
|
126
132
|
# Max: 10
|
|
@@ -134,6 +140,7 @@ Metrics/MethodLength:
|
|
|
134
140
|
'format_table', # 53
|
|
135
141
|
'slice_by', # 38
|
|
136
142
|
'assign_update', # 35
|
|
143
|
+
'aggregate', # 31
|
|
137
144
|
]
|
|
138
145
|
|
|
139
146
|
# Max: 100
|
|
@@ -163,17 +170,23 @@ Metrics/ParameterLists:
|
|
|
163
170
|
Metrics/PerceivedComplexity:
|
|
164
171
|
Max: 10
|
|
165
172
|
AllowedMethods: [
|
|
166
|
-
'join', # 14
|
|
167
|
-
'dataframe_info', # 13
|
|
168
173
|
'format_table', # 22
|
|
169
174
|
'slice_by', # 20
|
|
170
|
-
'remove', # 14
|
|
171
|
-
'drop', # 12
|
|
172
|
-
'filters', # 11
|
|
173
175
|
'normalize_element', # 17
|
|
174
|
-
'
|
|
176
|
+
'assign_update', # 15
|
|
175
177
|
'parse_range', # 15
|
|
178
|
+
'join', # 14
|
|
179
|
+
'remove', # 14
|
|
176
180
|
'split', # 14
|
|
181
|
+
'dataframe_info', # 13
|
|
182
|
+
'replace', # 13
|
|
183
|
+
'drop', # 12
|
|
184
|
+
'initialize', # 12
|
|
185
|
+
'aggregate', # 12
|
|
186
|
+
'[]', # 11
|
|
187
|
+
'filters', # 11
|
|
188
|
+
'html_table', # 11
|
|
189
|
+
'slice', # 11
|
|
177
190
|
]
|
|
178
191
|
|
|
179
192
|
# Offense count: 1
|
|
@@ -197,6 +210,12 @@ Naming/PredicateName:
|
|
|
197
210
|
- 'lib/red_amber/vector_functions.rb'
|
|
198
211
|
- 'lib/red_amber/vector_selectable.rb'
|
|
199
212
|
|
|
213
|
+
# Offense count: 16
|
|
214
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
215
|
+
Style/OperatorMethodCall:
|
|
216
|
+
Exclude:
|
|
217
|
+
- 'test/test_vector_binary_element_wise.rb'
|
|
218
|
+
|
|
200
219
|
# Necessary to test when range.end == -1
|
|
201
220
|
# Offense count: 2
|
|
202
221
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
data/.yardopts
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,116 @@
|
|
|
1
|
+
## [0.4.0] - 2023-02-25
|
|
2
|
+
|
|
3
|
+
:memo: Update documents for consistency
|
|
4
|
+
|
|
5
|
+
- Breaking change
|
|
6
|
+
- Upgrade dependency to Arrow 11.0.0 (#188)
|
|
7
|
+
|
|
8
|
+
- Bug fixes
|
|
9
|
+
- Add :force_order option for DataFrame#join (#174)
|
|
10
|
+
- Return error for empty DataFrame in DataFrame#filter (#172)
|
|
11
|
+
- Accept ChunkedArray in DataFrame#filter (#172)
|
|
12
|
+
- Fix Vector#replace to accept Arrow::Array as a replacer (#179)
|
|
13
|
+
- Fix Vector#round_to_multiple to accept Float or Integer (#180)
|
|
14
|
+
- Change Vector atan2 to a class method (#180)
|
|
15
|
+
- Fix Vector#shift when boolean Vector (#184)
|
|
16
|
+
- Fix processing empty SubFrames (#183)
|
|
17
|
+
- Do not check object id in DataFrame#rename, #drop for self (#188)
|
|
18
|
+
|
|
19
|
+
- New features and improvements
|
|
20
|
+
- Accept a block in DataFrame#filter (#172)
|
|
21
|
+
- Add Vector.aggregate? method (#175)
|
|
22
|
+
- Introduce Vector#propagate method (#175)
|
|
23
|
+
- Add Vector#rank methods (#176)
|
|
24
|
+
- Add Vector#sample method (#176)
|
|
25
|
+
- Add Vector#sort method (#176)
|
|
26
|
+
- Promote DataFrame#shape_str to public (#184)
|
|
27
|
+
- Introduce Vector#concatenate (#184)
|
|
28
|
+
- Add #numeric? in refinements of Array (#184)
|
|
29
|
+
- Add Vector#cumulative_sum_checked and #cumsum (#184)
|
|
30
|
+
- Add Vector#resolve method (#184)
|
|
31
|
+
- Add DataFrame#tdra method (#184)
|
|
32
|
+
- Add #expand as an alias for Vector#propagate (#184)
|
|
33
|
+
- Add #glimpse as an alias for DataFrame#tdr (#184)
|
|
34
|
+
- New class SubFrames (#183)
|
|
35
|
+
- Introduce class SubFrames
|
|
36
|
+
- Memorize dataframes in SubFrames
|
|
37
|
+
- Add @frames to memorize sub DataFrames
|
|
38
|
+
- Accept filters in SubFrames.new
|
|
39
|
+
- Accept block in SubFrames.new
|
|
40
|
+
- Add SubFrames.by_filter
|
|
41
|
+
- Introduce methods creating SubFrames from DataFrame
|
|
42
|
+
- Introduce SubFrames#each method
|
|
43
|
+
- Add SubFrames#to_s method
|
|
44
|
+
- Add SubFrames#concatenate method
|
|
45
|
+
- Add SubFrames#offset_indices method
|
|
46
|
+
- SubFrames#aggregate method
|
|
47
|
+
- Redefine SubFrames#map to return SubFrames
|
|
48
|
+
- Define SubFrame#map dynamically
|
|
49
|
+
- Add SubFrames#assign method
|
|
50
|
+
- Redefine SubFrames#select to return SubFrames
|
|
51
|
+
- Add SubFrames#reject method
|
|
52
|
+
- Add SubFrames#filter_map method
|
|
53
|
+
- Refine DataFrame#indices memorizing @indices
|
|
54
|
+
- Rename SubFrames#universal_frame as #baseframe
|
|
55
|
+
- Set Group iteration feature to @api private
|
|
56
|
+
|
|
57
|
+
- Refactoring
|
|
58
|
+
- Generate Vector functions in class method (#177)
|
|
59
|
+
- Set Constant visibility to private (#179)
|
|
60
|
+
- Separate test_vector_function (#179)
|
|
61
|
+
- Relocate methods in DataFrameIndexable (#179)
|
|
62
|
+
- Rename Array refinements to the same name as Vector (#184)
|
|
63
|
+
|
|
64
|
+
- Improve in tests/CI
|
|
65
|
+
- Tests
|
|
66
|
+
- Update benchmarks to set 0.3.0 as a reference (#167)
|
|
67
|
+
- Move test of Vector#logb to proper location (#180)
|
|
68
|
+
|
|
69
|
+
- Cops
|
|
70
|
+
- Update .rubocop.yml to align with latest cops (#174)
|
|
71
|
+
- Unify style of MethodCallIndentation as relative to receiver (#184)
|
|
72
|
+
|
|
73
|
+
- CI
|
|
74
|
+
- Fix setting up Arrow by homebrew in CI (#167)
|
|
75
|
+
- Fix CI error on homebrew deleting python link (#167)
|
|
76
|
+
- Set cache-version to get new C extensions in CI (#173) Thanks to @kou for suggestion.
|
|
77
|
+
|
|
78
|
+
- Documentation
|
|
79
|
+
- Update DataFrame.md about loading csv without headers (#165)
|
|
80
|
+
- Thanks to kojix2
|
|
81
|
+
- Update YARD in DataFrame combinable (#168)
|
|
82
|
+
- Update comment for Ruby 2.7 support in README.md
|
|
83
|
+
- Update license year
|
|
84
|
+
- Update README (#172)
|
|
85
|
+
- Update Vector.md and yardoc in #propagate (#175)
|
|
86
|
+
- Use customized style sheet for YARD (#179)
|
|
87
|
+
- Add examples for the doc of #pick and #drop (#179)
|
|
88
|
+
- Add examples to YARD in DataFrame reshaping methods (#179)
|
|
89
|
+
- Update documents in DataFrameDisplayable (#179)
|
|
90
|
+
- Update documents in DataFrameVariableOperation (#179)
|
|
91
|
+
- Update document for dynamically generated methods (#179)
|
|
92
|
+
- Unify style in document (#179)
|
|
93
|
+
- Update documents in DataFrameSelectable (#179)
|
|
94
|
+
- Update documents of basic Vector methods (#179)
|
|
95
|
+
- Update document in VectorUpdatable (#179)
|
|
96
|
+
- Update document of Group (#179)
|
|
97
|
+
- Update document of DataFrameLoadSave (#180)
|
|
98
|
+
- Add examples for document of ArrowFunction (#180)
|
|
99
|
+
- Update document of Vector_unary_aggregation (#180)
|
|
100
|
+
- Update document of Vector_unary_element_wise (#180)
|
|
101
|
+
- Update document of Vector_binary_element_wise (#180)
|
|
102
|
+
- Add documentation to give comparison of dataframes (#169)
|
|
103
|
+
- Thanks to Benson Muite
|
|
104
|
+
- Update documents for consistency of method indentation (#189)
|
|
105
|
+
- Update CHANGELOG (#189)
|
|
106
|
+
- Update README for 0.4.0 (#189)
|
|
107
|
+
|
|
108
|
+
- GitHub site
|
|
109
|
+
|
|
110
|
+
- Thanks
|
|
111
|
+
- kojix2
|
|
112
|
+
- Benson Muite
|
|
113
|
+
|
|
1
114
|
## [0.3.0] - 2022-12-18
|
|
2
115
|
|
|
3
116
|
- Breaking change
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
The MIT License (MIT)
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2022 Hirokazu SUZUKI (heronshoes)
|
|
3
|
+
Copyright (c) 2022-2023 Hirokazu SUZUKI (heronshoes)
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# RedAmber
|
|
2
2
|
|
|
3
|
-
[](https://rubygems.org/gems/red_amber)
|
|
4
4
|
[](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
|
|
5
5
|
[](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
|
6
6
|
[](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
|
@@ -10,22 +10,20 @@
|
|
|
10
10
|
A simple dataframe library for Ruby.
|
|
11
11
|
|
|
12
12
|
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
|
13
|
-
[](https://gitter.im/red-data-tools/en)
|
|
13
|
+
[](https://gitter.im/red-data-tools/en) [](https://rubygems.org/gems/red-arrow)
|
|
14
14
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
|
15
15
|
|
|
16
16
|

|
|
17
17
|
|
|
18
18
|
## Requirements
|
|
19
|
-
|
|
19
|
+
### Ruby
|
|
20
20
|
Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
|
|
21
|
+
- I decided to remove Ruby 2.7 without waiting for EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
|
|
21
22
|
|
|
22
|
-
|
|
23
|
-
|
|
23
|
+
### Libraries
|
|
24
24
|
```ruby
|
|
25
|
-
#
|
|
26
|
-
gem 'red-
|
|
27
|
-
|
|
28
|
-
gem 'red-parquet', '~> 10.0.0' # Optional, if you use IO from/to parquet
|
|
25
|
+
gem 'red-arrow', '~> 11.0.0' # Requires Apache Arrow (see installation below)
|
|
26
|
+
gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
|
|
29
27
|
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
|
30
28
|
```
|
|
31
29
|
|
|
@@ -33,11 +31,11 @@ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
|
|
33
31
|
|
|
34
32
|
Install requirements before you install Red Amber.
|
|
35
33
|
|
|
36
|
-
- Apache Arrow (~>
|
|
37
|
-
- Apache Arrow GLib (~>
|
|
38
|
-
- Apache Parquet GLib (~>
|
|
34
|
+
- Apache Arrow (~> 11.0.0)
|
|
35
|
+
- Apache Arrow GLib (~> 11.0.0)
|
|
36
|
+
- Apache Parquet GLib (~> 11.0.0) # If you use IO from/to parquet
|
|
39
37
|
|
|
40
|
-
|
|
38
|
+
See [Apache Arrow install document](https://arrow.apache.org/install/).
|
|
41
39
|
|
|
42
40
|
- Minimum installation example for the latest Ubuntu:
|
|
43
41
|
|
|
@@ -58,38 +56,39 @@ Install requirements before you install Red Amber.
|
|
|
58
56
|
sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
|
|
59
57
|
```
|
|
60
58
|
|
|
61
|
-
- On macOS,
|
|
59
|
+
- On macOS, using Homebrew:
|
|
62
60
|
|
|
63
61
|
```
|
|
64
62
|
brew install apache-arrow
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
and GLib (C) package with:
|
|
68
|
-
|
|
69
|
-
```
|
|
70
63
|
brew install apache-arrow-glib
|
|
71
64
|
```
|
|
72
65
|
|
|
73
66
|
If you prepared Apache Arrow, add these lines to your Gemfile:
|
|
74
67
|
|
|
75
68
|
```ruby
|
|
76
|
-
gem 'red-arrow', '~>
|
|
69
|
+
gem 'red-arrow', '~> 11.0.0'
|
|
77
70
|
gem 'red_amber'
|
|
78
|
-
gem 'red-parquet', '~>
|
|
71
|
+
gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
|
|
79
72
|
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
|
80
73
|
gem 'red-datasets-arrow' # Optional, recommended if you use Red Datasets
|
|
81
74
|
gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
|
|
82
75
|
```
|
|
83
76
|
|
|
84
|
-
And then execute `bundle install` or install
|
|
77
|
+
And then execute `bundle install` or install them yourself such as `gem install red_amber`.
|
|
85
78
|
|
|
86
79
|
## Docker image and Jupyter Notebook
|
|
87
80
|
|
|
88
|
-
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to
|
|
81
|
+
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
|
|
89
82
|
|
|
90
83
|
Also you can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
|
|
91
84
|
[](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
|
|
92
85
|
|
|
86
|
+
## Comparison of DataFrames
|
|
87
|
+
|
|
88
|
+
Comparison of basic features of RedAmber with Python
|
|
89
|
+
[pandas](https://pandas.pydata.org/),
|
|
90
|
+
R [Tidyverse](https://www.tidyverse.org/) and
|
|
91
|
+
Julia [Dataframes](https://dataframes.juliadata.org/stable/) is [here](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
|
|
93
92
|
|
|
94
93
|
## Data frame in `RedAmber`
|
|
95
94
|
|
|
@@ -137,7 +136,7 @@ For example, we can compute mean prices per cut for the data larger than 1 carat
|
|
|
137
136
|
|
|
138
137
|
```ruby
|
|
139
138
|
df = diamonds
|
|
140
|
-
.slice { carat > 1 }
|
|
139
|
+
.slice { carat > 1 } # or use #filter instead of #slice
|
|
141
140
|
.group(:cut)
|
|
142
141
|
.mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.
|
|
143
142
|
.sort('-mean(price)')
|
|
@@ -186,7 +185,7 @@ starwars
|
|
|
186
185
|
.drop(0) # delete unnecessary index column
|
|
187
186
|
.remove { species == "NA" } # delete unnecessary rows
|
|
188
187
|
.group(:species) { [count(:species), mean(:height, :mass)] }
|
|
189
|
-
.slice { count > 1 }
|
|
188
|
+
.slice { count > 1 } # or use #filter instead of #slice
|
|
190
189
|
|
|
191
190
|
# =>
|
|
192
191
|
#<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000000f848>
|
|
@@ -213,7 +212,7 @@ See [Vector.md](doc/Vector.md) for details.
|
|
|
213
212
|
|
|
214
213
|
## Jupyter notebook
|
|
215
214
|
|
|
216
|
-
[
|
|
215
|
+
[Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
|
|
217
216
|
([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) shows more examples in jupyter notebook.
|
|
218
217
|
|
|
219
218
|
You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
|
data/benchmark/basic.yml
CHANGED
data/benchmark/combine.yml
CHANGED
data/benchmark/dataframe.yml
CHANGED
data/benchmark/group.yml
CHANGED
data/benchmark/reshape.yml
CHANGED
data/benchmark/vector.yml
CHANGED
data/doc/DataFrame.md
CHANGED
|
@@ -57,6 +57,10 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
|
57
57
|
```ruby
|
|
58
58
|
RedAmber::DataFrame.load("test/entity/with_header.csv")
|
|
59
59
|
```
|
|
60
|
+
|
|
61
|
+
```ruby
|
|
62
|
+
RedAmber::DataFrame.load("test/entity/without_header.csv", headers: [:x, :y, :z])
|
|
63
|
+
```
|
|
60
64
|
|
|
61
65
|
- from a string buffer
|
|
62
66
|
|
|
@@ -275,6 +279,7 @@ penguins.to_rover
|
|
|
275
279
|
|
|
276
280
|
- Shows some information about self in a transposed style.
|
|
277
281
|
- `tdr_str` returns same info as a String.
|
|
282
|
+
- `glimpse` is an alias. It is similar to dplyr's (or Polars's) `glimpse()`.
|
|
278
283
|
|
|
279
284
|
```ruby
|
|
280
285
|
require 'red_amber'
|
|
@@ -568,7 +573,7 @@ penguins.to_rover
|
|
|
568
573
|
[1, 2, 3]
|
|
569
574
|
```
|
|
570
575
|
|
|
571
|
-
### `slice ` -
|
|
576
|
+
### `slice ` - cut into slices of records -
|
|
572
577
|
|
|
573
578
|
Slice and select records (rows) to create a sub DataFrame.
|
|
574
579
|
|
|
@@ -601,11 +606,14 @@ penguins.to_rover
|
|
|
601
606
|
|
|
602
607
|
- Booleans as an argument
|
|
603
608
|
|
|
604
|
-
`slice(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray . Booleans must be same length as `size`.
|
|
609
|
+
`filter(booleans)` or `slice(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray. Booleans must be the same length as `size`.
|
|
610
|
+
|
|
611
|
+
note: `slice(booleans)` is acceptable for orthogonality of `slice`/`remove`.
|
|
605
612
|
|
|
606
613
|
```ruby
|
|
607
614
|
vector = penguins[:bill_length_mm]
|
|
608
|
-
penguins.
|
|
615
|
+
penguins.filter(vector >= 40)
|
|
616
|
+
# penguins.slice(vector >= 40) is also acceptable
|
|
609
617
|
|
|
610
618
|
# =>
|
|
611
619
|
#<RedAmber::DataFrame : 242 x 8 Vectors, 0x0000000000043d3c>
|
|
@@ -833,14 +841,14 @@ penguins.to_rover
|
|
|
833
841
|
|
|
834
842
|
Assign new or updated variables (columns) and create an updated DataFrame.
|
|
835
843
|
|
|
836
|
-
- Variables with new keys will append new columns from
|
|
844
|
+
- Variables with new keys will append new columns from right.
|
|
837
845
|
- Variables with existing keys will update corresponding vectors.
|
|
838
846
|
|
|
839
847
|

|
|
840
848
|
|
|
841
849
|
- Variables as arguments
|
|
842
850
|
|
|
843
|
-
`assign(
|
|
851
|
+
`assign(key_value_pairs)` accepts pairs of key and values as parameters. `key_value_pairs` should be a Hash of `{key => array_like}` or an Array of Arrays like `[[key, array_like], ... ]`. `array_like` is either `Vector`, `Array` or `Arrow::Array`.
|
|
844
852
|
|
|
845
853
|
```ruby
|
|
846
854
|
df = RedAmber::DataFrame.new(
|
|
@@ -857,12 +865,12 @@ penguins.to_rover
|
|
|
857
865
|
2 Hinata 28
|
|
858
866
|
|
|
859
867
|
# update :age and add :brother
|
|
860
|
-
df.assign
|
|
868
|
+
df.assign(
|
|
861
869
|
{
|
|
862
870
|
age: age + 29,
|
|
863
871
|
brother: ['Santa', nil, 'Momotaro']
|
|
864
872
|
}
|
|
865
|
-
|
|
873
|
+
)
|
|
866
874
|
|
|
867
875
|
# =>
|
|
868
876
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000658b0>
|
|
@@ -932,7 +940,7 @@ penguins.to_rover
|
|
|
932
940
|
|
|
933
941
|
- Append from left
|
|
934
942
|
|
|
935
|
-
`assign_left` method accepts the same parameters and block as `assign`, but append new columns from
|
|
943
|
+
`assign_left` method accepts the same parameters and block as `assign`, but append new columns from left.
|
|
936
944
|
|
|
937
945
|
```ruby
|
|
938
946
|
df.assign_left(new_index: df.indices(1))
|
|
@@ -1453,6 +1461,8 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
|
1453
1461
|
1 B 4
|
|
1454
1462
|
2 D 5
|
|
1455
1463
|
```
|
|
1464
|
+
##### `set_operable?(other)`
|
|
1465
|
+
Check if `types` of self and other are same.
|
|
1456
1466
|
|
|
1457
1467
|
##### `intersect(other)`
|
|
1458
1468
|
|
|
@@ -1498,15 +1508,23 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
|
1498
1508
|
<string> <uint8>
|
|
1499
1509
|
1 B 2
|
|
1500
1510
|
2 C 3
|
|
1511
|
+
|
|
1512
|
+
other.difference(df)
|
|
1513
|
+
#=>
|
|
1514
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x0000000000040e0c>
|
|
1515
|
+
KEY1 KEY2
|
|
1516
|
+
<string> <uint8>
|
|
1517
|
+
0 B 4
|
|
1518
|
+
1 D 5
|
|
1501
1519
|
```
|
|
1502
1520
|
|
|
1503
1521
|
## Binding
|
|
1504
1522
|
|
|
1505
1523
|
### `concatenate(other)`
|
|
1506
1524
|
|
|
1507
|
-
Concatenate another DataFrame or Table onto the bottom of self. The
|
|
1525
|
+
Concatenate another DataFrame or Table onto the bottom of self. The types of other must be the same as self.
|
|
1508
1526
|
|
|
1509
|
-
The alias is `concat`.
|
|
1527
|
+
The aliases are `concat` and `bind_rows`.
|
|
1510
1528
|
|
|
1511
1529
|
An array of DataFrames or Tables is also acceptable as other.
|
|
1512
1530
|
|
|
@@ -1538,9 +1556,11 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
|
1538
1556
|
3 4 D
|
|
1539
1557
|
```
|
|
1540
1558
|
|
|
1541
|
-
### `merge(other)`
|
|
1559
|
+
### `merge(*other)`
|
|
1560
|
+
|
|
1561
|
+
Concatenate another DataFrame or Table onto the right side of self (column-wise). The size of other must be the same as self. Self and other must not share the same key.
|
|
1542
1562
|
|
|
1543
|
-
|
|
1563
|
+
The alias is `bind_cols`.
|
|
1544
1564
|
|
|
1545
1565
|
```ruby
|
|
1546
1566
|
df
|