red_amber 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +44 -18
- data/Gemfile +4 -1
- data/README.md +51 -76
- data/Rakefile +1 -0
- data/benchmark/csv_load_penguins.yml +1 -1
- data/doc/47_examples_of_red_amber.ipynb +4872 -0
- data/doc/DataFrame.md +370 -210
- data/doc/Vector.md +68 -15
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red-amber.rb +1 -25
- data/lib/red_amber/data_frame.rb +9 -7
- data/lib/red_amber/data_frame_displayable.rb +79 -4
- data/lib/red_amber/group.rb +61 -0
- data/lib/red_amber/vector.rb +17 -3
- data/lib/red_amber/vector_functions.rb +22 -20
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +27 -1
- data/red_amber.gemspec +0 -2
- metadata +4 -31
- data/lib/red_amber/data_frame_observation_operation.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88bdd603d8daec1a95c0277ef68857f84346ad7cf95d0ba23a306e6b70567c29
|
4
|
+
data.tar.gz: 40add80cbaa5183ca0e93eadcdcd1fead37015cac1cb2360660002c0b1878255
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d043eea51117ecc48bdc52fa951e24d2618f273eb289a30f5bbb182e1a891763cdd35f6a7c6764f6e0061bddeaaa86b2374de1dc2b48f25a5b6b05c9af83a0e3
|
7
|
+
data.tar.gz: cdbba19750bf71fe99e55bf6c46cb4522018f43563d7a93fdc375987f9388234e4f7e833297fdb6b8dd5a41b5a1bfdbf287ea47663f5f8a90facb56a4c63daef
|
data/.rubocop.yml
CHANGED
@@ -80,6 +80,7 @@ Metrics/CyclomaticComplexity:
|
|
80
80
|
Exclude:
|
81
81
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 14
|
82
82
|
- 'lib/red_amber/vector_updatable.rb' # Max: 14
|
83
|
+
- 'lib/red_amber/data_frame_displayable.rb' # Max: 18
|
83
84
|
|
84
85
|
# Max: 10
|
85
86
|
Metrics/MethodLength:
|
@@ -93,6 +94,7 @@ Metrics/ModuleLength:
|
|
93
94
|
Exclude:
|
94
95
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 141
|
95
96
|
- 'lib/red_amber/vector_functions.rb' # Max: 114
|
97
|
+
- 'lib/red_amber/data_frame_displayable.rb' # Max: 132
|
96
98
|
|
97
99
|
# Max: 8
|
98
100
|
Metrics/PerceivedComplexity:
|
@@ -100,6 +102,7 @@ Metrics/PerceivedComplexity:
|
|
100
102
|
Exclude:
|
101
103
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 14
|
102
104
|
- 'lib/red_amber/vector_updatable.rb' # Max: 15
|
105
|
+
- 'lib/red_amber/data_frame_displayable.rb' # Max: 19
|
103
106
|
|
104
107
|
Naming/FileName:
|
105
108
|
Exclude:
|
data/CHANGELOG.md
CHANGED
@@ -1,29 +1,55 @@
|
|
1
|
-
## -
|
1
|
+
## [0.1.9] - Unreleased
|
2
2
|
|
3
|
-
-
|
4
|
-
- YARD support
|
3
|
+
- Supports Arrow 9.0.0
|
5
4
|
|
6
|
-
-
|
7
|
-
- `red-amber` gem
|
5
|
+
## [0.1.7] - 2022-07-15 (experimental)
|
8
6
|
|
9
|
-
-
|
10
|
-
|
7
|
+
- Bug fixes
|
8
|
+
|
9
|
+
- Remove development dependency for red-dataset-arrow (#47)
|
10
|
+
- To avoid irregular failures in CI tests
|
11
|
+
- Add red-datasets to development dependency instead (#49)
|
11
12
|
|
12
|
-
|
13
|
+
- Suppress useless log in tests (#46)
|
14
|
+
Suppress log of Webrick and iruby.
|
13
15
|
|
14
|
-
-
|
15
|
-
|
16
|
-
|
16
|
+
- New features and improvements
|
17
|
+
|
18
|
+
- Use Table mode as default preview mode in `inspect`/`to_s` (#40)
|
19
|
+
- Show examples in documents in Table
|
20
|
+
- Use the word rows/columns
|
21
|
+
- Update images of data processing in Table style
|
22
|
+
|
23
|
+
- Introduce a new Table formatter (#47)
|
24
|
+
- Migrate from Arrow's formatter
|
25
|
+
- Do not use TAB, format by spaces only.
|
26
|
+
- Align column width with head rows and tail rows.
|
27
|
+
- Show nils.
|
28
|
+
- Show data types.
|
29
|
+
- Refine documents to use new formatter output
|
30
|
+
|
31
|
+
- Simplify options of Vector functions (#46)
|
32
|
+
In previous code, Vector functions with options took an optional argument `opt`.
|
33
|
+
|
34
|
+
- Add `#float?`, `#integer?` to Vector (#46)
|
35
|
+
- Add `#each` to Vector (#47)
|
36
|
+
|
37
|
+
- Introduce class `Group` (#48)
|
38
|
+
- Refine `DataFrame#group` to use class Group
|
39
|
+
- Add methods to Group
|
40
|
+
|
41
|
+
- Move parquet and rover to development dependency (#49)
|
42
|
+
|
43
|
+
- Refine text in `DataFrame#to_iruby` (#40)
|
17
44
|
|
18
|
-
|
45
|
+
- Add badges to the GitHub site
|
46
|
+
- Gitter badge for Red Data Tools (#42)
|
47
|
+
- Gem version and CI status badge (#45)
|
19
48
|
|
20
|
-
-
|
21
|
-
-
|
22
|
-
- Improve as more performant
|
23
|
-
- More examples of frequently needed tasks
|
49
|
+
- Exchange containers in red-amber.rb and red_amber.rb (#47)
|
50
|
+
- Mainly use red_amber for consistency with the folder name
|
24
51
|
|
25
|
-
-
|
26
|
-
- `DataFrame#join features
|
52
|
+
- Add Jupyter notebook '47 Examples of Red Amber' (#49)
|
27
53
|
|
28
54
|
## [0.1.6] - 2022-06-26 (experimental)
|
29
55
|
|
data/Gemfile
CHANGED
@@ -7,6 +7,9 @@ gemspec
|
|
7
7
|
group :test do
|
8
8
|
gem 'rake'
|
9
9
|
|
10
|
+
gem 'red-parquet', '>= 8.0.0'
|
11
|
+
gem 'rover-df', '~> 0.3.0'
|
12
|
+
|
10
13
|
gem 'rubocop'
|
11
14
|
gem 'rubocop-performance', require: false
|
12
15
|
gem 'rubocop-rake'
|
@@ -17,5 +20,5 @@ group :test do
|
|
17
20
|
gem 'webrick'
|
18
21
|
|
19
22
|
gem 'benchmark_driver'
|
20
|
-
gem 'red-datasets
|
23
|
+
gem 'red-datasets'
|
21
24
|
end
|
data/README.md
CHANGED
@@ -1,16 +1,20 @@
|
|
1
1
|
# RedAmber
|
2
2
|
|
3
|
+
[](https://badge.fury.io/rb/red_amber)
|
4
|
+
[](https://github.com/heronshoes/red_amber/actions/workflows/test.yml)
|
5
|
+
|
3
6
|
A simple dataframe library for Ruby (experimental).
|
4
7
|
|
5
|
-
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
8
|
+
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow) [](https://gitter.im/red-data-tools/en)
|
6
9
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
7
10
|
|
8
11
|
## Requirements
|
9
12
|
|
10
13
|
```ruby
|
11
14
|
gem 'red-arrow', '>= 8.0.0'
|
12
|
-
|
13
|
-
gem '
|
15
|
+
|
16
|
+
gem 'red-parquet', '>= 8.0.0' # Optional, if you use IO from/to parquet
|
17
|
+
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
14
18
|
```
|
15
19
|
|
16
20
|
## Installation
|
@@ -18,7 +22,8 @@ gem 'rover-df', '~> 0.3.0' # if you use IO from/to Rover::DataFrame
|
|
18
22
|
Install requirements before you install Red Amber.
|
19
23
|
|
20
24
|
- Apache Arrow GLib (>= 8.0.0)
|
21
|
-
|
25
|
+
|
26
|
+
- Apache Parquet GLib (>= 8.0.0) # If you use IO from/to parquet
|
22
27
|
|
23
28
|
See [Apache Arrow install document](https://arrow.apache.org/install/).
|
24
29
|
|
@@ -42,11 +47,6 @@ Or install it yourself as:
|
|
42
47
|
gem install red_amber
|
43
48
|
```
|
44
49
|
|
45
|
-
(From v0.1.6)
|
46
|
-
|
47
|
-
RedAmber uses TDR mode for `#inspect` and `#to_iruby` by default. If you prefer Table mode, please set the environment variable
|
48
|
-
`RED_AMBER_OUTPUT_MODE` to `"table"`. See [TDR section](#TDR) for detail.
|
49
|
-
|
50
50
|
## `RedAmber::DataFrame`
|
51
51
|
|
52
52
|
Represents a set of data in 2D-shape. The entity is a Red Arrow's Table object.
|
@@ -56,54 +56,21 @@ require 'red_amber' # require 'red-amber' is also OK.
|
|
56
56
|
require 'datasets-arrow'
|
57
57
|
|
58
58
|
arrow = Datasets::Penguins.new.to_arrow
|
59
|
-
|
60
|
-
penguins.table
|
61
|
-
|
62
|
-
# =>
|
63
|
-
#<Arrow::Table:0x111271098 ptr=0x7f9118b3e0b0>
|
64
|
-
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
|
65
|
-
0 Adelie Torgersen 39.100000 18.700000 181 3750 male 2007
|
66
|
-
1 Adelie Torgersen 39.500000 17.400000 186 3800 female 2007
|
67
|
-
2 Adelie Torgersen 40.300000 18.000000 195 3250 female 2007
|
68
|
-
3 Adelie Torgersen (null) (null) (null) (null) (null) 2007
|
69
|
-
4 Adelie Torgersen 36.700000 19.300000 193 3450 female 2007
|
70
|
-
5 Adelie Torgersen 39.300000 20.600000 190 3650 male 2007
|
71
|
-
6 Adelie Torgersen 38.900000 17.800000 181 3625 female 2007
|
72
|
-
7 Adelie Torgersen 39.200000 19.600000 195 4675 male 2007
|
73
|
-
8 Adelie Torgersen 34.100000 18.100000 193 3475 (null) 2007
|
74
|
-
9 Adelie Torgersen 42.000000 20.200000 190 4250 (null) 2007
|
75
|
-
...
|
76
|
-
334 Gentoo Biscoe 46.200000 14.100000 217 4375 female 2009
|
77
|
-
335 Gentoo Biscoe 55.100000 16.000000 230 5850 male 2009
|
78
|
-
336 Gentoo Biscoe 44.500000 15.700000 217 4875 (null) 2009
|
79
|
-
337 Gentoo Biscoe 48.800000 16.200000 222 6000 male 2009
|
80
|
-
338 Gentoo Biscoe 47.200000 13.700000 214 4925 female 2009
|
81
|
-
339 Gentoo Biscoe (null) (null) (null) (null) (null) 2009
|
82
|
-
340 Gentoo Biscoe 46.800000 14.300000 215 4850 female 2009
|
83
|
-
341 Gentoo Biscoe 50.400000 15.700000 222 5750 male 2009
|
84
|
-
342 Gentoo Biscoe 45.200000 14.800000 212 5200 female 2009
|
85
|
-
343 Gentoo Biscoe 49.900000 16.100000 213 5400 male 2009
|
86
|
-
```
|
87
|
-
|
88
|
-
By default, RedAmber shows self by compact transposed style. This unfamiliar style (TDR) is designed for
|
89
|
-
the exploratory data processing. It keeps Vectors as row vectors, shows keys and types at a glance, shows levels
|
90
|
-
for the 'factor-like' variables and shows the number of abnormal values like NaN and nil.
|
91
|
-
|
92
|
-
```ruby
|
93
|
-
penguins
|
59
|
+
RedAmber::DataFrame.new(arrow)
|
94
60
|
|
95
61
|
# =>
|
96
|
-
RedAmber::DataFrame : 344 x 8 Vectors
|
97
|
-
|
98
|
-
|
99
|
-
1
|
100
|
-
2
|
101
|
-
3
|
102
|
-
4
|
103
|
-
5
|
104
|
-
|
105
|
-
7
|
106
|
-
8
|
62
|
+
#<RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000013790>
|
63
|
+
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
64
|
+
<string> <string> <double> <double> <uint8> ... <uint16>
|
65
|
+
1 Adelie Torgersen 39.1 18.7 181 ... 2007
|
66
|
+
2 Adelie Torgersen 39.5 17.4 186 ... 2007
|
67
|
+
3 Adelie Torgersen 40.3 18.0 195 ... 2007
|
68
|
+
4 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
69
|
+
5 Adelie Torgersen 36.7 19.3 193 ... 2007
|
70
|
+
: : : : : : ... :
|
71
|
+
342 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
72
|
+
343 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
73
|
+
344 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
107
74
|
```
|
108
75
|
|
109
76
|
### DataFrame model
|
@@ -113,23 +80,41 @@ For example, `DataFrame#pick` accepts keys as an argument and returns a sub Data
|
|
113
80
|
|
114
81
|
```ruby
|
115
82
|
df = penguins.pick(:body_mass_g)
|
83
|
+
df
|
84
|
+
|
116
85
|
# =>
|
117
|
-
#<RedAmber::DataFrame : 344 x 1 Vector,
|
118
|
-
|
119
|
-
|
120
|
-
1
|
86
|
+
#<RedAmber::DataFrame : 344 x 1 Vector, 0x0000000000015cc0>
|
87
|
+
body_mass_g
|
88
|
+
<uint16>
|
89
|
+
1 3750
|
90
|
+
2 3800
|
91
|
+
3 3250
|
92
|
+
4 (nil)
|
93
|
+
5 3450
|
94
|
+
: :
|
95
|
+
342 5750
|
96
|
+
343 5200
|
97
|
+
344 5400
|
121
98
|
```
|
122
99
|
|
123
100
|
`DataFrame#assign` creates new variables (column in the table).
|
124
101
|
|
125
102
|
```ruby
|
126
103
|
df.assign(:body_mass_kg => df[:body_mass_g] / 1000.0)
|
104
|
+
|
127
105
|
# =>
|
128
|
-
#<RedAmber::DataFrame : 344 x 2 Vectors,
|
129
|
-
|
130
|
-
|
131
|
-
1
|
132
|
-
2
|
106
|
+
#<RedAmber::DataFrame : 344 x 2 Vectors, 0x00000000000212f0>
|
107
|
+
body_mass_g body_mass_kg
|
108
|
+
<uint16> <double>
|
109
|
+
1 3750 3.8
|
110
|
+
2 3800 3.8
|
111
|
+
3 3250 3.3
|
112
|
+
4 (nil) (nil)
|
113
|
+
5 3450 3.5
|
114
|
+
: : :
|
115
|
+
342 5750 5.8
|
116
|
+
343 5200 5.2
|
117
|
+
344 5400 5.4
|
133
118
|
```
|
134
119
|
|
135
120
|
DataFrame manipulating methods like `pick`, `drop`, `slice`, `remove`, `rename` and `assign` accept a block.
|
@@ -178,19 +163,9 @@ Vectors accepts some [functional methods from Arrow](https://arrow.apache.org/do
|
|
178
163
|
|
179
164
|
See [Vector.md](doc/Vector.md) for details.
|
180
165
|
|
181
|
-
##
|
182
|
-
|
183
|
-
I named the data frame representation style in the model above as TDR (Transposed DataFrame Representation).
|
184
|
-
|
185
|
-
This library can be used with both TDR mode and usual Table mode.
|
186
|
-
If you set the environment variable `RED_AMBER_OUTPUT_MODE` to `"table"`, output style by `inspect` and `to_iruby` is the Table mode. Other value including nil will output TDR style.
|
187
|
-
|
188
|
-
You can switch the mode in Ruby like this.
|
189
|
-
```ruby
|
190
|
-
ENV['RED_AMBER_OUTPUT_STYLE'] = 'table' # => Table mode
|
191
|
-
```
|
166
|
+
## Jupyter notebook
|
192
167
|
|
193
|
-
|
168
|
+
[47 Examples of Red Amber](doc/47_examples_of_red_amber.ipynb)
|
194
169
|
|
195
170
|
## Development
|
196
171
|
|
data/Rakefile
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
prelude: |
|
2
|
-
require 'datasets-arrow'
|
3
2
|
require 'rover'
|
4
3
|
require 'red_amber'
|
5
4
|
|
6
5
|
penguins_csv = 'benchmark/cache/penguins.csv'
|
7
6
|
|
8
7
|
unless File.exist?(penguins_csv)
|
8
|
+
require 'datasets-arrow'
|
9
9
|
arrow = Datasets::Penguins.new.to_arrow
|
10
10
|
RedAmber::DataFrame.new(arrow).save(penguins_csv)
|
11
11
|
end
|