red_amber 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -5
- data/CHANGELOG.md +93 -1
- data/Gemfile +5 -6
- data/README.ja.md +252 -0
- data/README.md +30 -23
- data/benchmark/basic.yml +1 -1
- data/benchmark/group.yml +12 -5
- data/doc/CODE_OF_CONDUCT.md +1 -1
- data/docker/.env +4 -0
- data/docker/Dockerfile +66 -0
- data/docker/Gemfile +26 -0
- data/docker/Gemfile.lock +118 -0
- data/docker/docker-compose.yml +21 -0
- data/docker/example +86 -0
- data/docker/notebook/examples_of_red_amber.ipynb +8562 -0
- data/docker/notebook/red-amber.ipynb +188 -0
- data/docker/readme.md +118 -0
- data/lib/red_amber/data_frame.rb +78 -4
- data/lib/red_amber/data_frame_combinable.rb +147 -119
- data/lib/red_amber/data_frame_displayable.rb +6 -5
- data/lib/red_amber/data_frame_selectable.rb +49 -0
- data/lib/red_amber/group.rb +190 -89
- data/lib/red_amber/helper.rb +26 -0
- data/lib/red_amber/subframes.rb +166 -66
- data/lib/red_amber/vector.rb +43 -24
- data/lib/red_amber/vector_aggregation.rb +26 -0
- data/lib/red_amber/vector_binary_element_wise.rb +54 -25
- data/lib/red_amber/vector_selectable.rb +74 -23
- data/lib/red_amber/vector_string_function.rb +211 -0
- data/lib/red_amber/vector_unary_element_wise.rb +4 -0
- data/lib/red_amber/vector_updatable.rb +28 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -1
- data/red_amber.gemspec +4 -4
- metadata +20 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b8fc1df498792b2b30d63a47a783cda67ccb8cea09e933aa8cba5d317277f500
|
4
|
+
data.tar.gz: 83e54f0fb6070a6b3c4301d0cd3e5356f1ca4e09bdae200f4fc7694a2e3e7daa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 440dd984e88afd4bee7860a0f5b03c54094b8536de6acf70b770d3f473c1ee93608a5565d62b5bd60d5e8e3ba8c09675e136373a15daef77bfc455c7c5a4a7cc
|
7
|
+
data.tar.gz: '09f27ff2a0c3804b345c4b5c581013135544fed098265e9bca274e34de52791b9a36525df22b17fe80093f14249c195496366e16079c25e70cf660070dc66858'
|
data/.rubocop.yml
CHANGED
@@ -52,7 +52,7 @@ Lint/BinaryOperatorWithIdenticalOperands:
|
|
52
52
|
|
53
53
|
Lint/Debugger:
|
54
54
|
Exclude:
|
55
|
-
- '
|
55
|
+
- 'docker/example'
|
56
56
|
|
57
57
|
# Need for test with empty block
|
58
58
|
# Offense count: 1
|
@@ -76,7 +76,8 @@ Metrics/AbcSize:
|
|
76
76
|
Max: 30
|
77
77
|
CountRepeatedAttributes: false
|
78
78
|
AllowedMethods: [
|
79
|
-
'
|
79
|
+
'join_merge_keys', # 54.18
|
80
|
+
'join', # 53.1
|
80
81
|
'dataframe_info', # 46.5
|
81
82
|
'format_table', # 84.62
|
82
83
|
'to_long', # 33.66
|
@@ -87,6 +88,9 @@ Metrics/AbcSize:
|
|
87
88
|
'[]', # 33.76
|
88
89
|
'split', # 37.35
|
89
90
|
'aggregate', # 38.13
|
91
|
+
'filters', # 33.91
|
92
|
+
'merge_keys', # 32.17
|
93
|
+
'rename_keys', # 31.64
|
90
94
|
]
|
91
95
|
|
92
96
|
# Max: 25
|
@@ -139,10 +143,12 @@ Metrics/MethodLength:
|
|
139
143
|
Max: 30
|
140
144
|
AllowedMethods: [
|
141
145
|
'join', # 47
|
142
|
-
'
|
146
|
+
'join_merge_keys', # 41
|
143
147
|
'format_table', # 53
|
144
148
|
'slice_by', # 38
|
145
149
|
'assign_update', # 35
|
150
|
+
'summarize', # 35
|
151
|
+
'dataframe_info', # 33
|
146
152
|
'drop', # 32
|
147
153
|
'aggregate', # 31
|
148
154
|
]
|
@@ -219,7 +225,7 @@ Naming/PredicateName:
|
|
219
225
|
Rubycw/Rubycw:
|
220
226
|
Exclude:
|
221
227
|
- 'test/**/*'
|
222
|
-
- '
|
228
|
+
- 'docker/example'
|
223
229
|
|
224
230
|
# Offense count: 16
|
225
231
|
# This cop supports safe autocorrection (--autocorrect).
|
@@ -236,7 +242,7 @@ Style/SlicingWithRange:
|
|
236
242
|
|
237
243
|
Style/MixinUsage:
|
238
244
|
Exclude:
|
239
|
-
- '
|
245
|
+
- 'docker/example'
|
240
246
|
|
241
247
|
# Necessary to Vector < 0 element-wise comparison
|
242
248
|
# Offense count: 5
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,95 @@
|
|
1
|
+
## [0.5.0] - 2023-05-24
|
2
|
+
|
3
|
+
- Breaking change
|
4
|
+
- Use non keyword argument in #sub_by_value (#219)
|
5
|
+
- Upgrade dependency to Arrow 12.0.0 (#238)
|
6
|
+
- right_join will output columns in the same order as Red Arrow.
|
7
|
+
- DataFrame#join will not force ordering of original column by default
|
8
|
+
- Join with type, such as full_join, sort after join by default
|
9
|
+
|
10
|
+
- Bug fixes
|
11
|
+
- Use truncate in Vector#sample(float) (#229)
|
12
|
+
- Support options in DataFrame#tdra (#231)
|
13
|
+
- Fix printing table with non-ascii strings (#233)
|
14
|
+
- Fix join for Arrow 12.0.0
|
15
|
+
|
16
|
+
- New features and improvements
|
17
|
+
- Add a singleton method Vector.[] (#218)
|
18
|
+
- Add an alias #sub_group (#219)
|
19
|
+
- Accept Group#summarize{Hash} to rename aggregated columns (#219)
|
20
|
+
- Add Group#group_frame (#219)
|
21
|
+
- Add Vector#cast (#224)
|
22
|
+
- Add Vector#fill_nil(value) (#226)
|
23
|
+
- Add Vector#one (#227)
|
24
|
+
- Add Vector#mode (#228)
|
25
|
+
- Add DataFrame#propagate (#235)
|
26
|
+
- Add DataFrame#sample (#237)
|
27
|
+
- Add DataFrame#shuffle (#237)
|
28
|
+
- Support RankOptions in Vector#rank (#239)
|
29
|
+
- Introduce MatchSubstringOptions family in Vector (#241)
|
30
|
+
- Introduce Vector#match_substring?
|
31
|
+
- Add Vector#end_with?, #start_with? method
|
32
|
+
- Add Vector#match_like?
|
33
|
+
- Add Vector#count_substring method
|
34
|
+
|
35
|
+
- Refactoring
|
36
|
+
- Refine Group and SubFrames function (#219)
|
37
|
+
- Refine Group#group_count
|
38
|
+
- Use Acero in Group#filters
|
39
|
+
- Refine Group#filters, not using Acero
|
40
|
+
- Refine Group#summarize(array)
|
41
|
+
- Use Acero for renaming columns in join (#238)
|
42
|
+
- Use index kernel with IndexOptions introduced in 12.0.0 (#240)
|
43
|
+
|
44
|
+
- Improve in tests/CI
|
45
|
+
- Use Fedora 39 Rawhide in CI (#238)
|
46
|
+
|
47
|
+
- Documentation and Example
|
48
|
+
- Add missing yard documents for SubFrames::Selectors (#219)
|
49
|
+
- Update docker/example (#219)
|
50
|
+
- Update Gemfile in docker (#219)
|
51
|
+
- Add README.ja.md (#242)
|
52
|
+
|
53
|
+
- GitHub site
|
54
|
+
- Update link of Red Data Tools Chat to matrix (#242)
|
55
|
+
|
56
|
+
- Thanks
|
57
|
+
|
58
|
+
## [0.4.2] - 2023-04-02
|
59
|
+
|
60
|
+
- Breaking change
|
61
|
+
|
62
|
+
- Bug fixes
|
63
|
+
- Fix Vector#modulo, #fdiv, #remainder (#203)
|
64
|
+
|
65
|
+
- New features and improvements
|
66
|
+
- Update SubFrames#take to return SubFrames (#212)
|
67
|
+
|
68
|
+
- Refactoring
|
69
|
+
- Refine SubFrames to support partial retrieval (#207)
|
70
|
+
- Upgrade SubFrames#frames and promote to public (#207)
|
71
|
+
- Use faster count in Group#inspect (#207)
|
72
|
+
|
73
|
+
- Improve in tests/CI
|
74
|
+
|
75
|
+
- Documentation and Example
|
76
|
+
- Introduce minimum docker environment (#205)
|
77
|
+
- Move example REPL to docker (#205)
|
78
|
+
- Add readme.md in docker (#205)
|
79
|
+
- Add example_of_red_amber.ipynb (#205)
|
80
|
+
- Use smaller dataset in irb example
|
81
|
+
- Fix docker/example
|
82
|
+
- Updated link to red-data-tools (#213)
|
83
|
+
- Thanks to Soumya Kushwaha
|
84
|
+
|
85
|
+
- GitHub site
|
86
|
+
- Migrated to [Red Data Tools](https://github.com/red-data-tools)
|
87
|
+
- Thanks to Sutou Kouhei
|
88
|
+
|
89
|
+
- Thanks
|
90
|
+
- Sutou Kouhei
|
91
|
+
- Soumya Kushwaha
|
92
|
+
|
1
93
|
## [0.4.1] - 2023-03-11
|
2
94
|
|
3
95
|
- Breaking change
|
@@ -676,7 +768,7 @@
|
|
676
768
|
- Improve `DataFrame#[]`, `#slice`, `#remove` by a new engine
|
677
769
|
- It parses arguments to Vector internally.
|
678
770
|
- Used Kernel#Array to simplify code (#16) .
|
679
|
-
-
|
771
|
+
- Move `DataFrame#slice`, `#remove` to Selectable
|
680
772
|
- Refine `DataFrame#take`, `#filter` (undocumented)
|
681
773
|
|
682
774
|
- Introduce coerce in Vector (#35)
|
data/Gemfile
CHANGED
@@ -7,7 +7,7 @@ gemspec
|
|
7
7
|
group :test do
|
8
8
|
gem 'rake'
|
9
9
|
|
10
|
-
gem 'red-parquet', '~>
|
10
|
+
gem 'red-parquet', '~> 12.0.0'
|
11
11
|
gem 'rover-df', '~> 0.3.0'
|
12
12
|
|
13
13
|
gem 'rubocop'
|
@@ -15,14 +15,13 @@ group :test do
|
|
15
15
|
gem 'rubocop-rake'
|
16
16
|
gem 'rubocop-rubycw', require: false
|
17
17
|
|
18
|
-
gem 'iruby'
|
19
|
-
gem 'test-unit'
|
20
|
-
gem 'webrick'
|
21
|
-
gem 'yard'
|
22
|
-
|
23
18
|
gem 'benchmark_driver'
|
19
|
+
gem 'iruby'
|
24
20
|
gem 'red-arrow-numo-narray'
|
25
21
|
gem 'red-datasets-arrow'
|
26
22
|
gem 'simplecov'
|
27
23
|
gem 'simplecov-json'
|
24
|
+
gem 'test-unit'
|
25
|
+
gem 'webrick'
|
26
|
+
gem 'yard'
|
28
27
|
end
|
data/README.ja.md
ADDED
@@ -0,0 +1,252 @@
|
|
1
|
+
# RedAmber
|
2
|
+
|
3
|
+
[](https://rubygems.org/gems/red_amber)
|
4
|
+
[](https://github.com/red-data-tools/red_amber/actions/workflows/ci.yml)
|
5
|
+
[](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
6
|
+
[](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
7
|
+
[](https://heronshoes.github.io/red_amber/)
|
8
|
+
[](https://github.com/red-data-tools/red_amber/discussions)
|
9
|
+
|
10
|
+
Rubyistのためのデータフレームライブラリ.
|
11
|
+
|
12
|
+
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
13
|
+
[](https://app.element.io/#/room/#red-data-tools_ja:gitter.im) [](https://rubygems.org/gems/red-arrow)
|
14
|
+
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
15
|
+
|
16
|
+
[README in English](README.md)
|
17
|
+
|
18
|
+

|
19
|
+
|
20
|
+
## 必要な環境
|
21
|
+
### Ruby
|
22
|
+
- Ruby 3.0 以上.
|
23
|
+
|
24
|
+
### ライブラリ
|
25
|
+
```ruby
|
26
|
+
gem 'red-arrow', '~> 12.0.0' # お使いの環境に合わせた Apache Arrow が必要です(下記のインストールを参照してください)
|
27
|
+
gem 'red-parquet', '~> 12.0.0' # 必要に応じて。Parquetの入出力が必要な場合。
|
28
|
+
gem 'red-datasets-arrow' # 必要に応じて。Red Datasets またはランダムサンプリングが必要な場合。
|
29
|
+
gem 'red-arrow-activerecord' # 必要に応じて。Active Record とのデータ交換が必要な場合。
|
30
|
+
gem 'rover-df', '~> 0.3.0' # 必要に応じて。Rover::DataFrameに対する入出力が必要な場合。
|
31
|
+
```
|
32
|
+
|
33
|
+
## インストール
|
34
|
+
|
35
|
+
RedAmberをインストールする前に、下記のライブラリのインストールが必要です。
|
36
|
+
|
37
|
+
- Apache Arrow (~> 12.0.0)
|
38
|
+
- Apache Arrow GLib (~> 12.0.0)
|
39
|
+
- Apache Parquet GLib (~> 12.0.0) # Parquetの入出力が必要な場合。
|
40
|
+
|
41
|
+
環境ごとの詳しいインストール方法は、 [Apache Arrow install document](https://arrow.apache.org/install/) を参照してください。
|
42
|
+
|
43
|
+
- Ubuntuの場合の最低限必要なインストール例:
|
44
|
+
|
45
|
+
```
|
46
|
+
sudo apt update
|
47
|
+
sudo apt install -y -V ca-certificates lsb-release wget
|
48
|
+
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
|
49
|
+
sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
|
50
|
+
sudo apt update
|
51
|
+
sudo apt install -y -V libarrow-dev
|
52
|
+
sudo apt install -y -V libarrow-glib-dev
|
53
|
+
```
|
54
|
+
|
55
|
+
- Fedora 39 (Rawhide)の場合:
|
56
|
+
|
57
|
+
```
|
58
|
+
sudo dnf update
|
59
|
+
sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
|
60
|
+
```
|
61
|
+
|
62
|
+
- macOS の場合は、Homebrewを使用する:
|
63
|
+
|
64
|
+
```
|
65
|
+
brew install apache-arrow
|
66
|
+
brew install apache-arrow-glib
|
67
|
+
```
|
68
|
+
|
69
|
+
Apache Arrowがインストールできたら、下記の行をGemfileに追加してください:
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
gem 'red-arrow', '~> 12.0.0' # お使いの環境に合わせた Apache Arrow が必要です(下記のインストールを参照してください)
|
73
|
+
gem 'red_amber'
|
74
|
+
gem 'red-parquet', '~> 12.0.0' # 必要に応じて。Parquetの入出力が必要な場合。
|
75
|
+
gem 'red-datasets-arrow' # 必要に応じて。Red Datasets またはランダムサンプリングが必要な場合。
|
76
|
+
gem 'red-arrow-numo-narray' # 必要に応じて。Numo::NArrayとの連携が必要な場合
|
77
|
+
gem 'red-arrow-activerecord' # 必要に応じて。Active Record とのデータ交換が必要な場合。
|
78
|
+
gem 'rover-df', '~> 0.3.0' # 必要に応じて。Rover::DataFrameに対する入出力が必要な場合。
|
79
|
+
```
|
80
|
+
|
81
|
+
`bundle install`とするか、または `gem install red_amber`としてインストールしてください。
|
82
|
+
|
83
|
+
## Docker イメージと Jupyter Notebook
|
84
|
+
|
85
|
+
このリポジトリの`docker` フォルダーから Docker コンテナ環境を生成できます。リポジトリをクローンしてから、dockerフォルダーにある [readme](docker/readme.md) を参照してください。その環境では `docker/notebook` フォルダーにある Jupyter Notebookイメージを試用できます。
|
86
|
+
|
87
|
+
このREADMEの内容をネットワーク上のJupyter Notebookでインタラクティブに試用することも出来ます。 [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
|
88
|
+
[](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
|
89
|
+
|
90
|
+
Jupyter Notebookの環境を含めた他の多くのデータ処理用のライブラリーとともにRedAmberもパッケージングされたDocker Imageとして、[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) が利用できます(Thanks to Kenta Murata).
|
91
|
+
|
92
|
+
## 他のデータフレームライブラリとの比較表
|
93
|
+
|
94
|
+
RedAmberの基本的な機能をPython
|
95
|
+
[pandas](https://pandas.pydata.org/) や
|
96
|
+
R [Tidyverse](https://www.tidyverse.org/) や
|
97
|
+
Julia [Dataframes](https://dataframes.juliadata.org/stable/) と比較した表は [DataFrame_Comparison.md](doc/DataFrame_Comparison.md) にあります(Thanks to Benson Muite).
|
98
|
+
|
99
|
+
## `RedAmber`のデータフレーム
|
100
|
+
|
101
|
+
クラス `RedAmber::DataFrame` は2次元のデータの集まりを表現します。
|
102
|
+
その実体は Red Arrowの Tableオブジェクトです。
|
103
|
+
|
104
|
+

|
105
|
+
|
106
|
+
それではライブラリをロードしていくつかの例を試してみましょう。
|
107
|
+
|
108
|
+
```ruby
|
109
|
+
require 'red_amber' # require 'red-amber' でもOKです.
|
110
|
+
include RedAmber
|
111
|
+
```
|
112
|
+
|
113
|
+
### 例: diamonds データセット
|
114
|
+
|
115
|
+
もしまだであれば、Red DatasetsのArrow拡張を`
|
116
|
+
gem install red-datasets-arrow
|
117
|
+
`
|
118
|
+
としてインストールしてから次を実行してください。
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
require 'datasets-arrow' # サンプルデータのロードのため
|
122
|
+
|
123
|
+
dataset = Datasets::Diamonds.new
|
124
|
+
diamonds = DataFrame.new(dataset) # v0.2.3以前では, `dataset.to_arrow`とする必要があります。
|
125
|
+
|
126
|
+
# =>
|
127
|
+
#<RedAmber::DataFrame : 53940 x 10 Vectors, 0x000000000000f668>
|
128
|
+
carat cut color clarity depth table price x ... z
|
129
|
+
<double> <string> <string> <string> <double> <double> <uint16> <double> ... <double>
|
130
|
+
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 ... 2.43
|
131
|
+
1 0.21 Premium E SI1 59.8 61.0 326 3.89 ... 2.31
|
132
|
+
2 0.23 Good E VS1 56.9 65.0 327 4.05 ... 2.31
|
133
|
+
3 0.29 Premium I VS2 62.4 58.0 334 4.2 ... 2.63
|
134
|
+
4 0.31 Good J SI2 63.3 58.0 335 4.34 ... 2.75
|
135
|
+
: : : : : : : : : ... :
|
136
|
+
53937 0.7 Very Good D SI1 62.8 60.0 2757 5.66 ... 3.56
|
137
|
+
53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 ... 3.74
|
138
|
+
53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 ... 3.64
|
139
|
+
```
|
140
|
+
|
141
|
+
例えば、1カラットより大きいレコードに対し、cut毎の平均のpriceを求めるには次のようにします。
|
142
|
+
|
143
|
+
```ruby
|
144
|
+
df = diamonds
|
145
|
+
.slice { carat > 1 } # #sliceの代わりに#filterでも可
|
146
|
+
.group(:cut)
|
147
|
+
.mean(:price) # ここで:priceを指定する場合はgroupの前のpickは不要
|
148
|
+
.sort('-mean(price)')
|
149
|
+
|
150
|
+
# =>
|
151
|
+
#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000f67c>
|
152
|
+
cut mean(price)
|
153
|
+
<string> <double>
|
154
|
+
0 Ideal 8674.23
|
155
|
+
1 Premium 8487.25
|
156
|
+
2 Very Good 8340.55
|
157
|
+
3 Good 7753.6
|
158
|
+
4 Fair 7177.86
|
159
|
+
```
|
160
|
+
|
161
|
+
Arrowのデータはイミュータブルなので、これらのメソッドは新しいオブジェクトを返します。
|
162
|
+
|
163
|
+
次の例は、列をリネームしてから新しい列に簡単な計算の結果を格納します。
|
164
|
+
|
165
|
+
```ruby
|
166
|
+
usdjpy = 110.0 # 今よりずっと円高の頃
|
167
|
+
|
168
|
+
df.rename('mean(price)': :mean_price_USD)
|
169
|
+
.assign(:mean_price_JPY) { mean_price_USD * usdjpy }
|
170
|
+
|
171
|
+
# =>
|
172
|
+
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000f71c>
|
173
|
+
cut mean_price_USD mean_price_JPY
|
174
|
+
<string> <double> <double>
|
175
|
+
0 Ideal 8674.23 954164.93
|
176
|
+
1 Premium 8487.25 933597.34
|
177
|
+
2 Very Good 8340.55 917460.37
|
178
|
+
3 Good 7753.6 852896.11
|
179
|
+
4 Fair 7177.86 789564.12
|
180
|
+
```
|
181
|
+
|
182
|
+
### 例: starwars データセット
|
183
|
+
|
184
|
+
次の例は、CSVファイルをダウンロードして`starwars` データセットを読み込みます。その後簡単なデータのクリーニングを行います。
|
185
|
+
|
186
|
+
```ruby
|
187
|
+
uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
|
188
|
+
|
189
|
+
starwars = DataFrame.load(uri)
|
190
|
+
|
191
|
+
starwars
|
192
|
+
.drop(0) # 不要な列を取り除く
|
193
|
+
.remove { species == "NA" } # 不要な行を取り除く
|
194
|
+
.group(:species) { [count(:species), mean(:height, :mass)] }
|
195
|
+
.slice { count > 1 } # #filterでも可
|
196
|
+
|
197
|
+
# =>
|
198
|
+
#<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000000f848>
|
199
|
+
species count mean(height) mean(mass)
|
200
|
+
<string> <int64> <double> <double>
|
201
|
+
0 Human 35 176.65 82.78
|
202
|
+
1 Droid 6 131.2 69.75
|
203
|
+
2 Wookiee 2 231.0 124.0
|
204
|
+
3 Gungan 3 208.67 74.0
|
205
|
+
4 Zabrak 2 173.0 80.0
|
206
|
+
5 Twi'lek 2 179.0 55.0
|
207
|
+
6 Mirialan 2 168.0 53.1
|
208
|
+
7 Kaminoan 2 221.0 88.0
|
209
|
+
```
|
210
|
+
|
211
|
+
より詳しいデータフレームの使用例については、[DataFrame.md](doc/DataFrame.md) をご参照ください。
|
212
|
+
|
213
|
+
|
214
|
+
### 1次元のデータを保持する `Vector`
|
215
|
+
|
216
|
+
クラス`RedAmber::Vector` はデータフレームの中の列方向に格納された1次元のデータ列を保持します.
|
217
|
+
|
218
|
+
より詳しい使用例については [Vector.md](doc/Vector.md) をご参照ください。
|
219
|
+
|
220
|
+
## Jupyter notebook
|
221
|
+
|
222
|
+
Jupyter Notebook形式の使用例として、[Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
|
223
|
+
([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) があります。データのロードから各種のデータ処理まで100以上の使用例を集めています。[Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
|
224
|
+
[](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb)で試すこともできます。
|
225
|
+
|
226
|
+
|
227
|
+
## 開発
|
228
|
+
|
229
|
+
```shell
|
230
|
+
git clone https://github.com/red-data-tools/red_amber.git
|
231
|
+
cd red_amber
|
232
|
+
bundle install
|
233
|
+
bundle exec rake test
|
234
|
+
```
|
235
|
+
|
236
|
+
rake testは必須ですが、rake rubocopをパスすることはコントリビュートの際に必須ではありません。このプロジェクトではコードの書き方の好みを尊重します。ただしマージの際に書き方を統一することがあります。
|
237
|
+
|
238
|
+
## コミュニティ
|
239
|
+
|
240
|
+
このプロジェクトを支援して頂けると嬉しいです。支援の方法はいくつかあります。
|
241
|
+
|
242
|
+
- [discussions](https://github.com/heronshoes/red_amber/discussions)で話をする [](https://github.com/red-data-tools/red_amber/discussions)
|
243
|
+
- Q and Aや使用方法、豆知識などを見る。
|
244
|
+
- 疑問に思っていることを質問する。
|
245
|
+
- 新しいアイデアを共有する。アイデアはdiscussionからissueに昇格させて育てていくこともあります。漠然としたアイデアでもdiscussionから始めて大きくしていきましょう。
|
246
|
+
- [バグ報告や新しい機能の提案](https://github.com/red-data-tools/red_amber/issues)
|
247
|
+
- バグの修正や[プルリクエスト](https://github.com/red-data-tools/red_amber/pulls)
|
248
|
+
- ドキュメントを修正したり、不明確なところを直したり、新しく追加する
|
249
|
+
|
250
|
+
## License
|
251
|
+
|
252
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/README.md
CHANGED
@@ -1,39 +1,43 @@
|
|
1
1
|
# RedAmber
|
2
2
|
|
3
3
|
[](https://rubygems.org/gems/red_amber)
|
4
|
-
[](https://github.com/
|
4
|
+
[](https://github.com/red-data-tools/red_amber/actions/workflows/ci.yml)
|
5
5
|
[](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
|
6
6
|
[](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
|
7
7
|
[](https://heronshoes.github.io/red_amber/)
|
8
|
-
[](https://github.com/
|
8
|
+
[](https://github.com/red-data-tools/red_amber/discussions)
|
9
9
|
|
10
10
|
A simple dataframe library for Ruby.
|
11
11
|
|
12
12
|
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
13
|
-
[](https://app.element.io/#/room/#red-data-tools_en:gitter.im) [](https://rubygems.org/gems/red-arrow)
|
14
14
|
- Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
|
15
15
|
|
16
|
-
|
16
|
+
[日本語のREADME](README.ja.md)
|
17
|
+
|
18
|
+

|
17
19
|
|
18
20
|
## Requirements
|
19
21
|
### Ruby
|
20
22
|
Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
|
21
|
-
- I decided to remove support for Ruby 2.7 without waiting for its EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
|
22
23
|
|
23
|
-
###
|
24
|
+
### Required libraries
|
24
25
|
```ruby
|
25
|
-
gem 'red-arrow', '~>
|
26
|
-
gem 'red-parquet', '~>
|
27
|
-
gem '
|
26
|
+
gem 'red-arrow', '~> 12.0.0' # Requires Apache Arrow (see installation below)
|
27
|
+
gem 'red-parquet', '~> 12.0.0' # Optional, if you use IO from/to parquet
|
28
|
+
gem 'red-datasets-arrow' # Optional, if you use Red Datasets or random sampling feature
|
29
|
+
gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
|
30
|
+
gem 'red-arrow-activerecord' # Optional, if you use Active Record
|
31
|
+
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
28
32
|
```
|
29
33
|
|
30
34
|
## Installation
|
31
35
|
|
32
36
|
Install requirements before you install RedAmber.
|
33
37
|
|
34
|
-
- Apache Arrow (~>
|
35
|
-
- Apache Arrow GLib (~>
|
36
|
-
- Apache Parquet GLib (~>
|
38
|
+
- Apache Arrow (~> 12.0.0)
|
39
|
+
- Apache Arrow GLib (~> 12.0.0)
|
40
|
+
- Apache Parquet GLib (~> 12.0.0) # If you use IO from/to parquet
|
37
41
|
|
38
42
|
See [Apache Arrow install document](https://arrow.apache.org/install/).
|
39
43
|
|
@@ -49,7 +53,7 @@ See [Apache Arrow install document](https://arrow.apache.org/install/).
|
|
49
53
|
sudo apt install -y -V libarrow-glib-dev
|
50
54
|
```
|
51
55
|
|
52
|
-
- On Fedora
|
56
|
+
- On Fedora 39 (Rawhide):
|
53
57
|
|
54
58
|
```
|
55
59
|
sudo dnf update
|
@@ -66,23 +70,26 @@ See [Apache Arrow install document](https://arrow.apache.org/install/).
|
|
66
70
|
If you prepared Apache Arrow, add these lines to your Gemfile:
|
67
71
|
|
68
72
|
```ruby
|
69
|
-
gem 'red-arrow', '~>
|
73
|
+
gem 'red-arrow', '~> 12.0.0'
|
70
74
|
gem 'red_amber'
|
71
|
-
gem 'red-parquet', '~>
|
72
|
-
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
75
|
+
gem 'red-parquet', '~> 12.0.0' # Optional, if you use IO from/to parquet
|
73
76
|
gem 'red-datasets-arrow' # Optional, recommended if you use Red Datasets
|
74
77
|
gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
|
78
|
+
gem 'red-arrow-activerecord' # Optional, if you use Active Record
|
79
|
+
gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
|
75
80
|
```
|
76
81
|
|
77
82
|
And then execute `bundle install` or install them yourself such as `gem install red_amber`.
|
78
83
|
|
79
84
|
## Docker image and Jupyter Notebook
|
80
85
|
|
81
|
-
|
86
|
+
A Docker image is available from the `docker` folder. See the [readme](docker/readme.md) for instructions. An integrated Jupyter notebook is in the `docker/notebook` folder.
|
82
87
|
|
83
|
-
|
88
|
+
You can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
|
84
89
|
[](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
|
85
90
|
|
91
|
+
[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
|
92
|
+
|
86
93
|
## Comparison of DataFrames
|
87
94
|
|
88
95
|
Comparison of basic features of RedAmber with Python
|
@@ -95,7 +102,7 @@ Julia [Dataframes](https://dataframes.juliadata.org/stable/) is in [DataFrame_Co
|
|
95
102
|
Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
|
96
103
|
Its entity is a Red Arrow's Table object.
|
97
104
|
|
98
|
-

|
99
106
|
|
100
107
|
Let's load the library and try some examples.
|
101
108
|
|
@@ -222,7 +229,7 @@ You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/dock
|
|
222
229
|
## Development
|
223
230
|
|
224
231
|
```shell
|
225
|
-
git clone https://github.com/
|
232
|
+
git clone https://github.com/red-data-tools/red_amber.git
|
226
233
|
cd red_amber
|
227
234
|
bundle install
|
228
235
|
bundle exec rake test
|
@@ -232,12 +239,12 @@ bundle exec rake test
|
|
232
239
|
|
233
240
|
I will appreciate if you could help to improve this project. Here are a few ways you can help:
|
234
241
|
|
235
|
-
- Let's talk in the [discussions](https://github.com/heronshoes/red_amber/discussions). [](https://github.com/
|
242
|
+
- Let's talk in the [discussions](https://github.com/heronshoes/red_amber/discussions). [](https://github.com/red-data-tools/red_amber/discussions)
|
236
243
|
- Browse Q and A, how to use, tips, etc.
|
237
244
|
- Ask questions you’re wondering about.
|
238
245
|
- Share ideas. The idea may be promoted to issues or pull requests.
|
239
|
-
- [Report bugs or suggest new features](https://github.com/
|
240
|
-
- Fix bugs and [submit pull requests](https://github.com/
|
246
|
+
- [Report bugs or suggest new features](https://github.com/red-data-tools/red_amber/issues)
|
247
|
+
- Fix bugs and [submit pull requests](https://github.com/red-data-tools/red_amber/pulls)
|
241
248
|
- Write, clarify, or fix documentation
|
242
249
|
|
243
250
|
## License
|
data/benchmark/basic.yml
CHANGED
data/benchmark/group.yml
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
loop_count: 3
|
2
2
|
|
3
3
|
contexts:
|
4
|
-
- name: 0.2.2
|
5
|
-
gems:
|
6
|
-
red_amber: 0.2.2
|
7
4
|
- name: 0.3.0
|
8
5
|
gems:
|
9
6
|
red_amber: 0.3.0
|
7
|
+
- name: 0.4.2
|
8
|
+
gems:
|
9
|
+
red_amber: 0.4.2
|
10
10
|
- name: HEAD
|
11
11
|
prelude: |
|
12
12
|
$LOAD_PATH.unshift(File.expand_path('lib'))
|
13
13
|
|
14
14
|
prelude: |
|
15
15
|
require 'red_amber'
|
16
|
+
include RedAmber
|
16
17
|
require 'datasets-arrow'
|
17
18
|
|
18
19
|
ds = Datasets::Rdatasets.new('nycflights13', 'flights')
|
@@ -32,8 +33,14 @@ benchmark:
|
|
32
33
|
'G03: sum arr_delay, mean distance by flight': |
|
33
34
|
df.group(:flight) { [sum(:arr_delay), mean(:distance)] }
|
34
35
|
|
35
|
-
'G04:
|
36
|
+
'G04: mean air_time, distance by flight': |
|
36
37
|
df.group(:flight).mean(:air_time, :distance)
|
37
38
|
|
38
|
-
'
|
39
|
+
'G05: sum dep_delay, arr_delay by carrier': |
|
39
40
|
df.group(:carrier).sum(:dep_delay, :arr_delay)
|
41
|
+
|
42
|
+
'G06: filters': |
|
43
|
+
Group.new(df, :dest).filters
|
44
|
+
|
45
|
+
'G07: inspect': |
|
46
|
+
Group.new(df, :dest).inspect
|
data/doc/CODE_OF_CONDUCT.md
CHANGED
@@ -39,7 +39,7 @@ This Code of Conduct applies within all community spaces, and also applies when
|
|
39
39
|
|
40
40
|
## Enforcement
|
41
41
|
|
42
|
-
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at
|
42
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at heronshoes877@gmail.com. All complaints will be reviewed and investigated promptly and fairly.
|
43
43
|
|
44
44
|
All community leaders are obligated to respect the privacy and security of the reporter of any incident.
|
45
45
|
|
data/docker/.env
ADDED