red_amber 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 264e7637475fd01946900335751a1592a3859e9bfa772ecc0800ab05c4d852f0
4
- data.tar.gz: a57400445419698a66d6b5c94e15fa8c040f2f3930f9fbf75603ffb6e18bd9cf
3
+ metadata.gz: b8fc1df498792b2b30d63a47a783cda67ccb8cea09e933aa8cba5d317277f500
4
+ data.tar.gz: 83e54f0fb6070a6b3c4301d0cd3e5356f1ca4e09bdae200f4fc7694a2e3e7daa
5
5
  SHA512:
6
- metadata.gz: 0fdbcdb732e36bb866a8251800ab3fa1a714fa075234bf8cd516f2542ab6704ebfa429a7177da2bd8cd6fa6eb1158efb0d68f46f43d1dc088a9a0f0debdc5c54
7
- data.tar.gz: f9c1dffaa157ecf34b0b4fec6c1d7972b4773bbf7a11101a345172d621753cd9fc3818753b329dd2906a506af294d6a96c0180a0fb4dc84c2b54bceef6b520f5
6
+ metadata.gz: 440dd984e88afd4bee7860a0f5b03c54094b8536de6acf70b770d3f473c1ee93608a5565d62b5bd60d5e8e3ba8c09675e136373a15daef77bfc455c7c5a4a7cc
7
+ data.tar.gz: '09f27ff2a0c3804b345c4b5c581013135544fed098265e9bca274e34de52791b9a36525df22b17fe80093f14249c195496366e16079c25e70cf660070dc66858'
data/.rubocop.yml CHANGED
@@ -52,7 +52,7 @@ Lint/BinaryOperatorWithIdenticalOperands:
52
52
 
53
53
  Lint/Debugger:
54
54
  Exclude:
55
- - 'bin/example'
55
+ - 'docker/example'
56
56
 
57
57
  # Need for test with empty block
58
58
  # Offense count: 1
@@ -76,7 +76,8 @@ Metrics/AbcSize:
76
76
  Max: 30
77
77
  CountRepeatedAttributes: false
78
78
  AllowedMethods: [
79
- 'join', # 51.87
79
+ 'join_merge_keys', # 54.18
80
+ 'join', # 53.1
80
81
  'dataframe_info', # 46.5
81
82
  'format_table', # 84.62
82
83
  'to_long', # 33.66
@@ -87,6 +88,9 @@ Metrics/AbcSize:
87
88
  '[]', # 33.76
88
89
  'split', # 37.35
89
90
  'aggregate', # 38.13
91
+ 'filters', # 33.91
92
+ 'merge_keys', # 32.17
93
+ 'rename_keys', # 31.64
90
94
  ]
91
95
 
92
96
  # Max: 25
@@ -139,10 +143,12 @@ Metrics/MethodLength:
139
143
  Max: 30
140
144
  AllowedMethods: [
141
145
  'join', # 47
142
- 'dataframe_info', # 33
146
+ 'join_merge_keys', # 41
143
147
  'format_table', # 53
144
148
  'slice_by', # 38
145
149
  'assign_update', # 35
150
+ 'summarize', # 35
151
+ 'dataframe_info', # 33
146
152
  'drop', # 32
147
153
  'aggregate', # 31
148
154
  ]
@@ -219,7 +225,7 @@ Naming/PredicateName:
219
225
  Rubycw/Rubycw:
220
226
  Exclude:
221
227
  - 'test/**/*'
222
- - 'bin/example'
228
+ - 'docker/example'
223
229
 
224
230
  # Offense count: 16
225
231
  # This cop supports safe autocorrection (--autocorrect).
@@ -236,7 +242,7 @@ Style/SlicingWithRange:
236
242
 
237
243
  Style/MixinUsage:
238
244
  Exclude:
239
- - 'bin/example'
245
+ - 'docker/example'
240
246
 
241
247
  # Necessary to Vector < 0 element-wise comparison
242
248
  # Offense count: 5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,95 @@
1
+ ## [0.5.0] - 2023-05-24
2
+
3
+ - Breaking change
4
+ - Use non keyword argument in #sub_by_value (#219)
5
+ - Upgrade dependency to Arrow 12.0.0 (#238)
6
+ - right_join will output columns in the same order as Red Arrow.
7
+ - DataFrame#join will not force ordering of original column by default
8
+ - Join with type, such as full_join, sort after join by default
9
+
10
+ - Bug fixes
11
+ - Use truncate in Vector#sample(float) (#229)
12
+ - Support options in DataFrame#tdra (#231)
13
+ - Fix printing table with non-ascii strings (#233)
14
+ - Fix join for Arrow 12.0.0
15
+
16
+ - New features and improvements
17
+ - Add a singleton method Vector.[] (#218)
18
+ - Add an alias #sub_group (#219)
19
+ - Accept Group#summarize{Hash} to rename aggregated columns (#219)
20
+ - Add Group#group_frame (#219)
21
+ - Add Vector#cast (#224)
22
+ - Add Vector#fill_nil(value) (#226)
23
+ - Add Vector#one (#227)
24
+ - Add Vector#mode (#228)
25
+ - Add DataFrame#propagate (#235)
26
+ - Add DataFrame#sample (#237)
27
+ - Add DataFrame#shuffle (#237)
28
+ - Support RankOptions in Vector#rank (#239)
29
+ - Introduce MatchSubstringOptions family in Vector (#241)
30
+ - Introduce Vector#match_substring?
31
+ - Add Vector#end_with?, #start_with? method
32
+ - Add Vector#match_like?
33
+ - Add Vector#count_substring method
34
+
35
+ - Refactoring
36
+ - Refine Group and SubFrames function (#219)
37
+ - Refine Group#group_count
38
+ - Use Acero in Group#filters
39
+ - Refine Group#filters, not using Acero
40
+ - Refine Group#summarize(array)
41
+ - Use Acero for renaming columns in join (#238)
42
+ - Use index kernel with IndexOptions introduced in 12.0.0 (#240)
43
+
44
+ - Improve in tests/CI
45
+ - Use Fedora 39 Rawhide in CI (#238)
46
+
47
+ - Documentation and Example
48
+ - Add missing yard documents for SubFrames::Selectors (#219)
49
+ - Update docker/example (#219)
50
+ - Update Gemfile in docker (#219)
51
+ - Add README.ja.md (#242)
52
+
53
+ - GitHub site
54
+ - Update link of Red Data Tools Chat to matrix (#242)
55
+
56
+ - Thanks
57
+
58
+ ## [0.4.2] - 2023-04-02
59
+
60
+ - Breaking change
61
+
62
+ - Bug fixes
63
+ - Fix Vector#modulo, #fdiv, #remainder (#203)
64
+
65
+ - New features and improvements
66
+ - Update SubFrames#take to return SubFrames (#212)
67
+
68
+ - Refactoring
69
+ - Refine SubFrames to support partial retrieval (#207)
70
+ - Upgrade SubFrames#frames and promote to public (#207)
71
+ - Use faster count in Group#inspect (#207)
72
+
73
+ - Improve in tests/CI
74
+
75
+ - Documentation and Example
76
+ - Introduce minimum docker environment (#205)
77
+ - Move example REPL to docker (#205)
78
+ - Add readme.md in docker (#205)
79
+ - Add example_of_red_amber.ipynb (#205)
80
+ - Use smaller dataset in irb example
81
+ - Fix docker/example
82
+ - Updated link to red-data-tools (#213)
83
+ - Thanks to Soumya Kushwaha
84
+
85
+ - GitHub site
86
+ - Migrated to [Red Data Tools](https://github.com/red-data-tools)
87
+ - Thanks to Sutou Kouhei
88
+
89
+ - Thanks
90
+ - Sutou Kouhei
91
+ - Soumya Kushwaha
92
+
1
93
  ## [0.4.1] - 2023-03-11
2
94
 
3
95
  - Breaking change
@@ -676,7 +768,7 @@
676
768
  - Improve `DataFrame#[]`, `#slice`, `#remove` by a new engine
677
769
  - It parses arguments to Vector internally.
678
770
  - Used Kernel#Array to simplify code (#16) .
679
- - recycle: Move `DataFrame#slice`, `#remove` to Selectable
771
+ - Move `DataFrame#slice`, `#remove` to Selectable
680
772
  - Refine `DataFrame#take`, `#filter` (undocumented)
681
773
 
682
774
  - Introduce coerce in Vector (#35)
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ gemspec
7
7
  group :test do
8
8
  gem 'rake'
9
9
 
10
- gem 'red-parquet', '~> 11.0.0'
10
+ gem 'red-parquet', '~> 12.0.0'
11
11
  gem 'rover-df', '~> 0.3.0'
12
12
 
13
13
  gem 'rubocop'
@@ -15,14 +15,13 @@ group :test do
15
15
  gem 'rubocop-rake'
16
16
  gem 'rubocop-rubycw', require: false
17
17
 
18
- gem 'iruby'
19
- gem 'test-unit'
20
- gem 'webrick'
21
- gem 'yard'
22
-
23
18
  gem 'benchmark_driver'
19
+ gem 'iruby'
24
20
  gem 'red-arrow-numo-narray'
25
21
  gem 'red-datasets-arrow'
26
22
  gem 'simplecov'
27
23
  gem 'simplecov-json'
24
+ gem 'test-unit'
25
+ gem 'webrick'
26
+ gem 'yard'
28
27
  end
data/README.ja.md ADDED
@@ -0,0 +1,252 @@
1
+ # RedAmber
2
+
3
+ [![Gem Version](https://img.shields.io/gem/v/red_amber?color=brightgreen)](https://rubygems.org/gems/red_amber)
4
+ [![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/red-data-tools/red_amber/actions/workflows/ci.yml)
5
+ [![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
6
+ [![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
7
+ [![Doc](https://img.shields.io/badge/docs-latest-blue)](https://heronshoes.github.io/red_amber/)
8
+ [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/red-data-tools/red_amber/discussions)
9
+
10
+ Rubyistのためのデータフレームライブラリ.
11
+
12
+ - Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
13
+ [![Red Data Tools Chat (ja)](https://badges.gitter.im/red-data-tools/en.svg)](https://app.element.io/#/room/#red-data-tools_ja:gitter.im) [![Gem Version](https://img.shields.io/gem/v/red-arrow?color=brightgreen)](https://rubygems.org/gems/red-arrow)
14
+ - Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
15
+
16
+ [README in English](README.md)
17
+
18
+ ![screenshot from jupyterlab](https://raw.githubusercontent.com/red-data-tools/red_amber/main/doc/image/screenshot.png)
19
+
20
+ ## 必要な環境
21
+ ### Ruby
22
+ - Ruby 3.0 以上.
23
+
24
+ ### ライブラリ
25
+ ```ruby
26
+ gem 'red-arrow', '~> 12.0.0' # お使いの環境に合わせた Apache Arrow が必要です(下記のインストールを参照してください)
27
+ gem 'red-parquet', '~> 12.0.0' # 必要に応じて。Parquetの入出力が必要な場合。
28
+ gem 'red-datasets-arrow' # 必要に応じて。Red Datasets またはランダムサンプリングが必要な場合。
29
+ gem 'red-arrow-activerecord' # 必要に応じて。Active Record とのデータ交換が必要な場合。
30
+ gem 'rover-df', '~> 0.3.0' # 必要に応じて。Rover::DataFrameに対する入出力が必要な場合。
31
+ ```
32
+
33
+ ## インストール
34
+
35
+ RedAmberをインストールする前に、下記のライブラリのインストールが必要です。
36
+
37
+ - Apache Arrow (~> 12.0.0)
38
+ - Apache Arrow GLib (~> 12.0.0)
39
+ - Apache Parquet GLib (~> 12.0.0) # Parquetの入出力が必要な場合。
40
+
41
+ 環境ごとの詳しいインストール方法は、 [Apache Arrow install document](https://arrow.apache.org/install/) を参照してください。
42
+
43
+ - Ubuntuの場合の最低限必要なインストール例:
44
+
45
+ ```
46
+ sudo apt update
47
+ sudo apt install -y -V ca-certificates lsb-release wget
48
+ wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
49
+ sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
50
+ sudo apt update
51
+ sudo apt install -y -V libarrow-dev
52
+ sudo apt install -y -V libarrow-glib-dev
53
+ ```
54
+
55
+ - Fedora 39 (Rawhide)の場合:
56
+
57
+ ```
58
+ sudo dnf update
59
+ sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
60
+ ```
61
+
62
+ - macOS の場合は、Homebrewを使用する:
63
+
64
+ ```
65
+ brew install apache-arrow
66
+ brew install apache-arrow-glib
67
+ ```
68
+
69
+ Apache Arrowがインストールできたら、下記の行をGemfileに追加してください:
70
+
71
+ ```ruby
72
+ gem 'red-arrow', '~> 12.0.0' # お使いの環境に合わせた Apache Arrow が必要です(上記のインストールを参照してください)
73
+ gem 'red_amber'
74
+ gem 'red-parquet', '~> 12.0.0' # 必要に応じて。Parquetの入出力が必要な場合。
75
+ gem 'red-datasets-arrow' # 必要に応じて。Red Datasets またはランダムサンプリングが必要な場合。
76
+ gem 'red-arrow-numo-narray' # 必要に応じて。Numo::NArrayとの連携が必要な場合
77
+ gem 'red-arrow-activerecord' # 必要に応じて。Active Record とのデータ交換が必要な場合。
78
+ gem 'rover-df', '~> 0.3.0' # 必要に応じて。Rover::DataFrameに対する入出力が必要な場合。
79
+ ```
80
+
81
+ `bundle install`とするか、または `gem install red_amber`としてインストールしてください。
82
+
83
+ ## Docker イメージと Jupyter Notebook
84
+
85
+ このリポジトリの`docker` フォルダーから Docker コンテナ環境を生成できます。リポジトリをクローンしてから、dockerフォルダーにある [readme](docker/readme.md) を参照してください。その環境では `docker/notebook` フォルダーにある Jupyter Notebookイメージを試用できます。
86
+
87
+ このREADMEの内容をネットワーク上のJupyter Notebookでインタラクティブに試用することも出来ます。 [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
88
+ [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
89
+
90
+ Jupyter Notebookの環境を含めた他の多くのデータ処理用のライブラリーとともにRedAmberもパッケージングされたDocker Imageとして、[RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) が利用できます(Thanks to Kenta Murata).
91
+
92
+ ## 他のデータフレームライブラリとの比較表
93
+
94
+ RedAmberの基本的な機能をPython
95
+ [pandas](https://pandas.pydata.org/) や
96
+ R [Tidyverse](https://www.tidyverse.org/) や
97
+ Julia [Dataframes](https://dataframes.juliadata.org/stable/) と比較した表は [DataFrame_Comparison.md](doc/DataFrame_Comparison.md) にあります(Thanks to Benson Muite).
98
+
99
+ ## `RedAmber`のデータフレーム
100
+
101
+ クラス `RedAmber::DataFrame` は2次元のデータの集まりを表現します。
102
+ その実体は Red Arrowの Tableオブジェクトです。
103
+
104
+ ![dataframe model of RedAmber](https://raw.githubusercontent.com/red-data-tools/red_amber/main/doc/image/dataframe_model.png)
105
+
106
+ それではライブラリをロードしていくつかの例を試してみましょう。
107
+
108
+ ```ruby
109
+ require 'red_amber' # require 'red-amber' でもOKです.
110
+ include RedAmber
111
+ ```
112
+
113
+ ### 例: diamonds データセット
114
+
115
+ もしまだであれば、Red DatasetsのArrow拡張を`
116
+ gem install red-datasets-arrow
117
+ `
118
+ としてインストールしてから次を実行してください。
119
+
120
+ ```ruby
121
+ require 'datasets-arrow' # サンプルデータのロードのため
122
+
123
+ dataset = Datasets::Diamonds.new
124
+ diamonds = DataFrame.new(dataset) # v0.2.3以前では, `dataset.to_arrow`とする必要があります。
125
+
126
+ # =>
127
+ #<RedAmber::DataFrame : 53940 x 10 Vectors, 0x000000000000f668>
128
+ carat cut color clarity depth table price x ... z
129
+ <double> <string> <string> <string> <double> <double> <uint16> <double> ... <double>
130
+ 0 0.23 Ideal E SI2 61.5 55.0 326 3.95 ... 2.43
131
+ 1 0.21 Premium E SI1 59.8 61.0 326 3.89 ... 2.31
132
+ 2 0.23 Good E VS1 56.9 65.0 327 4.05 ... 2.31
133
+ 3 0.29 Premium I VS2 62.4 58.0 334 4.2 ... 2.63
134
+ 4 0.31 Good J SI2 63.3 58.0 335 4.34 ... 2.75
135
+ : : : : : : : : : ... :
136
+ 53937 0.7 Very Good D SI1 62.8 60.0 2757 5.66 ... 3.56
137
+ 53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 ... 3.74
138
+ 53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 ... 3.64
139
+ ```
140
+
141
+ 例えば、1カラットより大きいレコードに対し、cut毎の平均のpriceを求めるには次のようにします。
142
+
143
+ ```ruby
144
+ df = diamonds
145
+ .slice { carat > 1 } # #sliceの代わりに#filterでも可
146
+ .group(:cut)
147
+ .mean(:price) # ここで:priceを指定する場合はgroupの前のpickは不要
148
+ .sort('-mean(price)')
149
+
150
+ # =>
151
+ #<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000f67c>
152
+ cut mean(price)
153
+ <string> <double>
154
+ 0 Ideal 8674.23
155
+ 1 Premium 8487.25
156
+ 2 Very Good 8340.55
157
+ 3 Good 7753.6
158
+ 4 Fair 7177.86
159
+ ```
160
+
161
+ Arrowのデータはイミュータブルなので、これらのメソッドは新しいオブジェクトを返します。
162
+
163
+ 次の例は、列をリネームしてから新しい列に簡単な計算の結果を格納します。
164
+
165
+ ```ruby
166
+ usdjpy = 110.0 # 今よりずっと円高の頃
167
+
168
+ df.rename('mean(price)': :mean_price_USD)
169
+ .assign(:mean_price_JPY) { mean_price_USD * usdjpy }
170
+
171
+ # =>
172
+ #<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000f71c>
173
+ cut mean_price_USD mean_price_JPY
174
+ <string> <double> <double>
175
+ 0 Ideal 8674.23 954164.93
176
+ 1 Premium 8487.25 933597.34
177
+ 2 Very Good 8340.55 917460.37
178
+ 3 Good 7753.6 852896.11
179
+ 4 Fair 7177.86 789564.12
180
+ ```
181
+
182
+ ### 例: starwars データセット
183
+
184
+ 次の例は、CSVファイルをダウンロードして`starwars` データセットを読み込みます。その後簡単なデータのクリーニングを行います。
185
+
186
+ ```ruby
187
+ uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
188
+
189
+ starwars = DataFrame.load(uri)
190
+
191
+ starwars
192
+ .drop(0) # 不要な列を取り除く
193
+ .remove { species == "NA" } # 不要な行を取り除く
194
+ .group(:species) { [count(:species), mean(:height, :mass)] }
195
+ .slice { count > 1 } # #filterでも可
196
+
197
+ # =>
198
+ #<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000000f848>
199
+ species count mean(height) mean(mass)
200
+ <string> <int64> <double> <double>
201
+ 0 Human 35 176.65 82.78
202
+ 1 Droid 6 131.2 69.75
203
+ 2 Wookiee 2 231.0 124.0
204
+ 3 Gungan 3 208.67 74.0
205
+ 4 Zabrak 2 173.0 80.0
206
+ 5 Twi'lek 2 179.0 55.0
207
+ 6 Mirialan 2 168.0 53.1
208
+ 7 Kaminoan 2 221.0 88.0
209
+ ```
210
+
211
+ より詳しいデータフレームの使用例については、[DataFrame.md](doc/DataFrame.md) をご参照ください。
212
+
213
+
214
+ ### 1次元のデータを保持する `Vector`
215
+
216
+ クラス`RedAmber::Vector` はデータフレームの中の列方向に格納された1次元のデータ列を保持します.
217
+
218
+ より詳しい使用例については [Vector.md](doc/Vector.md) をご参照ください。
219
+
220
+ ## Jupyter notebook
221
+
222
+ Jupyter Notebook形式の使用例として、[Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
223
+ ([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) があります。データのロードから各種のデータ処理まで100以上の使用例を集めています。[Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
224
+ [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb)で試すこともできます。
225
+
226
+
227
+ ## 開発
228
+
229
+ ```shell
230
+ git clone https://github.com/red-data-tools/red_amber.git
231
+ cd red_amber
232
+ bundle install
233
+ bundle exec rake test
234
+ ```
235
+
236
+ rake testは必須ですが、rake rubocopをパスすることはコントリビュートの際に必須ではありません。このプロジェクトではコードの書き方の好みを尊重します。ただしマージの際に書き方を統一することがあります。
237
+
238
+ ## コミュニティ
239
+
240
+ このプロジェクトを支援して頂けると嬉しいです。支援の方法はいくつかあります。
241
+
242
+ - [discussions](https://github.com/heronshoes/red_amber/discussions)で話をする [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/red-data-tools/red_amber/discussions)
243
+ - Q and Aや使用方法、豆知識などを見る。
244
+ - 疑問に思っていることを質問する。
245
+ - 新しいアイデアを共有する。アイデアはdiscussionからissueに昇格させて育てていくこともあります。漠然としたアイデアでもdiscussionから始めて大きくしていきましょう。
246
+ - [バグ報告や新しい機能の提案](https://github.com/red-data-tools/red_amber/issues)
247
+ - バグの修正や[プルリクエスト](https://github.com/red-data-tools/red_amber/pulls)
248
+ - ドキュメントを修正したり、不明確なところを直したり、新しく追加する
249
+
250
+ ## License
251
+
252
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/README.md CHANGED
@@ -1,39 +1,43 @@
1
1
  # RedAmber
2
2
 
3
3
  [![Gem Version](https://img.shields.io/gem/v/red_amber?color=brightgreen)](https://rubygems.org/gems/red_amber)
4
- [![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
4
+ [![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/red-data-tools/red_amber/actions/workflows/ci.yml)
5
5
  [![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
6
6
  [![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
7
7
  [![Doc](https://img.shields.io/badge/docs-latest-blue)](https://heronshoes.github.io/red_amber/)
8
- [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/heronshoes/red_amber/discussions)
8
+ [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/red-data-tools/red_amber/discussions)
9
9
 
10
10
  A simple dataframe library for Ruby.
11
11
 
12
12
  - Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
13
- [![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en) [![Gem Version](https://img.shields.io/gem/v/red-arrow?color=brightgreen)](https://rubygems.org/gems/red-arrow)
13
+ [![Red Data Tools Chat (en)](https://badges.gitter.im/red-data-tools/en.svg)](https://app.element.io/#/room/#red-data-tools_en:gitter.im) [![Gem Version](https://img.shields.io/gem/v/red-arrow?color=brightgreen)](https://rubygems.org/gems/red-arrow)
14
14
  - Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
15
15
 
16
- ![screenshot from jupyterlab](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/screenshot.png)
16
+ [日本語のREADME](README.ja.md)
17
+
18
+ ![screenshot from jupyterlab](https://raw.githubusercontent.com/red-data-tools/red_amber/main/doc/image/screenshot.png)
17
19
 
18
20
  ## Requirements
19
21
  ### Ruby
20
22
  Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
21
- - I decided to remove support for Ruby 2.7 without waiting for its EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
22
23
 
23
- ### Libraries
24
+ ### Required libraries
24
25
  ```ruby
25
- gem 'red-arrow', '~> 11.0.0' # Requires Apache Arrow (see installation below)
26
- gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
27
- gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
26
+ gem 'red-arrow', '~> 12.0.0' # Requires Apache Arrow (see installation below)
27
+ gem 'red-parquet', '~> 12.0.0' # Optional, if you use IO from/to parquet
28
+ gem 'red-datasets-arrow' # Optional, if you use Red Datasets or random sampling feature
29
+ gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
30
+ gem 'red-arrow-activerecord' # Optional, if you use Active Record
31
+ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
28
32
  ```
29
33
 
30
34
  ## Installation
31
35
 
32
36
  Install requirements before you install RedAmber.
33
37
 
34
- - Apache Arrow (~> 11.0.0)
35
- - Apache Arrow GLib (~> 11.0.0)
36
- - Apache Parquet GLib (~> 11.0.0) # If you use IO from/to parquet
38
+ - Apache Arrow (~> 12.0.0)
39
+ - Apache Arrow GLib (~> 12.0.0)
40
+ - Apache Parquet GLib (~> 12.0.0) # If you use IO from/to parquet
37
41
 
38
42
  See [Apache Arrow install document](https://arrow.apache.org/install/).
39
43
 
@@ -49,7 +53,7 @@ See [Apache Arrow install document](https://arrow.apache.org/install/).
49
53
  sudo apt install -y -V libarrow-glib-dev
50
54
  ```
51
55
 
52
- - On Fedora 38 (Rawhide):
56
+ - On Fedora 39 (Rawhide):
53
57
 
54
58
  ```
55
59
  sudo dnf update
@@ -66,23 +70,26 @@ See [Apache Arrow install document](https://arrow.apache.org/install/).
66
70
  If you prepared Apache Arrow, add these lines to your Gemfile:
67
71
 
68
72
  ```ruby
69
- gem 'red-arrow', '~> 11.0.0'
73
+ gem 'red-arrow', '~> 12.0.0'
70
74
  gem 'red_amber'
71
- gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
72
- gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
75
+ gem 'red-parquet', '~> 12.0.0' # Optional, if you use IO from/to parquet
73
76
  gem 'red-datasets-arrow' # Optional, recommended if you use Red Datasets
74
77
  gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
78
+ gem 'red-arrow-activerecord' # Optional, if you use Active Record
79
+ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
75
80
  ```
76
81
 
77
82
  And then execute `bundle install` or install them yourself such as `gem install red_amber`.
78
83
 
79
84
  ## Docker image and Jupyter Notebook
80
85
 
81
- [RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
86
+ A Docker image is available from the `docker` folder. See the [readme](docker/readme.md) for instructions. An integrated Jupyter notebook is in the `docker/notebook` folder.
82
87
 
83
- Also you can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
88
+ You can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
84
89
  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
85
90
 
91
+ [RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
92
+
86
93
  ## Comparison of DataFrames
87
94
 
88
95
  Comparison of basic features of RedAmber with Python
@@ -95,7 +102,7 @@ Julia [Dataframes](https://dataframes.juliadata.org/stable/) is in [DataFrame_Co
95
102
  Class `RedAmber::DataFrame` represents a set of data in 2D-shape.
96
103
  Its entity is a Red Arrow's Table object.
97
104
 
98
- ![dataframe model of RedAmber](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/dataframe_model.png)
105
+ ![dataframe model of RedAmber](https://raw.githubusercontent.com/red-data-tools/red_amber/main/doc/image/dataframe_model.png)
99
106
 
100
107
  Let's load the library and try some examples.
101
108
 
@@ -222,7 +229,7 @@ You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/dock
222
229
  ## Development
223
230
 
224
231
  ```shell
225
- git clone https://github.com/heronshoes/red_amber.git
232
+ git clone https://github.com/red-data-tools/red_amber.git
226
233
  cd red_amber
227
234
  bundle install
228
235
  bundle exec rake test
@@ -232,12 +239,12 @@ bundle exec rake test
232
239
 
233
240
  I will appreciate if you could help to improve this project. Here are a few ways you can help:
234
241
 
235
- - Let's talk in the [discussions](https://github.com/heronshoes/red_amber/discussions). [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/heronshoes/red_amber/discussions)
242
+ - Let's talk in the [discussions](https://github.com/heronshoes/red_amber/discussions). [![Discussions](https://img.shields.io/github/discussions/heronshoes/red_amber)](https://github.com/red-data-tools/red_amber/discussions)
236
243
  - Browse Q and A, how to use, tips, etc.
237
244
  - Ask questions you’re wondering about.
238
245
  - Share ideas. The idea may be promoted to issues or pull requests.
239
- - [Report bugs or suggest new features](https://github.com/heronshoes/red_amber/issues)
240
- - Fix bugs and [submit pull requests](https://github.com/heronshoes/red_amber/pulls)
246
+ - [Report bugs or suggest new features](https://github.com/red-data-tools/red_amber/issues)
247
+ - Fix bugs and [submit pull requests](https://github.com/red-data-tools/red_amber/pulls)
241
248
  - Write, clarify, or fix documentation
242
249
 
243
250
  ## License
data/benchmark/basic.yml CHANGED
@@ -43,7 +43,7 @@ benchmark:
43
43
  'B06: Pick by a block': |
44
44
  df.pick { keys.map { |key| key.end_with?('time') } }
45
45
 
46
- 'B07: Slice([]) by a index': |
46
+ 'B07: Slice([]) by an index': |
47
47
  df[877]
48
48
 
49
49
  'B08: Slice by indeces': |
data/benchmark/group.yml CHANGED
@@ -1,18 +1,19 @@
1
1
  loop_count: 3
2
2
 
3
3
  contexts:
4
- - name: 0.2.2
5
- gems:
6
- red_amber: 0.2.2
7
4
  - name: 0.3.0
8
5
  gems:
9
6
  red_amber: 0.3.0
7
+ - name: 0.4.2
8
+ gems:
9
+ red_amber: 0.4.2
10
10
  - name: HEAD
11
11
  prelude: |
12
12
  $LOAD_PATH.unshift(File.expand_path('lib'))
13
13
 
14
14
  prelude: |
15
15
  require 'red_amber'
16
+ include RedAmber
16
17
  require 'datasets-arrow'
17
18
 
18
19
  ds = Datasets::Rdatasets.new('nycflights13', 'flights')
@@ -32,8 +33,14 @@ benchmark:
32
33
  'G03: sum arr_delay, mean distance by flight': |
33
34
  df.group(:flight) { [sum(:arr_delay), mean(:distance)] }
34
35
 
35
- 'G04: mean air_time, distance by flight': |
36
+ 'G04: mean air_time, distance by flight': |
36
37
  df.group(:flight).mean(:air_time, :distance)
37
38
 
38
- 'G05: sum dep_delay, arr_delay by carrer': |
39
+ 'G05: sum dep_delay, arr_delay by carrier': |
39
40
  df.group(:carrier).sum(:dep_delay, :arr_delay)
41
+
42
+ 'G06: filters': |
43
+ Group.new(df, :dest).filters
44
+
45
+ 'G07: inspect': |
46
+ Group.new(df, :dest).inspect
@@ -39,7 +39,7 @@ This Code of Conduct applies within all community spaces, and also applies when
39
39
 
40
40
  ## Enforcement
41
41
 
42
- Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at 63298319+heronshoes@users.noreply.github.com. All complaints will be reviewed and investigated promptly and fairly.
42
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at heronshoes877@gmail.com. All complaints will be reviewed and investigated promptly and fairly.
43
43
 
44
44
  All community leaders are obligated to respect the privacy and security of the reporter of any incident.
45
45
 
data/docker/.env ADDED
@@ -0,0 +1,4 @@
1
+ NB_USER=jovyan
2
+
3
+ TZ=
4
+ TOKEN=''