red_amber 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +39 -20
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +113 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +25 -26
  8. data/benchmark/basic.yml +2 -2
  9. data/benchmark/combine.yml +2 -2
  10. data/benchmark/dataframe.yml +2 -2
  11. data/benchmark/group.yml +2 -2
  12. data/benchmark/reshape.yml +2 -2
  13. data/benchmark/vector.yml +3 -0
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +429 -75
  20. data/lib/red_amber/data_frame_combinable.rb +516 -66
  21. data/lib/red_amber/data_frame_displayable.rb +244 -14
  22. data/lib/red_amber/data_frame_indexable.rb +121 -18
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +622 -66
  26. data/lib/red_amber/data_frame_variable_operation.rb +446 -34
  27. data/lib/red_amber/group.rb +187 -22
  28. data/lib/red_amber/helper.rb +70 -10
  29. data/lib/red_amber/refinements.rb +12 -5
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +385 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +217 -12
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 78fa72064f9494f0f756f15cf1daaacb3640535e899ba71ab080730c0d61b0b2
4
- data.tar.gz: 3f2de4a449c38eb995ebcc0394a1a93633f097e533696edfa91267a596dcb580
3
+ metadata.gz: aa6f3c47b47df7271d7d150a800013c7c9d8bd75ca6066f54506c922f12eea09
4
+ data.tar.gz: 763f19f54a6508648fe9f1bdd0a11f678a86f554b58b71d7bed66aa5df7df2a7
5
5
  SHA512:
6
- metadata.gz: 45a7c37cc746c606e8d4d2a43005da8154b60df21bf2cf6b2bafa9f7ad5f962a3e3c8e2f931e6543b20b8f6cd8c8a447b99b7f0127854d3bb716ea763ab3cae5
7
- data.tar.gz: b3ac4479df1e30b75e7ccfcc48b09f709cea536c98072bfe937ae283c0cc1d203ab97388cf6f57c39fd31c6beceadcb850c3f14e8e07e5e196cc0c862634f36d
6
+ metadata.gz: 433ca52f7a62f055f327e0426426cfd86f563009e4ec4811d7cf8297152309081271b7b7625d39ffa31ecf455d352ee305d76b6d09e4d1dab0d90aa6c2bffb3e
7
+ data.tar.gz: 717d8618dd428d165c80420e7c35f1b7f870a059227a91bd5224f67b9cd3b8bdafcaed523fee170524738036cc9b43e914712fa01e88f7eb9ca1f0cc18c98dbf
data/.rubocop.yml CHANGED
@@ -31,6 +31,19 @@ Style/TrailingCommaInHashLiteral:
31
31
 
32
32
  # To let you know the possibility of refactoring ===
33
33
 
34
+ # Max: 120
35
+ # This cop supports safe autocorrection (--autocorrect).
36
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
37
+ # URISchemes: http, https
38
+ Layout/LineLength:
39
+ Max: 90
40
+ Exclude:
41
+ - 'test/**/*'
42
+
43
+ # EnforcedStyle: aligned
44
+ Layout/MultilineMethodCallIndentation:
45
+ EnforcedStyle: indented_relative_to_receiver
46
+
34
47
  # avoid unused variable assignment
35
48
  Rubycw/Rubycw:
36
49
  Exclude:
@@ -40,7 +53,7 @@ Rubycw/Rubycw:
40
53
  # Offense count: 38
41
54
  Lint/BinaryOperatorWithIdenticalOperands:
42
55
  Exclude:
43
- - 'test/test_vector_function.rb'
56
+ - 'test/test_vector_binary_element_wise.rb'
44
57
 
45
58
  # Need for test with empty block
46
59
  # Offense count: 1
@@ -55,15 +68,6 @@ Lint/UselessAssignment:
55
68
  Exclude:
56
69
  - 'test/**/*'
57
70
 
58
- # Max: 120
59
- # This cop supports safe autocorrection (--autocorrect).
60
- # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
61
- # URISchemes: http, https
62
- Layout/LineLength:
63
- Max: 90
64
- Exclude:
65
- - 'test/**/*'
66
-
67
71
  # <= 17 satisfactory
68
72
  # 18..30 unsatisfactory
69
73
  # > 30 dangerous
@@ -104,6 +108,7 @@ Metrics/ClassLength:
104
108
  - 'test/**/*'
105
109
  - 'lib/red_amber/data_frame.rb' # 162
106
110
  - 'lib/red_amber/group.rb' # 105
111
+ - 'lib/red_amber/subframes.rb' # 110
107
112
  - 'lib/red_amber/vector.rb' # 152
108
113
 
109
114
  # Only for monitoring. I will measure by PerceivedComplexity.
@@ -113,14 +118,15 @@ Metrics/ClassLength:
113
118
  Metrics/CyclomaticComplexity:
114
119
  Max: 12
115
120
  AllowedMethods: [
116
- 'join', # 14
121
+ 'split', # 33
117
122
  'format_table', # 21
123
+ 'normalize_element', # 17
118
124
  'slice_by', # 16
125
+ 'assign_update', # 14
126
+ 'join', # 14
127
+ 'parse_range', # 14
119
128
  'remove', # 14
120
- 'normalize_element', # 17
121
129
  '[]', # 13
122
- 'parse_range', # 14
123
- 'split', # 33
124
130
  ]
125
131
 
126
132
  # Max: 10
@@ -134,6 +140,7 @@ Metrics/MethodLength:
134
140
  'format_table', # 53
135
141
  'slice_by', # 38
136
142
  'assign_update', # 35
143
+ 'aggregate', # 31
137
144
  ]
138
145
 
139
146
  # Max: 100
@@ -163,17 +170,23 @@ Metrics/ParameterLists:
163
170
  Metrics/PerceivedComplexity:
164
171
  Max: 10
165
172
  AllowedMethods: [
166
- 'join', # 14
167
- 'dataframe_info', # 13
168
173
  'format_table', # 22
169
174
  'slice_by', # 20
170
- 'remove', # 14
171
- 'drop', # 12
172
- 'filters', # 11
173
175
  'normalize_element', # 17
174
- '[]', # 11
176
+ 'assign_update', # 15
175
177
  'parse_range', # 15
178
+ 'join', # 14
179
+ 'remove', # 14
176
180
  'split', # 14
181
+ 'dataframe_info', # 13
182
+ 'replace', # 13
183
+ 'drop', # 12
184
+ 'initialize', # 12
185
+ 'aggregate', # 12
186
+ '[]', # 11
187
+ 'filters', # 11
188
+ 'html_table', # 11
189
+ 'slice', # 11
177
190
  ]
178
191
 
179
192
  # Offense count: 1
@@ -197,6 +210,12 @@ Naming/PredicateName:
197
210
  - 'lib/red_amber/vector_functions.rb'
198
211
  - 'lib/red_amber/vector_selectable.rb'
199
212
 
213
+ # Offense count: 16
214
+ # This cop supports safe autocorrection (--autocorrect).
215
+ Style/OperatorMethodCall:
216
+ Exclude:
217
+ - 'test/test_vector_binary_element_wise.rb'
218
+
200
219
  # Necessary to test when range.end == -1
201
220
  # Offense count: 2
202
221
  # This cop supports unsafe autocorrection (--autocorrect-all).
data/.yardopts CHANGED
@@ -1 +1,3 @@
1
1
  --output-dir doc/yard
2
+ --template-path doc/yard-templates
3
+ --use-cache
data/CHANGELOG.md CHANGED
@@ -1,3 +1,116 @@
1
+ ## [0.4.0] - 2023-02-25
2
+
3
+ :memo: Update documents for consistency
4
+
5
+ - Breaking change
6
+ - Upgrade dependency to Arrow 11.0.0 (#188)
7
+
8
+ - Bug fixes
9
+ - Add :force_order option for DataFrame#join (#174)
10
+ - Return error for empty DataFrame in DataFrame#filter (#172)
11
+ - Accept ChunkedArray in DataFrame#filter (#172)
12
+ - Fix Vector#replace to accept Arrow::Array as a replacer (#179)
13
+ - Fix Vector#round_to_multiple to accept Float or Integer (#180)
14
+ - Change Vector atan2 to a class method (#180)
15
+ - Fix Vector#shift when boolean Vector (#184)
16
+ - Fix processing empty SubFrames (#183)
17
+ - Do not check object id in DataFrame#rename, #drop for self (#188)
18
+
19
+ - New features and improvements
20
+ - Accept a block in DataFrame#filter (#172)
21
+ - Add Vector.aggregate? method (#175)
22
+ - Introduce Vector#propagate method (#175)
23
+ - Add Vector#rank methods (#176)
24
+ - Add Vector#sample method (#176)
25
+ - Add Vector#sort method (#176)
26
+ - Promote DataFrame#shape_str to public (#184)
27
+ - Introduce Vector#concatenate (#184)
28
+ - Add #numeric? in refinements of Array (#184)
29
+ - Add Vector#cumulative_sum_checked and #cumsum (#184)
30
+ - Add Vector#resolve method (#184)
31
+ - Add DataFrame#tdra method (#184)
32
+ - Add #expand as an alias for Vector#propagate (#184)
33
+ - Add #glimpse as an alias for DataFrame#tdr (#184)
34
+ - New class SubFrames (#183)
35
+ - Introduce class SubFrames
36
+ - Memorize dataframes in SubFrames
37
+ - Add @frames to memorize sub DataFrames
38
+ - Accept filters in SubFrames.new
39
+ - Accept block in SubFrames.new
40
+ - Add SubFrames.by_filter
41
+ - Introduce methods creating SubFrames from DataFrame
42
+ - Introduce SubFrames#each method
43
+ - Add SubFrames#to_s method
44
+ - Add SubFrames#concatenate method
45
+ - Add SubFrames#offset_indices method
46
+ - SubFrames#aggregate method
47
+ - Redefine SubFrames#map to return SubFrames
48
+ - Define SubFrame#map dynamically
49
+ - Add SubFrames#assign method
50
+ - Redefine SubFrames#select to return SubFrames
51
+ - Add SubFrames#reject method
52
+ - Add SubFrames#filter_map method
53
+ - Refine DataFrame#indices memorizing @indices
54
+ - Rename SubFrames#universal_frame as #baseframe
55
+ - Set Group iteration feature to @api private
56
+
57
+ - Refactoring
58
+ - Generate Vector functions in class method (#177)
59
+ - Set Constant visibility to private (#179)
60
+ - Separate test_vector_function (#179)
61
+ - Relocate methods in DataFrameIndexable (#179)
62
+ - Rename Array refinements to the same name as Vector (#184)
63
+
64
+ - Improve in tests/CI
65
+ - Tests
66
+ - Update benchmarks to set 0.3.0 as a reference (#167)
67
+ - Move test of Vector#logb to proper location (#180)
68
+
69
+ - Cops
70
+ - Update .rubocop.yml to align with latest cops (#174)
71
+ - Unify style of MethodCallIndentation as relative to receiver (#184)
72
+
73
+ - CI
74
+ - Fix setting up Arrow by homebrew in CI (#167)
75
+ - Fix CI error on homebrew deleting python link (#167)
76
+ - Set cache-version to get new C extensions in CI (#173) Thanks to @kou for suggestion.
77
+
78
+ - Documentation
79
+ - Update DataFrame.md about loading csv without headers (#165)
80
+ - Thanks to kojix2
81
+ - Update YARD in DataFrame combinable (#168)
82
+ - Update comment for Ruby 2.7 support in README.md
83
+ - Update license year
84
+ - Update README (#172)
85
+ - Update Vector.md and yardoc in #propagate (#175)
86
+ - Use customized style sheet for YARD (#179)
87
+ - Add examples for the doc of #pick and #drop (#179)
88
+ - Add examples to YARD in DataFrame reshaping methods (#179)
89
+ - Update documents in DataFrameDisplayable (#179)
90
+ - Update documents in DataFrameVariableOperation (#179)
91
+ - Update document for dynamically generated methods (#179)
92
+ - Unify style in document (#179)
93
+ - Update documents in DataFrameSelectable (#179)
94
+ - Update documents of basic Vector methods (#179)
95
+ - Update document in VectorUpdatable (#179)
96
+ - Update document of Group (#179)
97
+ - Update document of DataFrameLoadSave (#180)
98
+ - Add examples for document of ArrowFunction (#180)
99
+ - Update document of Vector_unary_aggregation (#180)
100
+ - Update document of Vector_unary_element_wise (#180)
101
+ - Update document of Vector_binary_element_wise (#180)
102
+ - Add documentation to give comparison of dataframes (#169)
103
+ - Thanks to Benson Muite
104
+ - Update documents for consistency of method indentation (#189)
105
+ - Update CHANGELOG (#189)
106
+ - Update README for 0.4.0 (#189)
107
+
108
+ - GitHub site
109
+
110
+ - Thanks
111
+ - kojix2
112
+ - Benson Muite
113
+
1
114
  ## [0.3.0] - 2022-12-18
2
115
 
3
116
  - Breaking change
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ gemspec
7
7
  group :test do
8
8
  gem 'rake'
9
9
 
10
- gem 'red-parquet', '~> 10.0.0'
10
+ gem 'red-parquet', '~> 11.0.0'
11
11
  gem 'rover-df', '~> 0.3.0'
12
12
 
13
13
  gem 'rubocop'
data/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2022 Hirokazu SUZUKI (heronshoes)
3
+ Copyright (c) 2022-2023 Hirokazu SUZUKI (heronshoes)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # RedAmber
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/red_amber.svg)](https://badge.fury.io/rb/red_amber)
3
+ [![Gem Version](https://img.shields.io/gem/v/red_amber?color=brightgreen)](https://rubygems.org/gems/red_amber)
4
4
  [![CI](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml/badge.svg)](https://github.com/heronshoes/red_amber/actions/workflows/ci.yml)
5
5
  [![Maintainability](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/maintainability)](https://codeclimate.com/github/heronshoes/red_amber/maintainability)
6
6
  [![Test coverage](https://api.codeclimate.com/v1/badges/b8a745047045d2f49daa/test_coverage)](https://codeclimate.com/github/heronshoes/red_amber/test_coverage)
@@ -10,22 +10,20 @@
10
10
  A simple dataframe library for Ruby.
11
11
 
12
12
  - Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
13
- [![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en)
13
+ [![Gitter Chat](https://badges.gitter.im/red-data-tools/en.svg)](https://gitter.im/red-data-tools/en) [![Gem Version](https://img.shields.io/gem/v/red-arrow?color=brightgreen)](https://rubygems.org/gems/red-arrow)
14
14
  - Inspired by the dataframe library [Rover-df](https://github.com/ankane/rover)
15
15
 
16
16
  ![screenshot from jupyterlab](https://raw.githubusercontent.com/heronshoes/red_amber/main/doc/image/screenshot.png)
17
17
 
18
18
  ## Requirements
19
-
19
+ ### Ruby
20
20
  Supported Ruby version is >= 3.0 (since RedAmber 0.3.0).
21
+ - I decided to remove Ruby 2.7 without waiting for EOL. See [Release note for v0.3.0](https://github.com/heronshoes/red_amber/discussions/162) for details.
21
22
 
22
- - I decided to remove Ruby 2.7 without waiting for EOL because it cannot solve the problem of simultaneous use of Hash and keyword arguments when implementing DataFrame#join.
23
-
23
+ ### Libraries
24
24
  ```ruby
25
- # Libraries required
26
- gem 'red-arrow', '~> 10.0.0' # Requires Apache Arrow (see installation below)
27
-
28
- gem 'red-parquet', '~> 10.0.0' # Optional, if you use IO from/to parquet
25
+ gem 'red-arrow', '~> 11.0.0' # Requires Apache Arrow (see installation below)
26
+ gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
29
27
  gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
30
28
  ```
31
29
 
@@ -33,11 +31,11 @@ gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
33
31
 
34
32
  Install requirements before you install Red Amber.
35
33
 
36
- - Apache Arrow (~> 10.0.0)
37
- - Apache Arrow GLib (~> 10.0.0)
38
- - Apache Parquet GLib (~> 10.0.0) # If you use IO from/to parquet
34
+ - Apache Arrow (~> 11.0.0)
35
+ - Apache Arrow GLib (~> 11.0.0)
36
+ - Apache Parquet GLib (~> 11.0.0) # If you use IO from/to parquet
39
37
 
40
- See [Apache Arrow install document](https://arrow.apache.org/install/).
38
+ See [Apache Arrow install document](https://arrow.apache.org/install/).
41
39
 
42
40
  - Minimum installation example for the latest Ubuntu:
43
41
 
@@ -58,38 +56,39 @@ Install requirements before you install Red Amber.
58
56
  sudo dnf -y install gcc-c++ libarrow-devel libarrow-glib-devel ruby-devel
59
57
  ```
60
58
 
61
- - On macOS, you can install Apache Arrow C++ library using Homebrew:
59
+ - On macOS, using Homebrew:
62
60
 
63
61
  ```
64
62
  brew install apache-arrow
65
- ```
66
-
67
- and GLib (C) package with:
68
-
69
- ```
70
63
  brew install apache-arrow-glib
71
64
  ```
72
65
 
73
66
  If you prepared Apache Arrow, add these lines to your Gemfile:
74
67
 
75
68
  ```ruby
76
- gem 'red-arrow', '~> 10.0.0'
69
+ gem 'red-arrow', '~> 11.0.0'
77
70
  gem 'red_amber'
78
- gem 'red-parquet', '~> 10.0.0' # Optional, if you use IO from/to parquet
71
+ gem 'red-parquet', '~> 11.0.0' # Optional, if you use IO from/to parquet
79
72
  gem 'rover-df', '~> 0.3.0' # Optional, if you use IO from/to Rover::DataFrame
80
73
  gem 'red-datasets-arrow' # Optional, recommended if you use Red Datasets
81
74
  gem 'red-arrow-numo-narray' # Optional, recommended if you use inputs from Numo::NArray
82
75
  ```
83
76
 
84
- And then execute `bundle install` or install it yourself as `gem install red_amber`.
77
+ And then execute `bundle install`, or install the gems yourself, e.g. `gem install red_amber`.
85
78
 
86
79
  ## Docker image and Jupyter Notebook
87
80
 
88
- [RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to @mrkn).
81
+ [RubyData Docker Stacks](https://github.com/RubyData/docker-stacks) is available as a ready-to-run Docker image containing Jupyter and useful data tools as well as RedAmber (Thanks to Kenta Murata).
89
82
 
90
83
  Also you can try the contents of this README interactively by [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb).
91
84
  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=red-amber.ipynb)
92
85
 
86
+ ## Comparison of DataFrames
87
+
88
+ Comparison of basic features of RedAmber with Python
89
+ [pandas](https://pandas.pydata.org/),
90
+ R [Tidyverse](https://www.tidyverse.org/) and
91
+ Julia [Dataframes](https://dataframes.juliadata.org/stable/) is [here](doc/DataFrame_Comparison.md) (Thanks to Benson Muite).
93
92
 
94
93
  ## Data frame in `RedAmber`
95
94
 
@@ -137,7 +136,7 @@ For example, we can compute mean prices per cut for the data larger than 1 carat
137
136
 
138
137
  ```ruby
139
138
  df = diamonds
140
- .slice { carat > 1 }
139
+ .slice { carat > 1 } # or use #filter instead of #slice
141
140
  .group(:cut)
142
141
  .mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.
143
142
  .sort('-mean(price)')
@@ -186,7 +185,7 @@ starwars
186
185
  .drop(0) # delete unnecessary index column
187
186
  .remove { species == "NA" } # delete unnecessary rows
188
187
  .group(:species) { [count(:species), mean(:height, :mass)] }
189
- .slice { count > 1 }
188
+ .slice { count > 1 } # or use #filter instead of slice
190
189
 
191
190
  # =>
192
191
  #<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000000f848>
@@ -213,7 +212,7 @@ See [Vector.md](doc/Vector.md) for details.
213
212
 
214
213
  ## Jupyter notebook
215
214
 
216
- [89 Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
215
+ [Examples of Red Amber](https://github.com/heronshoes/docker-stacks/blob/RedAmber-binder/binder/examples_of_red_amber.ipynb)
217
216
  ([raw file](https://raw.githubusercontent.com/heronshoes/docker-stacks/RedAmber-binder/binder/examples_of_red_amber.ipynb)) shows more examples in jupyter notebook.
218
217
 
219
218
  You can try this notebook on [Binder](https://mybinder.org/v2/gh/heronshoes/docker-stacks/RedAmber-binder?filepath=examples_of_red_amber.ipynb).
data/benchmark/basic.yml CHANGED
@@ -4,9 +4,9 @@ contexts:
4
4
  - name: HEAD
5
5
  prelude: |
6
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
7
+ - name: 0.3.0
8
8
  gems:
9
- red_amber: 0.2.3
9
+ red_amber: 0.3.0
10
10
  - name: 0.2.0
11
11
  gems:
12
12
  red_amber: 0.2.0
@@ -4,9 +4,9 @@ contexts:
4
4
  - name: HEAD
5
5
  prelude: |
6
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
7
+ - name: 0.3.0
8
8
  gems:
9
- red_amber: 0.2.3
9
+ red_amber: 0.3.0
10
10
 
11
11
  prelude: |
12
12
  require 'red_amber'
@@ -4,9 +4,9 @@ contexts:
4
4
  - name: HEAD
5
5
  prelude: |
6
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
7
+ - name: 0.3.0
8
8
  gems:
9
- red_amber: 0.2.3
9
+ red_amber: 0.3.0
10
10
  - name: 0.2.0
11
11
  gems:
12
12
  red_amber: 0.2.0
data/benchmark/group.yml CHANGED
@@ -4,9 +4,9 @@ contexts:
4
4
  - name: HEAD
5
5
  prelude: |
6
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
7
+ - name: 0.3.0
8
8
  gems:
9
- red_amber: 0.2.3
9
+ red_amber: 0.3.0
10
10
  - name: 0.2.2
11
11
  gems:
12
12
  red_amber: 0.2.2
@@ -4,9 +4,9 @@ contexts:
4
4
  - name: HEAD
5
5
  prelude: |
6
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
7
- - name: 0.2.3
7
+ - name: 0.3.0
8
8
  gems:
9
- red_amber: 0.2.3
9
+ red_amber: 0.3.0
10
10
  - name: 0.2.2
11
11
  gems:
12
12
  red_amber: 0.2.2
data/benchmark/vector.yml CHANGED
@@ -4,6 +4,9 @@ contexts:
4
4
  - name: HEAD
5
5
  prelude: |
6
6
  $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ - name: 0.3.0
8
+ gems:
9
+ red_amber: 0.3.0
7
10
  - name: 0.2.0
8
11
  gems:
9
12
  red_amber: 0.2.0
data/doc/DataFrame.md CHANGED
@@ -57,6 +57,10 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
57
57
  ```ruby
58
58
  RedAmber::DataFrame.load("test/entity/with_header.csv")
59
59
  ```
60
+
61
+ ```ruby
62
+ RedAmber::DataFrame.load("test/entity/without_header.csv", headers: [:x, :y, :z])
63
+ ```
60
64
 
61
65
  - from a string buffer
62
66
 
@@ -275,6 +279,7 @@ penguins.to_rover
275
279
 
276
280
  - Shows some information about self in a transposed style.
277
281
  - `tdr_str` returns same info as a String.
282
+ - `glimpse` is an alias. It is similar to dplyr's (or Polars's) `glimpse()`.
278
283
 
279
284
  ```ruby
280
285
  require 'red_amber'
@@ -568,7 +573,7 @@ penguins.to_rover
568
573
  [1, 2, 3]
569
574
  ```
570
575
 
571
- ### `slice ` - slice and select records -
576
+ ### `slice ` - cut into slices of records -
572
577
 
573
578
  Slice and select records (rows) to create a sub DataFrame.
574
579
 
@@ -601,11 +606,14 @@ penguins.to_rover
601
606
 
602
607
  - Booleans as an argument
603
608
 
604
- `slice(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray . Booleans must be same length as `size`.
609
+ `filter(booleans)` or `slice(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray. Booleans must be the same length as `size`.
610
+
611
+ note: `slice(booleans)` is acceptable for orthogonality of `slice`/`remove`.
605
612
 
606
613
  ```ruby
607
614
  vector = penguins[:bill_length_mm]
608
- penguins.slice(vector >= 40)
615
+ penguins.filter(vector >= 40)
616
+ # penguins.slice(vector >= 40) is also acceptable
609
617
 
610
618
  # =>
611
619
  #<RedAmber::DataFrame : 242 x 8 Vectors, 0x0000000000043d3c>
@@ -833,14 +841,14 @@ penguins.to_rover
833
841
 
834
842
  Assign new or updated variables (columns) and create an updated DataFrame.
835
843
 
836
- - Variables with new keys will append new columns from the right.
844
+ - Variables with new keys will append new columns from right.
837
845
  - Variables with existing keys will update corresponding vectors.
838
846
 
839
847
  ![assign method image](doc/../image/dataframe/assign.png)
840
848
 
841
849
  - Variables as arguments
842
850
 
843
- `assign(key_pairs)` accepts pairs of key and values as parameters. `key_pairs` should be a Hash of `{key => array_like}` or an Array of Arrays like `[[key, array_like], ... ]`. `array_like` is ether `Vector`, `Array` or `Arrow::Array`.
851
+ `assign(key_value_pairs)` accepts pairs of key and values as parameters. `key_value_pairs` should be a Hash of `{key => array_like}` or an Array of Arrays like `[[key, array_like], ... ]`. `array_like` is either `Vector`, `Array` or `Arrow::Array`.
844
852
 
845
853
  ```ruby
846
854
  df = RedAmber::DataFrame.new(
@@ -857,12 +865,12 @@ penguins.to_rover
857
865
  2 Hinata 28
858
866
 
859
867
  # update :age and add :brother
860
- df.assign do
868
+ df.assign(
861
869
  {
862
870
  age: age + 29,
863
871
  brother: ['Santa', nil, 'Momotaro']
864
872
  }
865
- end
873
+ )
866
874
 
867
875
  # =>
868
876
  #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000658b0>
@@ -932,7 +940,7 @@ penguins.to_rover
932
940
 
933
941
  - Append from left
934
942
 
935
- `assign_left` method accepts the same parameters and block as `assign`, but append new columns from leftside.
943
+ `assign_left` method accepts the same parameters and block as `assign`, but appends new columns from the left.
936
944
 
937
945
  ```ruby
938
946
  df.assign_left(new_index: df.indices(1))
@@ -1453,6 +1461,8 @@ When the option `keep_key: true` used, the column `key` will be preserved.
1453
1461
  1 B 4
1454
1462
  2 D 5
1455
1463
  ```
1464
+ ##### `set_operable?(other)`
1465
+ Check if `types` of self and other are the same.
1456
1466
 
1457
1467
  ##### `intersect(other)`
1458
1468
 
@@ -1498,15 +1508,23 @@ When the option `keep_key: true` used, the column `key` will be preserved.
1498
1508
  <string> <uint8>
1499
1509
  1 B 2
1500
1510
  2 C 3
1511
+
1512
+ other.difference(df)
1513
+ #=>
1514
+ #<RedAmber::DataFrame : 2 x 2 Vectors, 0x0000000000040e0c>
1515
+ KEY1 KEY2
1516
+ <string> <uint8>
1517
+ 0 B 4
1518
+ 1 D 5
1501
1519
  ```
1502
1520
 
1503
1521
  ## Binding
1504
1522
 
1505
1523
  ### `concatenate(other)`
1506
1524
 
1507
- Concatenate another DataFrame or Table onto the bottom of self. The shape and data type of other must be the same as self.
1525
+ Concatenate another DataFrame or Table onto the bottom of self. The types of other must be the same as self.
1508
1526
 
1509
- The alias is `concat`.
1527
+ The aliases are `concat` and `bind_rows`.
1510
1528
 
1511
1529
  An array of DataFrames or Tables is also acceptable as other.
1512
1530
 
@@ -1538,9 +1556,11 @@ When the option `keep_key: true` used, the column `key` will be preserved.
1538
1556
  3 4 D
1539
1557
  ```
1540
1558
 
1541
- ### `merge(other)`
1559
+ ### `merge(*other)`
1560
+
1561
+ Concatenate another DataFrame or Table onto the right side of self (binding columns). The size of other must be the same as self. Self and other must not share the same key.
1542
1562
 
1543
- Concatenate another DataFrame or Table onto the bottom of self. The shape and data type of other must be the same as self.
1563
+ The alias is `bind_cols`.
1544
1564
 
1545
1565
  ```ruby
1546
1566
  df