red_amber 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rubocop.yml +89 -0
- data/.rubocop_todo.yml +21 -0
- data/CHANGELOG.md +34 -0
- data/Gemfile +17 -0
- data/LICENSE +21 -0
- data/README.md +336 -0
- data/Rakefile +16 -0
- data/doc/CODE_OF_CONDUCT.md +84 -0
- data/lib/red_amber/data_frame.rb +116 -0
- data/lib/red_amber/data_frame_output.rb +116 -0
- data/lib/red_amber/data_frame_selectable.rb +75 -0
- data/lib/red_amber/vector.rb +72 -0
- data/lib/red_amber/vector_functions.rb +172 -0
- data/lib/red_amber/version.rb +5 -0
- data/lib/red_amber.rb +18 -0
- data/red_amber.gemspec +43 -0
- data/sig/red_amber.rbs +4 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 00ba2e99b2b1d6f977b2e2e5c7d60b9313972cf3e831918606e5388d51442137
|
4
|
+
data.tar.gz: f0fc831937bff5fede4ee0f0537b0ef5fdfb8a1faa8a57082a197a627562252c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7bc020b8663c3523426461e3bd54642d4eb85a86296a8db3f5d94315091ee4475ec8b910fb87165c5d029e35fa9dc45f119bea6278e023d3cc63ad011388fbfb
|
7
|
+
data.tar.gz: 78dd55182b40ee9bec769efdbcac23adb85ad93bbafe3f74c4ded9d56ab40e39da0ce1e34a841d5705e6b94fea85312057e360140965fa217243eede0d238eb5
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
# We will use cops to detect bugs in an early stage
|
2
|
+
# Feel free to use .rubocop_todo.yml by --auto-gen-config
|
3
|
+
inherit_from: .rubocop_todo.yml
|
4
|
+
|
5
|
+
require:
|
6
|
+
- rubocop-performance
|
7
|
+
- rubocop-rubycw
|
8
|
+
- rubocop-rake
|
9
|
+
|
10
|
+
AllCops:
|
11
|
+
# drop support for < 2.7
|
12
|
+
TargetRubyVersion: 2.7
|
13
|
+
# accept new cops if any
|
14
|
+
NewCops: enable
|
15
|
+
|
16
|
+
# ===
|
17
|
+
|
18
|
+
# alias is hard to see separately
|
19
|
+
Style/Alias:
|
20
|
+
EnforcedStyle: prefer_alias_method
|
21
|
+
|
22
|
+
# For consistency and portability
|
23
|
+
Style/TrailingCommaInArrayLiteral:
|
24
|
+
EnforcedStyleForMultiline: comma
|
25
|
+
Style/TrailingCommaInHashLiteral:
|
26
|
+
EnforcedStyleForMultiline: comma
|
27
|
+
# Should not set for Style/TrailingCommaInArguments
|
28
|
+
|
29
|
+
# ===
|
30
|
+
|
31
|
+
# To let you know the possibility of refactoring ===
|
32
|
+
#
|
33
|
+
# avoid unused variable assignment
|
34
|
+
Rubycw/Rubycw:
|
35
|
+
Exclude:
|
36
|
+
- 'test/**/*'
|
37
|
+
Lint/UselessAssignment:
|
38
|
+
Exclude:
|
39
|
+
- 'test/**/*'
|
40
|
+
|
41
|
+
# Disabled to define Vector operators
|
42
|
+
Lint/BinaryOperatorWithIdenticalOperands:
|
43
|
+
Exclude:
|
44
|
+
- 'test/test_vector_function.rb'
|
45
|
+
|
46
|
+
# Max: 120
|
47
|
+
Layout/LineLength:
|
48
|
+
Max: 100
|
49
|
+
Exclude:
|
50
|
+
- 'test/**/*'
|
51
|
+
|
52
|
+
# <= 17 satisfactory
|
53
|
+
# 18..30 unsatisfactory
|
54
|
+
# > 30 dangerous
|
55
|
+
Metrics/AbcSize:
|
56
|
+
Max: 19
|
57
|
+
Exclude:
|
58
|
+
- 'lib/red_amber/data_frame_output.rb' # Max: 78
|
59
|
+
|
60
|
+
# Max: 25
|
61
|
+
Metrics/BlockLength:
|
62
|
+
Max: 25
|
63
|
+
Exclude:
|
64
|
+
- 'test/**/*'
|
65
|
+
- '*.gemspec'
|
66
|
+
|
67
|
+
# Max: 100
|
68
|
+
Metrics/ClassLength:
|
69
|
+
Max: 100
|
70
|
+
Exclude:
|
71
|
+
- 'test/**/*'
|
72
|
+
|
73
|
+
# Max: 7
|
74
|
+
Metrics/CyclomaticComplexity:
|
75
|
+
Max: 10
|
76
|
+
Exclude:
|
77
|
+
- 'lib/red_amber/data_frame_output.rb' # Max: 11
|
78
|
+
|
79
|
+
# Max: 10
|
80
|
+
Metrics/MethodLength:
|
81
|
+
Max: 18
|
82
|
+
Exclude:
|
83
|
+
- 'lib/red_amber/data_frame_output.rb' # Max: 35
|
84
|
+
|
85
|
+
# Max: 8
|
86
|
+
Metrics/PerceivedComplexity:
|
87
|
+
Max: 9
|
88
|
+
Exclude:
|
89
|
+
- 'lib/red_amber/data_frame_output.rb' # Max: 12
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2022-04-27 00:29:57 UTC using RuboCop version 1.27.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# This cop supports unsafe auto-correction (--auto-correct-all).
|
11
|
+
Style/SlicingWithRange:
|
12
|
+
Exclude:
|
13
|
+
- 'lib/red_amber/data_frame_selectable.rb'
|
14
|
+
|
15
|
+
# Offense count: 1
|
16
|
+
# This cop supports unsafe auto-correction (--auto-correct-all).
|
17
|
+
# Configuration parameters: EnforcedStyle.
|
18
|
+
# SupportedStyles: forbid_for_all_comparison_operators, forbid_for_equality_operators_only, require_for_all_comparison_operators, require_for_equality_operators_only
|
19
|
+
Style/YodaCondition:
|
20
|
+
Exclude:
|
21
|
+
- 'lib/red_amber/data_frame.rb'
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
## [0.1.2] - Unreleased
|
2
|
+
|
3
|
+
- Add support for Arrow 8.0.0
|
4
|
+
- `DataFrame`
|
5
|
+
- Introduce updating
|
6
|
+
- Introduce NA support
|
7
|
+
- Add slice method
|
8
|
+
- `Vector`
|
9
|
+
- Add NaN support for functions
|
10
|
+
- More functions
|
11
|
+
|
12
|
+
## [0.1.1] - 2022-05-06 (experimental)
|
13
|
+
|
14
|
+
- Release on rubygems.org
|
15
|
+
- Introduce class `DataFrame`
|
16
|
+
- New from Hash, schema/rows, `Arrow::Table`, `Rover::DataFrame`
|
17
|
+
- Load from file, string, URI
|
18
|
+
- Save to file, string, URI
|
19
|
+
- Methods for basic properties
|
20
|
+
- Rich inspect method
|
21
|
+
- Basic selecting by `#[]`
|
22
|
+
- Introduce class `Vector`
|
23
|
+
- New from a column in a `DataFrame`
|
24
|
+
- New from `Arrow::Array`, `Arrow::ChunkedArray`, `Array`
|
25
|
+
- Methods for basic properties
|
26
|
+
- Function support
|
27
|
+
- Unary aggregations
|
28
|
+
- Unary element-wises
|
29
|
+
- Binary element-wises
|
30
|
+
- Some operators defined
|
31
|
+
|
32
|
+
## [0.1.0] - 2022-04-15 (unreleased)
|
33
|
+
|
34
|
+
- Initial version
|
data/Gemfile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
|
5
|
+
gemspec
|
6
|
+
|
7
|
+
group :test do
|
8
|
+
gem 'rake'
|
9
|
+
|
10
|
+
gem 'rubocop'
|
11
|
+
gem 'rubocop-performance', require: false
|
12
|
+
gem 'rubocop-rake'
|
13
|
+
gem 'rubocop-rubycw', require: false
|
14
|
+
|
15
|
+
gem 'test-unit'
|
16
|
+
gem 'webrick'
|
17
|
+
end
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2022 Hirokazu SUZUKI (heronshoes)
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,336 @@
|
|
1
|
+
# RedAmber
|
2
|
+
|
3
|
+
A simple dataframe library for Ruby (experimental)
|
4
|
+
|
5
|
+
- Powered by [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
6
|
+
- Simple API similar to [Rover-df](https://github.com/ankane/rover)
|
7
|
+
|
8
|
+
## Requirements
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'red-arrow', '~> 7.0.0'
|
12
|
+
gem 'red-parquet', '~> 7.0.0' # if you use IO from/to parquet
|
13
|
+
gem 'rover-df', '~> 0.3.0' # if you use IO from/to Rover::DataFrame
|
14
|
+
```
|
15
|
+
|
16
|
+
## Installation
|
17
|
+
|
18
|
+
Add this line to your Gemfile:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
gem 'red_amber'
|
22
|
+
```
|
23
|
+
|
24
|
+
And then execute:
|
25
|
+
|
26
|
+
$ bundle install
|
27
|
+
|
28
|
+
Or install it yourself as:
|
29
|
+
|
30
|
+
$ gem install red_amber
|
31
|
+
|
32
|
+
## `RedAmber::DataFrame`
|
33
|
+
|
34
|
+
### Constructors and saving
|
35
|
+
|
36
|
+
- [x] `new` from a columnar Hash
|
37
|
+
- `RedAmber::DataFrame.new(x: [1, 2, 3])`
|
38
|
+
|
39
|
+
- [x] `new` from a schema (by Hash) and rows (by Array)
|
40
|
+
- `RedAmber::DataFrame.new({:x=>:uint8}, [[1], [2], [3]])`
|
41
|
+
|
42
|
+
- [x] `new` from an Arrow::Table
|
43
|
+
- `RedAmber::DataFrame.new(Arrow::Table.new(x: [1, 2, 3]))`
|
44
|
+
|
45
|
+
- [x] `new` from a Rover::DataFrame
|
46
|
+
- `RedAmber::DataFrame.new(Rover::DataFrame.new(x: [1, 2, 3]))`
|
47
|
+
|
48
|
+
- [ ] `load` (class method)
|
49
|
+
|
50
|
+
- [x] from a [`.arrow`, `.arrows`, `.csv`, `.csv.gz`, `.tsv`] file
|
51
|
+
- `RedAmber::DataFrame.load("test/entity/with_header.csv")`
|
52
|
+
|
53
|
+
- [x] from a string buffer
|
54
|
+
|
55
|
+
- [x] from a URI
|
56
|
+
- `RedAmber::DataFrame.load(URI("https://raw.githubusercontent.com/heronshoes/red_amber/master/test/entity/with_header.csv"))`
|
57
|
+
|
58
|
+
- [ ] from a parquet file
|
59
|
+
|
60
|
+
- [ ] `save` (instance method)
|
61
|
+
|
62
|
+
- [x] to a [`.arrow`, `.arrows`, `.csv`, `.csv.gz`, `.tsv`] file
|
63
|
+
|
64
|
+
- [x] to a string buffer
|
65
|
+
|
66
|
+
- [x] to a URI
|
67
|
+
|
68
|
+
- [ ] to a parquet file
|
69
|
+
|
70
|
+
### Properties
|
71
|
+
|
72
|
+
- [x] `table`
|
73
|
+
|
74
|
+
Reader of Arrow::Table object inside.
|
75
|
+
|
76
|
+
- [x] `n_rows`, `nrow`, `size`, `length`
|
77
|
+
|
78
|
+
Returns num of rows (data size).
|
79
|
+
|
80
|
+
- [x] `n_columns`, `ncol`, `width`
|
81
|
+
|
82
|
+
Returns num of columns (num of vectors).
|
83
|
+
|
84
|
+
- [x] `shape`
|
85
|
+
|
86
|
+
Returns shape in an Array[n_rows, n_cols].
|
87
|
+
|
88
|
+
- [x] `column_names`, `keys`
|
89
|
+
|
90
|
+
Returns column names in an Array.
|
91
|
+
|
92
|
+
- [x] `types(class_name: false)`
|
93
|
+
|
94
|
+
Returns types of columns by an Array.
|
95
|
+
If `class_name: true` returns an Array of `Arrow::DataType`.
|
96
|
+
|
97
|
+
- [x] `vectors`
|
98
|
+
|
99
|
+
Returns an Array of Vectors.
|
100
|
+
|
101
|
+
- [x] `to_h`
|
102
|
+
|
103
|
+
Returns column-oriented data in a Hash.
|
104
|
+
|
105
|
+
- [x] `to_a`, `raw_records`
|
106
|
+
|
107
|
+
Returns an array of row-oriented data without header. If you need a column-oriented full array, use `.to_h.to_a`
|
108
|
+
|
109
|
+
- [x] `schema`
|
110
|
+
|
111
|
+
Returns column name and data type in a Hash.
|
112
|
+
|
113
|
+
- [x] `==`
|
114
|
+
|
115
|
+
- [x] `empty?`
|
116
|
+
|
117
|
+
### Output
|
118
|
+
|
119
|
+
- [x] `to_s`
|
120
|
+
|
121
|
+
- [ ] summary, describe
|
122
|
+
|
123
|
+
- [x] `to_rover`
|
124
|
+
|
125
|
+
Returns a `Rover::DataFrame`.
|
126
|
+
|
127
|
+
- [x] `inspect(tally_level: 5, max_element: 5)`
|
128
|
+
|
129
|
+
Shows some information about self.
|
130
|
+
|
131
|
+
- tally_level: max level to use tally mode
|
132
|
+
- max_element: max num of element to show values in each row
|
133
|
+
|
134
|
+
### Selecting
|
135
|
+
|
136
|
+
- [x] Selecting columns by `[]`
|
137
|
+
|
138
|
+
`[key]`, `[keys]`, `[keys[index]]`
|
139
|
+
|
140
|
+
- [x] Selecting rows by `[]`
|
141
|
+
|
142
|
+
`[index]`, `[range]`, `[array]`
|
143
|
+
|
144
|
+
- [x] Selecting rows from top or bottom
|
145
|
+
|
146
|
+
`head(n=5)`, `tail(n=5)`, `first(n=1)`, `last(n=1)`
|
147
|
+
|
148
|
+
- [ ] slice
|
149
|
+
|
150
|
+
### Updating
|
151
|
+
|
152
|
+
- [ ] Add a new column
|
153
|
+
|
154
|
+
- [ ] Update a single element
|
155
|
+
|
156
|
+
- [ ] Update multiple elements
|
157
|
+
|
158
|
+
- [ ] Update all elements
|
159
|
+
|
160
|
+
- [ ] Update elements matching a condition
|
161
|
+
|
162
|
+
- [ ] Clamp
|
163
|
+
|
164
|
+
- [ ] Delete columns
|
165
|
+
|
166
|
+
- [ ] Rename a column
|
167
|
+
|
168
|
+
- [ ] Sort rows
|
169
|
+
|
170
|
+
- [ ] Clear data
|
171
|
+
|
172
|
+
### Treat na data
|
173
|
+
|
174
|
+
- [ ] Drop na (NaN, nil)
|
175
|
+
|
176
|
+
- [ ] Replace na with value
|
177
|
+
|
178
|
+
- [ ] Interpolate na with convolution array
|
179
|
+
|
180
|
+
### Combining DataFrames
|
181
|
+
|
182
|
+
- [ ] Add rows
|
183
|
+
|
184
|
+
- [ ] Add columns
|
185
|
+
|
186
|
+
- [ ] Inner join
|
187
|
+
|
188
|
+
- [ ] Left join
|
189
|
+
|
190
|
+
### Encoding
|
191
|
+
|
192
|
+
- [ ] One-hot encoding
|
193
|
+
|
194
|
+
### Iteration (not implemented)
|
195
|
+
|
196
|
+
### Filtering (not implemented)
|
197
|
+
|
198
|
+
|
199
|
+
## `RedAmber::Vector`
|
200
|
+
### Constructor
|
201
|
+
|
202
|
+
- [x] Create from a column in a DataFrame
|
203
|
+
|
204
|
+
- [x] New from an Array
|
205
|
+
|
206
|
+
### Properties
|
207
|
+
|
208
|
+
- [x] `to_s`
|
209
|
+
|
210
|
+
- [x] `values`, `to_a`, `entries`
|
211
|
+
|
212
|
+
- [x] `size`, `length`, `n_rows`, `nrow`
|
213
|
+
|
214
|
+
- [x] `type`
|
215
|
+
|
216
|
+
- [ ] `each`
|
217
|
+
|
218
|
+
- [ ] `chunked?`
|
219
|
+
|
220
|
+
- [ ] `n_chunks`
|
221
|
+
|
222
|
+
- [ ] `each_chunk`
|
223
|
+
|
224
|
+
- [x] `tally`
|
225
|
+
|
226
|
+
- [ ] `n_nulls`
|
227
|
+
|
228
|
+
### Functions
|
229
|
+
#### Unary aggregations: vector.func => Scalar
|
230
|
+
|
231
|
+
| Method |Boolean|Numeric|String|Remarks|
|
232
|
+
| ------------ | --- | --- | --- | ----- |
|
233
|
+
|[x] `all` | [x] | | | |
|
234
|
+
|[x] `any` | [x] | | | |
|
235
|
+
|[x] `approximate_median`| | [x] | | |
|
236
|
+
|[x] `count` | [x] | [x] | [x] | |
|
237
|
+
|[x] `count_distinct`| [x] | [x] | [x] | |
|
238
|
+
|[x] `count_uniq` | [x] | [x] | [x] |an alias of `count_distinct`|
|
239
|
+
|[ ] `index` | | | | |
|
240
|
+
|[x] `max` | [x] | [x] | [x] | |
|
241
|
+
|[x] `mean` | [x] | [x] | | |
|
242
|
+
|[x] `min` | [x] | [x] | [x] | |
|
243
|
+
|[ ] `min_max` | | | | |
|
244
|
+
|[ ] `mode` | | | | |
|
245
|
+
|[x] `product` | [x] | [x] | | |
|
246
|
+
|[ ] `quantile`| | | | |
|
247
|
+
|[x] `stddev` | | [x] | | |
|
248
|
+
|[x] `sum` | [x] | [x] | | |
|
249
|
+
|[ ] `tdigest` | | | | |
|
250
|
+
|[x] `variance`| | [x] | | |
|
251
|
+
|
252
|
+
#### Unary element-wise: vector.func => Vector
|
253
|
+
|
254
|
+
| Method |Boolean|Numeric|String|Remarks|
|
255
|
+
| ------------ | --- | --- | --- | ----- |
|
256
|
+
|[x] `-@` | | [x] | |as `-vector`|
|
257
|
+
|[x] `negate` | | [x] | |`-@` |
|
258
|
+
|[x] `abs` | | [x] | | |
|
259
|
+
|[ ] `acos` | | [ ] | | |
|
260
|
+
|[ ] `asin` | | [ ] | | |
|
261
|
+
|[x] `atan` | | [x] | | |
|
262
|
+
|[ ] `ceil` | | [x] | | |
|
263
|
+
|[x] `cos` | | [x] | | |
|
264
|
+
|[ ] `floor` | | [x] | | |
|
265
|
+
|[ ] `ln` | | [ ] | | |
|
266
|
+
|[ ] `log10` | | [ ] | | |
|
267
|
+
|[ ] `log1p` | | [ ] | | |
|
268
|
+
|[ ] `log2` | | [ ] | | |
|
269
|
+
|[x] `sign` | | [x] | | |
|
270
|
+
|[x] `sin` | | [x] | | |
|
271
|
+
|[x] `tan` | | [x] | | |
|
272
|
+
|[ ] `trunc` | | [x] | | |
|
273
|
+
|
274
|
+
#### Binary element-wise: vector.func(vector) => Vector
|
275
|
+
|
276
|
+
| Method |Boolean|Numeric|String|Remarks|
|
277
|
+
| ------------------ | --- | --- | --- | ----- |
|
278
|
+
|[x] `add` | | [x] | | `+` |
|
279
|
+
|[x] `atan2` | | [x] | | |
|
280
|
+
|[x] `and` | [x] | | | |
|
281
|
+
|[x] `and_kleene` | [x] | | | |
|
282
|
+
|[x] `and_not` | [x] | | | |
|
283
|
+
|[x] `and_not_kleene`| [x] | | | |
|
284
|
+
|[x] `bit_wise_and` | |([x])| |`&`, integer only|
|
285
|
+
|[ ] `bit_wise_not` | |([x])| |`!`, integer only|
|
286
|
+
|[x] `bit_wise_or` | |([x])| |`|`, integer only|
|
287
|
+
|[x] `bit_wise_xor` | |([x])| |`^`, integer only|
|
288
|
+
|[x] `divide` | | [x] | | `/` |
|
289
|
+
|[x] `equal` | [x] | [x] | [x] |`==`, alias `eq`|
|
290
|
+
|[x] `greater` | [x] | [x] | [x] |`>`, alias `gt`|
|
291
|
+
|[x] `greater_equal` | [x] | [x] | [x] |`>=`, alias `ge`|
|
292
|
+
|[x] `less` | [x] | [x] | [x] |`<`, alias `lt`|
|
293
|
+
|[x] `less_equal` | [x] | [x] | [x] |`<=`, alias `le`|
|
294
|
+
|[ ] `logb` | | [ ] | | |
|
295
|
+
|[ ] `mod` | | [ ] | | |
|
296
|
+
|[x] `multiply` | | [x] | | `*` |
|
297
|
+
|[x] `not_equal` | [x] | [x] | [x] |`!=`, alias `ne`|
|
298
|
+
|[x] `or` | [x] | | | |
|
299
|
+
|[x] `or_kleene` | [x] | | | |
|
300
|
+
|[x] `power` | | [x] | | `**` |
|
301
|
+
|[x] `subtract` | | [x] | | `-` |
|
302
|
+
|[x] `shift_left` | |([x])| |`<<`, integer only|
|
303
|
+
|[x] `shift_right` | |([x])| |`>>`, integer only|
|
304
|
+
|[x] `xor` | [x] | | | |
|
305
|
+
|
306
|
+
##### (Not implemented)
|
307
|
+
- [ ] invert, round, round_to_multiple
|
308
|
+
- [ ] sort, sort_index
|
309
|
+
- [ ] minmax, var, median, quantile
|
310
|
+
- [ ] argmin, argmax
|
311
|
+
- [ ] (array functions)
|
312
|
+
- [ ] (strings functions)
|
313
|
+
- [ ] (temporal functions)
|
314
|
+
- [ ] (conditional functions)
|
315
|
+
- [ ] (index functions)
|
316
|
+
- [ ] (other functions)
|
317
|
+
|
318
|
+
### Coerce (not implemented)
|
319
|
+
|
320
|
+
### Updating (not implemented)
|
321
|
+
|
322
|
+
### DSL in a block for faster calculation ?
|
323
|
+
|
324
|
+
|
325
|
+
## Development
|
326
|
+
|
327
|
+
```
|
328
|
+
git clone https://github.com/heronshoes/red_amber.git
|
329
|
+
cd red_amber
|
330
|
+
bundle install
|
331
|
+
bundle exec rake test
|
332
|
+
```
|
333
|
+
|
334
|
+
## License
|
335
|
+
|
336
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bundler/gem_tasks'
|
4
|
+
require 'rake/testtask'
|
5
|
+
|
6
|
+
Rake::TestTask.new(:test) do |t|
|
7
|
+
t.libs << 'test'
|
8
|
+
t.libs << 'lib'
|
9
|
+
t.test_files = FileList['test/**/test_*.rb']
|
10
|
+
end
|
11
|
+
|
12
|
+
require 'rubocop/rake_task'
|
13
|
+
|
14
|
+
RuboCop::RakeTask.new
|
15
|
+
|
16
|
+
task default: %i[test rubocop]
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
2
|
+
|
3
|
+
## Our Pledge
|
4
|
+
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
6
|
+
|
7
|
+
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
|
8
|
+
|
9
|
+
## Our Standards
|
10
|
+
|
11
|
+
Examples of behavior that contributes to a positive environment for our community include:
|
12
|
+
|
13
|
+
* Demonstrating empathy and kindness toward other people
|
14
|
+
* Being respectful of differing opinions, viewpoints, and experiences
|
15
|
+
* Giving and gracefully accepting constructive feedback
|
16
|
+
* Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
|
17
|
+
* Focusing on what is best not just for us as individuals, but for the overall community
|
18
|
+
|
19
|
+
Examples of unacceptable behavior include:
|
20
|
+
|
21
|
+
* The use of sexualized language or imagery, and sexual attention or
|
22
|
+
advances of any kind
|
23
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
24
|
+
* Public or private harassment
|
25
|
+
* Publishing others' private information, such as a physical or email
|
26
|
+
address, without their explicit permission
|
27
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
28
|
+
professional setting
|
29
|
+
|
30
|
+
## Enforcement Responsibilities
|
31
|
+
|
32
|
+
Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
|
33
|
+
|
34
|
+
Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.
|
35
|
+
|
36
|
+
## Scope
|
37
|
+
|
38
|
+
This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
|
39
|
+
|
40
|
+
## Enforcement
|
41
|
+
|
42
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at 63298319+heronshoes@users.noreply.github.com. All complaints will be reviewed and investigated promptly and fairly.
|
43
|
+
|
44
|
+
All community leaders are obligated to respect the privacy and security of the reporter of any incident.
|
45
|
+
|
46
|
+
## Enforcement Guidelines
|
47
|
+
|
48
|
+
Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
|
49
|
+
|
50
|
+
### 1. Correction
|
51
|
+
|
52
|
+
**Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
|
53
|
+
|
54
|
+
**Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.
|
55
|
+
|
56
|
+
### 2. Warning
|
57
|
+
|
58
|
+
**Community Impact**: A violation through a single incident or series of actions.
|
59
|
+
|
60
|
+
**Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.
|
61
|
+
|
62
|
+
### 3. Temporary Ban
|
63
|
+
|
64
|
+
**Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.
|
65
|
+
|
66
|
+
**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
|
67
|
+
|
68
|
+
### 4. Permanent Ban
|
69
|
+
|
70
|
+
**Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
|
71
|
+
|
72
|
+
**Consequence**: A permanent ban from any sort of public interaction within the community.
|
73
|
+
|
74
|
+
## Attribution
|
75
|
+
|
76
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0,
|
77
|
+
available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
|
78
|
+
|
79
|
+
Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).
|
80
|
+
|
81
|
+
[homepage]: https://www.contributor-covenant.org
|
82
|
+
|
83
|
+
For answers to common questions about this code of conduct, see the FAQ at
|
84
|
+
https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
  # A data frame that wraps an Arrow::Table.
  # @table : the wrapped Arrow::Table; every reader below delegates to it.
  class DataFrame
    # Row/column selection and text output are provided by these mix-ins.
    include DataFrameSelectable
    include DataFrameOutput

    # Builds a DataFrame. Dispatch, in order:
    # - no argument, `[]` or `nil` -> empty DataFrame
    # - two or more arguments      -> forwarded unchanged to Arrow::Table.new
    # - an Arrow::Table            -> wrapped as is
    # - a DataFrame                -> reuses the other frame's table
    # - a Rover::DataFrame         -> converted through its #to_h
    # - a Hash                     -> Arrow::Table.new(hash); `{}` gives an empty table
    # Any other single argument raises DataFrameTypeError.
    def initialize(*args)
      @table = Arrow::Table.new({}, [])
      # Workaround for ruby-gnome/ruby-gnome#1472 (gobject-introspection):
      # `[Arrow::Table] == [nil]` raises ArgumentError, so the nil literal is
      # kept on the left-hand side (yoda condition) for now.
      return if args.empty? || args == [[]] || [nil] == args

      @table =
        if args.size > 1
          Arrow::Table.new(*args)
        else
          source = args.first
          case source
          when Arrow::Table then source
          when DataFrame then source.table
          when Rover::DataFrame then Arrow::Table.new(source.to_h)
          when Hash
            # DataFrame.new({}) needs the explicit empty rows argument
            source.empty? ? Arrow::Table.new(source, []) : Arrow::Table.new(source)
          else
            raise DataFrameTypeError, "invalid argument: #{args}"
          end
        end
    end

    # Loads through Arrow::Table.load and wraps the result.
    def self.load(path, options = {})
      loaded = Arrow::Table.load(path, options)
      DataFrame.new(loaded)
    end

    attr_reader :table

    # Delegates to Arrow::Table#save.
    def save(output, options = {})
      @table.save(output, options)
    end

    # Properties ===

    # Number of rows.
    def n_rows
      @table.n_rows
    end
    alias_method :nrow, :n_rows
    alias_method :size, :n_rows
    alias_method :length, :n_rows

    # Number of columns.
    def n_columns
      @table.n_columns
    end
    alias_method :ncol, :n_columns
    alias_method :width, :n_columns

    # [n_rows, n_columns]
    def shape
      [n_rows, n_columns]
    end

    # Column names as an Array of Symbols.
    def column_names
      @table.columns.map { |column| column.name.to_sym }
    end
    alias_method :keys, :column_names
    alias_method :header, :column_names

    # Column data types: Symbols by default, the data type classes
    # when `class_name: true`.
    def types(class_name: false)
      @table.columns.map do |column|
        data_type = column.data_type
        next data_type.class if class_name

        data_type.to_s.to_sym
      end
    end

    # One Vector per column.
    def vectors
      @table.columns.map { |column| Vector.new(column.data) }
    end

    # Column-oriented data: { column name (Symbol) => entries }.
    def to_h
      @table.columns.to_h { |column| [column.name.to_sym, column.entries] }
    end

    # Row-oriented data without header.
    # If you need a column-oriented array, use `.to_h.to_a`.
    def to_a
      @table.raw_records
    end
    alias_method :raw_records, :to_a

    # { column name => type }
    def schema
      names = keys
      names.zip(types).to_h
    end

    # Equal only to another DataFrame whose table compares equal.
    def ==(other)
      return false unless other.is_a?(DataFrame)

      @table == other.table
    end

    # True when the table has no columns.
    def empty?
      @table.columns.empty?
    end

    # Converts to a Rover::DataFrame through #to_h.
    def to_rover
      Rover::DataFrame.new(to_h)
    end

    # def to_parquet() end
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
|
5
|
+
module RedAmber
  # mix-ins for the class DataFrame
  #
  # Text output. Uses @table plus #empty?, #nrow, #ncol, #keys, #types and
  # #vectors of the including object.
  module DataFrameOutput
    def to_s
      @table.to_s
    end

    # def describe() end

    # def summary() end

    # Object header line followed by the plain table text.
    def inspect_raw
      # Pass the class name and table text as arguments instead of interpolating
      # them into the template: a '%' inside the data would otherwise be parsed
      # as a format directive.
      format("#<%s:0x%016x>\n%s", self.class, object_id, self)
    end

    # Shows shape, variable counts by type group and a per-column preview.
    #
    # - tally_level: max level to use tally mode
    # - max_element: max element to show values in each row
    # TODO: Is it better to change name other than `inspect` ?
    # TODO: Add na count capability
    # TODO: Fall back to inspect_raw when treating large dataset
    # TODO: Refactor code to smaller methods
    def inspect(tally_level: 5, max_element: 5)
      return '#<RedAmber::DataFrame (empty)>' if empty?

      # Fetch once; the per-column loop below only indexes into these.
      rows = nrow
      cols = ncol
      column_keys = keys
      column_types = types
      column_vectors = vectors

      stringio = StringIO.new # output string buffer

      # 1st row: show shape of the dataframe
      r = pl(rows)
      c = pl(cols)
      stringio.puts \
        "#{self.class} : #{rows} observation#{r}(row#{r}) of #{cols} variable#{c}(column#{c})"

      # 2nd row: show var counts by type
      type_groups = types(class_name: true).map { |t| type_group(t) }

      stringio.puts "Variable#{c} : #{var_type_count(type_groups).join(', ')}"

      # 3rd row: print header of rows
      levels = column_vectors.map { |v| v.to_a.uniq.size }
      row_headers = { idx: '#', key: 'key', type: 'type', levels: 'level', data: 'data_preview' }
      # find longest word to adjust column width
      w_idx = cols.to_s.size
      w_key = (column_keys.map { |key| key.size + 1 } << row_headers[:key].size).max
      w_type = (column_types.map(&:size) << row_headers[:type].size).max
      w_row = (levels.map { |l| l.to_s.size } << row_headers[:levels].size).max
      stringio.printf("%-#{w_idx}s %-#{w_key}s %-#{w_type}s %-#{w_row}s %s\n", *row_headers.values)

      # (4) show details for each column (vector)
      column_vectors.each.with_index(1) do |vector, i|
        key = column_keys[i - 1]
        type = column_types[i - 1]
        type_group = type_groups[i - 1]
        data_tally = vector.tally

        str = format("%#{w_row}d ", data_tally.size)
        str <<
          case type_group
          when :numeric, :string, :boolean
            # tally mode only pays off when values repeat and levels are few
            if data_tally.size <= tally_level && data_tally.size != rows
              data_tally.to_s
            else
              reduced_vector_presentation(vector, rows, max_element)
            end
            # c = vector.is_na.tally[1] # release when `#is_na` implemented
            # str << " #{c} NaN#{pl(c)}" if c&.>(0) # safely call c>0
          else
            reduced_vector_presentation(vector, rows, max_element)
          end

        stringio.printf("%#{w_idx}d %-#{w_key}s %-#{w_type}s %s\n", i, ":#{key}", type, str)
      end

      stringio.string
    end

    private # =====

    # plural suffix: 's' when num is greater than 1
    def pl(num)
      num > 1 ? 's' : ''
    end

    # maps an Arrow data type class to one of
    # :numeric, :string, :boolean, :temporal or :other
    def type_group(type)
      if Arrow::NumericDataType >= type
        :numeric
      elsif Arrow::StringDataType >= type
        :string
      elsif Arrow::BooleanDataType >= type
        :boolean
      elsif Arrow::TemporalDataType >= type
        :temporal
      else
        :other
      end
    end

    # "<count> <group>" phrases for the groups that are present
    # (:other is not reported)
    def var_type_count(type_groups)
      tg = type_groups.tally
      a = []
      a << "#{tg[:numeric]} numeric" if tg[:numeric]
      a << "#{tg[:string]} string#{pl(tg[:string])}" if tg[:string]
      a << "#{tg[:boolean]} boolean" if tg[:boolean]
      a << "#{tg[:temporal]} temporal" if tg[:temporal]
      a
    end

    # first max_element values in brackets, with '...' when rows were cut
    def reduced_vector_presentation(vector, nrow, max_element)
      a = vector.to_a.take(max_element)
      a << '...' if nrow > max_element
      "[#{a.join(', ')}]"
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
  # mix-ins for the class DataFrame
  #
  # Selection by `[]` and the head/tail family. Uses @table plus #empty? and
  # #size of the including object.
  module DataFrameSelectable
    # select columns: [symbol] or [string]
    # select rows: [array of index], [range]
    #
    # All-Integer arguments (after Range expansion) select rows; all
    # Symbol/String arguments select columns. Raises DataFrameArgumentError for
    # an empty dataframe, no argument, or any other mix of arguments.
    def [](*args)
      raise DataFrameArgumentError, 'Empty dataframe' if empty?
      raise DataFrameArgumentError, 'Empty argument' if args.empty?

      # expand Range like [1..3, 4] to [1, 2, 3, 4]
      expanded =
        args.each_with_object([]) do |e, a|
          e.is_a?(Range) ? a.concat(e.to_a) : a.append(e)
        end

      return select_rows(expanded) if integers?(expanded)
      return select_columns(expanded.map(&:to_sym)) if sym_or_str?(expanded)

      raise DataFrameArgumentError, "invalid argument #{args}"
    end

    # first n_rows rows (at most #size)
    def head(n_rows = 5)
      raise DataFrameArgumentError, "index is out of range #{n_rows}" if n_rows.negative?

      self[0...[n_rows, size].min]
    end

    # last n_rows rows (at most #size)
    def tail(n_rows = 5)
      raise DataFrameArgumentError, "index is out of range #{n_rows}" if n_rows.negative?

      self[-[n_rows, size].min..-1]
    end

    def first(n_rows = 1)
      head(n_rows)
    end

    def last(n_rows = 1)
      tail(n_rows)
    end

    private # =====

    # a single key gives a Vector, several keys give a DataFrame
    def select_columns(keys)
      if keys.one?
        Vector.new(@table[*keys].data)
      else
        DataFrame.new(@table[keys])
      end
    end

    # builds a new DataFrame from the rows at the given indices
    def select_rows(indeces)
      if out_of_range?(indeces)
        raise DataFrameArgumentError, "invalid index: #{indeces} for [0..#{size - 1}]"
      end

      # NOTE(review): with an empty index list this passes (schema, []) to
      # DataFrame.new; presumably Arrow builds a zero-row table - confirm.
      a = indeces.map { |i| @table.slice(i).to_a }
      DataFrame.new(@table.schema, a)
    end

    # true when any index falls outside -size..size-1
    def out_of_range?(indeces)
      # head(0), tail(0) and empty ranges expand to []; [].max is nil and
      # `nil >= size` would raise NoMethodError, so nothing is out of range here
      return false if indeces.empty?

      indeces.max >= size || indeces.min < -size
    end

    def integers?(enum)
      enum.all?(Integer)
    end

    def sym_or_str?(enum)
      enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# Columnar data object
#   @data : holds Arrow::ChunkedArray or Arrow::Array
class Vector
  # mix-in
  include VectorFunctions

  # Build a Vector from another Vector (its data is reused), an Arrow::Array
  # or Arrow::ChunkedArray (used as is; a chunked array may come from
  # column.data), or a Ruby Array (converted with Arrow::Array.new).
  # Raises ArgumentError for anything else.
  def initialize(array)
    case array
    when Vector
      @data = array.data
    when Arrow::Array, Arrow::ChunkedArray
      @data = array
    when Array
      @data = Arrow::Array.new(array)
    else
      raise ArgumentError, 'Unknown array in argument'
    end
  end

  attr_reader :data

  # Elements rendered like a Ruby Array literal.
  def to_s
    @data.to_a.inspect
  end

  # Header line with class, type, size and object id, followed by the elements.
  # Every value is passed as a format argument: the element listing may
  # contain '%' (e.g. the string "50%"), which must not be interpreted as a
  # format directive.
  def inspect
    format("#<%s(:%s, size=%s):0x%016x>\n%s", self.class, type, size, object_id, to_s)
  end

  # Values as returned by @data.values.
  def values
    @data.values
  end
  alias_method :to_a, :values
  alias_method :entries, :values

  # Number of elements.
  def size
    # only defined :length in Arrow?
    @data.length
  end
  alias_method :length, :size
  alias_method :n_rows, :size
  alias_method :nrow, :size

  # Element type as a Symbol, taken from the nick of @data.value_type.
  def type
    @data.value_type.nick.to_sym
  end

  # def each() end

  # True when the underlying data is an Arrow::ChunkedArray.
  def chunked?
    @data.is_a? Arrow::ChunkedArray
  end

  # Number of chunks; 0 when the data is not chunked.
  def n_chunks
    chunked? ? @data.n_chunks : 0
  end

  # def each_chunk() end

  # Hash of value => number of occurrences.
  def tally
    values.tally
  end

  # Number of nulls as reported by the underlying Arrow data.
  def n_nulls
    @data.n_nulls
  end
end
|
72
|
+
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
|
4
|
+
# mix-ins for class Vector
# Each public method defined here delegates to `exec_func`, which runs the
# Arrow compute function of the same name on the including object's `data`.
module VectorFunctions
  # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
  # reference: https://arrow.apache.org/docs/cpp/compute.html

  # [Unary aggregations]: vector.func => Scalar
  # No backslash at the line break: inside %i[] a backslash escapes the
  # newline into the symbol itself instead of continuing the line, which
  # would define a method whose name contains "\n".
  unary_aggregations =
    %i[all any approximate_median count count_distinct max mean min
       product stddev sum variance]
  unary_aggregations.each do |function|
    define_method(function) { exec_func(function, other: nil, options: { aggregate: true }) }
  end
  alias_method :count_uniq, :count_distinct

  # option(s) required
  # index

  # Returns other than value
  # min_max
  # mode
  # quantile
  # tdigest

  # [Unary element-wise]: vector.func => Vector
  unary_element_wise = %i[abs atan ceil cos floor sign sin tan trunc]
  unary_element_wise.each do |function|
    define_method(function) { exec_func(function, other: nil, options: {}) }
  end

  # [Unary element-wise with operator]: vector.func => Vector
  unary_element_wise_op = {
    negate: '-@',
  }
  unary_element_wise_op.each do |function, operator|
    define_method(function) { exec_func(function, other: nil, options: {}) }
    define_method(operator) { exec_func(function, other: nil, options: {}) }
  end

  # bit_wise_not => '!', invert, round, round_to_multiple

  # NaN support needed
  # %i[acos asin ln log10 log1p log2]

  # With numerical range check
  # %i[abs_checked acos_checked asin_checked cos_checked ln_checked
  #    log10_checked log1p_checked log2_checked sin_checked tan_checked]

  # [Binary element-wise]: vector.func(other) => Vector
  binary_element_wise = %i[atan2 and and_kleene and_not and_not_kleene or or_kleene xor]
  binary_element_wise.each do |function|
    define_method(function) do |other|
      exec_func(function, other: other, options: {})
    end
  end

  # NaN support needed
  # logb

  # With numerical range check
  # %i[add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
  #    shift_left_checked shift_right_checked]

  # [Binary element-wise with operator]: vector.func(other) => Vector
  binary_element_wise_op = {
    add: '+',
    divide: '/',
    multiply: '*',
    power: '**',
    subtract: '-',

    bit_wise_and: '&',
    bit_wise_or: '|',
    bit_wise_xor: '^',
    shift_left: '<<',
    shift_right: '>>',

    equal: '==',
    greater: '>',
    greater_equal: '>=',
    less: '<',
    less_equal: '<=',
    not_equal: '!=',
  }
  binary_element_wise_op.each do |function, operator|
    define_method(function) do |other|
      exec_func(function, other: other, options: {})
    end
    define_method(operator) do |other|
      exec_func(function, other: other, options: {})
    end
  end
  alias_method :eq, :equal
  alias_method :ge, :greater_equal
  alias_method :gt, :greater
  alias_method :le, :less_equal
  alias_method :lt, :less
  alias_method :ne, :not_equal

  # mod: '%',

  # (array functions)
  # array_filter, array_sort_indices, array_take
  # dictionary_encode, hash_all, hash_any, hash_approximate_median,
  # hash_count, hash_count_distinct, hash_distinct, hash_max, hash_mean, hash_min,
  # hash_min_max, hash_product, hash_stddev, hash_sum, hash_tdigest, hash_variance,
  # partition_nth_indices,
  # quarter, quarters_between, unique,
  # value_counts

  # (strings)
  # ascii_capitalize, ascii_center, ascii_is_alnum, ascii_is_alpha, ascii_is_decimal,
  # ascii_is_lower, ascii_is_printable, ascii_is_space, ascii_is_title, ascii_is_upper,
  # ascii_lower, ascii_lpad, ascii_ltrim, ascii_ltrim_whitespace, ascii_reverse,
  # ascii_rpad, ascii_rtrim, ascii_rtrim_whitespace, ascii_split_whitespace,
  # ascii_swapcase, ascii_title, ascii_trim, ascii_trim_whitespace, ascii_upper,
  # binary_join, binary_join_element_wise, binary_length, binary_repeat,
  # binary_replace_slice, binary_reverse, count_substring, count_substring_regex,
  # ends_with, extract_regex, find_substring, find_substring_regex,
  # match_like, match_substring, match_substring_regex, replace_substring,
  # replace_substring_regex, split_pattern, split_pattern_regex, starts_with,
  # string_is_ascii, utf8_capitalize, utf8_center, utf8_is_alnum, utf8_is_alpha,
  # utf8_is_decimal, utf8_is_digit, utf8_is_lower, utf8_is_numeric, utf8_is_printable,
  # utf8_is_space, utf8_is_title, utf8_is_upper, utf8_length, utf8_lower, utf8_lpad,
  # utf8_ltrim, utf8_ltrim_whitespace, utf8_normalize, utf8_replace_slice, utf8_reverse,
  # utf8_rpad, utf8_rtrim, utf8_rtrim_whitespace, utf8_slice_codeunits, utf8_split_whitespace,
  # utf8_swapcase, utf8_title, utf8_trim, utf8_trim_whitespace, utf8_upper

  # (temporal)
  # assume_timezone, ceil_temporal, day, day_of_week, day_of_year, day_time_interval_between,
  # days_between, floor_temporal, hour, hours_between, iso_calendar, iso_week, iso_year,
  # microsecond, microseconds_between, millisecond, milliseconds_between, minute,
  # minutes_between, month, month_day_nano_interval_between, month_interval_between,
  # nanosecond, nanoseconds_between, round_temporal, second, seconds_between, strftime,
  # strptime, subsecond, us_week, week, weeks_between, year, year_month_day, years_between

  # (conditional)
  # case_when, cast, if_else

  # (indices)
  # choose, index_in, index_in_meta_binary, indices_nonzero

  # (others)
  # coalesce, drop_null, fill_null_backward, fill_null_forward,
  # filter, is_finite, is_in, is_in_meta_binary, is_inf, is_nan, is_null, is_valid,
  # list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
  # max_element_wise, min_element_wise, random, replace_with_mask, select_k_unstable,
  # sort_indices, struct_field, take

  private # =======

  # Find the Arrow function named `function` and execute it on `data`,
  # together with `other` when given.
  #   other:   nil for unary functions; otherwise an Arrow::Array,
  #            Arrow::ChunkedArray, Arrow::Scalar or Numeric (passed as is),
  #            a Vector (its data is passed) or a Rover::Vector (its `to_a`
  #            is passed)
  #   options: with a truthy :aggregate the raw result value is returned,
  #            otherwise the result value is wrapped in a new Vector
  # Raises ArgumentError for any other kind of operand.
  def exec_func(function, other: nil, options: {})
    func = Arrow::Function.find(function)
    output =
      case other
      when nil
        func.execute([data])
      when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar, Numeric
        func.execute([data, other])
      when Vector
        func.execute([data, other.data])
      when Rover::Vector
        func.execute([data, other.to_a])
      else
        raise ArgumentError, "operand is not supported: #{other.class}"
      end
    options[:aggregate] ? output.value : Vector.new(output.value)
  end
end
|
172
|
+
end
|
data/lib/red_amber.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'arrow'
|
4
|
+
require 'rover-df'
|
5
|
+
|
6
|
+
require_relative 'red_amber/data_frame_output'
|
7
|
+
require_relative 'red_amber/data_frame_selectable'
|
8
|
+
require_relative 'red_amber/data_frame'
|
9
|
+
require_relative 'red_amber/vector_functions'
|
10
|
+
require_relative 'red_amber/vector'
|
11
|
+
require_relative 'red_amber/version'
|
12
|
+
|
13
|
+
# Error classes of the library.
module RedAmber
  # General library error (a StandardError).
  class Error < StandardError
  end

  # ArgumentError subclass used for argument errors of DataFrame.
  class DataFrameArgumentError < ArgumentError
  end

  # TypeError subclass; presumably for type errors of DataFrame
  # (its use is not visible here - confirm against callers).
  class DataFrameTypeError < TypeError
  end
end
|
data/red_amber.gemspec
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'lib/red_amber/version'
|
4
|
+
|
5
|
+
# Gem specification of red_amber.
Gem::Specification.new do |spec|
  spec.name = 'red_amber'
  spec.version = RedAmber::VERSION
  spec.authors = ['Hirokazu SUZUKI (heronshoes)']
  spec.email = ['63298319+heronshoes@users.noreply.github.com']

  spec.summary = 'Simple data frames for Ruby'
  spec.description = 'Powered by Red Arrow and simple API similar to Rover-df'
  spec.homepage = 'https://github.com/heronshoes/red_amber'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 2.7'

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = 'https://github.com/heronshoes/red_amber'
  spec.metadata['changelog_uri'] = 'https://github.com/heronshoes/red_amber/blob/main/CHANGELOG.md'
  spec.metadata['rubygems_mfa_required'] = 'true'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # This gemspec itself and paths matching the pattern below are left out.
  excluded = %r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)}
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path == __FILE__ || path.match?(excluded) }
  end
  spec.bindir = 'exe'
  # Executables are the base names of the packaged files under exe/.
  spec.executables = spec.files.grep(%r{\Aexe/}).map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  spec.add_dependency 'red-arrow', '~> 7.0.0'
  spec.add_dependency 'red-parquet', '~> 7.0.0'
  spec.add_dependency 'rover-df', '~> 0.3.0'

  # Development dependency has gone to the Gemfile (rubygems/bundler#7237)

  # For more information and examples about making a new gem, check out our
  # guide at: https://bundler.io/guides/creating_gem.html
end
|
data/sig/red_amber.rbs
ADDED
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: red_amber
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Hirokazu SUZUKI (heronshoes)
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-05-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: red-arrow
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 7.0.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 7.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: red-parquet
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 7.0.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 7.0.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rover-df
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.3.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.3.0
|
55
|
+
description: Powered by Red Arrow and simple API similar to Rover-df
|
56
|
+
email:
|
57
|
+
- 63298319+heronshoes@users.noreply.github.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".rubocop.yml"
|
63
|
+
- ".rubocop_todo.yml"
|
64
|
+
- CHANGELOG.md
|
65
|
+
- Gemfile
|
66
|
+
- LICENSE
|
67
|
+
- README.md
|
68
|
+
- Rakefile
|
69
|
+
- doc/CODE_OF_CONDUCT.md
|
70
|
+
- lib/red_amber.rb
|
71
|
+
- lib/red_amber/data_frame.rb
|
72
|
+
- lib/red_amber/data_frame_output.rb
|
73
|
+
- lib/red_amber/data_frame_selectable.rb
|
74
|
+
- lib/red_amber/vector.rb
|
75
|
+
- lib/red_amber/vector_functions.rb
|
76
|
+
- lib/red_amber/version.rb
|
77
|
+
- red_amber.gemspec
|
78
|
+
- sig/red_amber.rbs
|
79
|
+
homepage: https://github.com/heronshoes/red_amber
|
80
|
+
licenses:
|
81
|
+
- MIT
|
82
|
+
metadata:
|
83
|
+
homepage_uri: https://github.com/heronshoes/red_amber
|
84
|
+
source_code_uri: https://github.com/heronshoes/red_amber
|
85
|
+
changelog_uri: https://github.com/heronshoes/red_amber/blob/main/CHANGELOG.md
|
86
|
+
rubygems_mfa_required: 'true'
|
87
|
+
post_install_message:
|
88
|
+
rdoc_options: []
|
89
|
+
require_paths:
|
90
|
+
- lib
|
91
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '2.7'
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
requirements: []
|
102
|
+
rubygems_version: 3.3.7
|
103
|
+
signing_key:
|
104
|
+
specification_version: 4
|
105
|
+
summary: Simple data frames for Ruby
|
106
|
+
test_files: []
|