red_amber 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -3
- data/.rubocop_todo.yml +1 -7
- data/CHANGELOG.md +16 -4
- data/README.md +48 -13
- data/lib/red_amber/data_frame.rb +12 -9
- data/lib/red_amber/data_frame_output.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +22 -9
- data/lib/red_amber/vector.rb +4 -0
- data/lib/red_amber/vector_functions.rb +1 -1
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +5 -5
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54de345111ab7c3918e119abe820d2ff207007f1ce9731e2f8954513d47c76a9
|
4
|
+
data.tar.gz: 75e4251c6d6be8eab05739f75e064a2e65cbe3abdafaa574c559d9356fe93a20
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60c2d11d30b91947b67e608864e5e4fe13e544662f671789256e6e2e624a892577f616572e4ba55be4de99affd528d020060b4be56f8820250697db2a80132a2
|
7
|
+
data.tar.gz: 19170b7cd3d6b1174b7de44c0b8841d47acc4d1832fe72fdc8adc7171245e031c922614aca979755ae035566deae0a711644a3e483cecbceabdcfc411efb2263
|
data/.rubocop.yml
CHANGED
@@ -45,7 +45,7 @@ Lint/BinaryOperatorWithIdenticalOperands:
|
|
45
45
|
|
46
46
|
# Max: 120
|
47
47
|
Layout/LineLength:
|
48
|
-
Max:
|
48
|
+
Max: 118
|
49
49
|
Exclude:
|
50
50
|
- 'test/**/*'
|
51
51
|
|
@@ -53,7 +53,7 @@ Layout/LineLength:
|
|
53
53
|
# 18..30 unsatisfactory
|
54
54
|
# > 30 dangerous
|
55
55
|
Metrics/AbcSize:
|
56
|
-
Max:
|
56
|
+
Max: 23
|
57
57
|
Exclude:
|
58
58
|
- 'lib/red_amber/data_frame_output.rb' # Max: 78
|
59
59
|
|
@@ -84,6 +84,11 @@ Metrics/MethodLength:
|
|
84
84
|
|
85
85
|
# Max: 8
|
86
86
|
Metrics/PerceivedComplexity:
|
87
|
-
Max:
|
87
|
+
Max: 11
|
88
88
|
Exclude:
|
89
89
|
- 'lib/red_amber/data_frame_output.rb' # Max: 12
|
90
|
+
|
91
|
+
# Necessary to test when range.end == -1
|
92
|
+
Style/SlicingWithRange:
|
93
|
+
Exclude:
|
94
|
+
- 'test/test_data_frame_selectable.rb'
|
data/.rubocop_todo.yml
CHANGED
@@ -1,17 +1,11 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2022-
|
3
|
+
# on 2022-05-08 02:37:36 UTC using RuboCop version 1.27.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count: 1
|
10
|
-
# This cop supports unsafe auto-correction (--auto-correct-all).
|
11
|
-
Style/SlicingWithRange:
|
12
|
-
Exclude:
|
13
|
-
- 'lib/red_amber/data_frame_selectable.rb'
|
14
|
-
|
15
9
|
# Offense count: 1
|
16
10
|
# This cop supports unsafe auto-correction (--auto-correct-all).
|
17
11
|
# Configuration parameters: EnforcedStyle.
|
data/CHANGELOG.md
CHANGED
@@ -1,17 +1,29 @@
|
|
1
|
-
## [0.1.
|
1
|
+
## [0.1.3] - Unreleased
|
2
2
|
|
3
|
-
- Add support for Arrow 8.0.0
|
4
3
|
- `DataFrame`
|
5
|
-
- Introduce updating
|
4
|
+
- Introduce updating capabilities
|
6
5
|
- Introduce NA support
|
7
6
|
- Add slice method
|
8
7
|
- `Vector`
|
9
8
|
- Add NaN support for functions
|
10
9
|
- More functions
|
11
10
|
|
11
|
+
## [0.1.2] - 2022-05-08 (experimental)
|
12
|
+
|
13
|
+
- Bug fixes:
|
14
|
+
- `DataFrame`
|
15
|
+
- Fix bug in `#[]` with endless Range
|
16
|
+
- New features and improvements
|
17
|
+
- Add support for Arrow 8.0.0
|
18
|
+
- `DataFrame`
|
19
|
+
- `types` and `data_types`
|
20
|
+
- Range is usable to specify columns in `#[]`
|
21
|
+
- `Vector`
|
22
|
+
- `type` and `data_type`
|
23
|
+
|
12
24
|
## [0.1.1] - 2022-05-06 (experimental)
|
13
25
|
|
14
|
-
- Release on
|
26
|
+
- Release on rubygems.org
|
15
27
|
- Introduce class `DataFrame`
|
16
28
|
- New from Hash, schema/rows, `Arrow::Table`, `Rover::DataFrame`
|
17
29
|
- Load from file, string, URI
|
data/README.md
CHANGED
@@ -8,8 +8,8 @@ A simple dataframe library for Ruby (experimental)
|
|
8
8
|
## Requirements
|
9
9
|
|
10
10
|
```ruby
|
11
|
-
gem 'red-arrow', '
|
12
|
-
gem 'red-parquet', '
|
11
|
+
gem 'red-arrow', '>= 7.0.0'
|
12
|
+
gem 'red-parquet', '>= 7.0.0' # if you use IO from/to parquet
|
13
13
|
gem 'rover-df', '~> 0.3.0' # if you use IO from/to Rover::DataFrame
|
14
14
|
```
|
15
15
|
|
@@ -89,10 +89,13 @@ Or install it yourself as:
|
|
89
89
|
|
90
90
|
Returns num of column names by an Array.
|
91
91
|
|
92
|
-
- [x] `types
|
92
|
+
- [x] `types`
|
93
93
|
|
94
|
-
Returns types of columns by an Array.
|
95
|
-
|
94
|
+
Returns types of columns by an Array of Symbols.
|
95
|
+
|
96
|
+
- [x] `data_types`
|
97
|
+
|
98
|
+
Returns the types of the columns as an Array of `Arrow::DataType` classes.
|
96
99
|
|
97
100
|
- [x] `vectors`
|
98
101
|
|
@@ -128,20 +131,50 @@ Or install it yourself as:
|
|
128
131
|
|
129
132
|
Shows some information about self.
|
130
133
|
|
134
|
+
```ruby
|
135
|
+
hash = {a: [1, 2, 3], b: %w[A B C], c: [1.0, 2, 3]}
|
136
|
+
RedAmber::DataFrame.new(hash)
|
137
|
+
# =>
|
138
|
+
RedAmber::DataFrame : 3 observations(rows) of 3 variables(columns)
|
139
|
+
Variables : 2 numeric, 1 string
|
140
|
+
# key type level data_preview
|
141
|
+
1 :a uint8 3 [1, 2, 3]
|
142
|
+
2 :b string 3 [A, B, C]
|
143
|
+
3 :c double 3 [1.0, 2.0, 3.0]
|
144
|
+
```
|
145
|
+
|
131
146
|
- tally_level: max level to use tally mode
|
132
147
|
- max_element: max num of element to show values in each row
|
133
148
|
|
134
149
|
### Selecting
|
135
150
|
|
136
|
-
- [x]
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
-
|
151
|
+
- [x] Select columns by `[]` as `[key]`, `[keys]`, `[keys[index]]`
|
152
|
+
- Key in a Symbol: `df[:symbol]`
|
153
|
+
- Key in a String: `df["string"]`
|
154
|
+
- Keys in an Array: `df[:symbol1, "string", :symbol2]`
|
155
|
+
- Keys by indices: `df[df.keys[0]]`, `df[df.keys[1,2]]`, `df[df.keys[1..]]`
|
156
|
+
- Keys in a Range:
|
157
|
+
An endless Range can be used to represent keys.
|
158
|
+
```ruby
|
159
|
+
hash = {a: [1, 2, 3], b: %w[A B C], c: [1.0, 2, 3]}
|
160
|
+
df = RedAmber::DataFrame.new(hash)
|
161
|
+
df[:b..:c, "a"]
|
162
|
+
# =>
|
163
|
+
RedAmber::DataFrame : 3 observations(rows) of 3 variables(columns)
|
164
|
+
Variables : 2 numeric, 1 string
|
165
|
+
# key type level data_preview
|
166
|
+
1 :b string 3 [A, B, C]
|
167
|
+
2 :c double 3 [1.0, 2.0, 3.0]
|
168
|
+
3 :a uint8 3 [1, 2, 3]
|
169
|
+
```
|
141
170
|
|
142
|
-
|
171
|
+
- [x] Select rows by `[]` as `[index]`, `[range]`, `[array]`
|
172
|
+
- Select a row by index: `df[0]`
|
173
|
+
- Select rows by indices in a Range: `df[1..2]`
|
174
|
+
- Select rows by indices in an Array: `df[1, 2]`
|
175
|
+
- Mixed case: `df[2, 0..]`
|
143
176
|
|
144
|
-
- [x]
|
177
|
+
- [x] Select rows from top or bottom
|
145
178
|
|
146
179
|
`head(n=5)`, `tail(n=5)`, `first(n=1)`, `last(n=1)`
|
147
180
|
|
@@ -213,6 +246,8 @@ Or install it yourself as:
|
|
213
246
|
|
214
247
|
- [x] `type`
|
215
248
|
|
249
|
+
- [x] `data_type`
|
250
|
+
|
216
251
|
- [ ] `each`
|
217
252
|
|
218
253
|
- [ ] `chunked?`
|
@@ -324,7 +359,7 @@ Or install it yourself as:
|
|
324
359
|
|
325
360
|
## Development
|
326
361
|
|
327
|
-
```
|
362
|
+
```shell
|
328
363
|
git clone https://github.com/heronshoes/red_amber.git
|
329
364
|
cd red_amber
|
330
365
|
bundle install
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -9,13 +9,13 @@ module RedAmber
|
|
9
9
|
include DataFrameOutput
|
10
10
|
|
11
11
|
def initialize(*args)
|
12
|
-
#
|
12
|
+
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
13
13
|
# returns empty DataFrame
|
14
14
|
@table = Arrow::Table.new({}, [])
|
15
15
|
# bug in gobject-introspection: ruby-gnome/ruby-gnome#1472
|
16
16
|
# [Arrow::Table] == [nil] shows ArgumentError
|
17
17
|
# temporary use yoda condition to workaround
|
18
|
-
return if args.empty? || args == [[]] || [nil] == args
|
18
|
+
return if args.empty? || args == [[]] || args == [{}] || [nil] == args
|
19
19
|
|
20
20
|
if args.size > 1
|
21
21
|
@table = Arrow::Table.new(*args)
|
@@ -26,11 +26,9 @@ module RedAmber
|
|
26
26
|
when Arrow::Table then arg
|
27
27
|
when DataFrame then arg.table
|
28
28
|
when Rover::DataFrame then Arrow::Table.new(arg.to_h)
|
29
|
-
when Hash
|
30
|
-
args << [] if arg.empty? # create empty df from DataFrame.new({})
|
31
|
-
Arrow::Table.new(*args)
|
29
|
+
when Hash then Arrow::Table.new(arg)
|
32
30
|
else
|
33
|
-
raise DataFrameTypeError, "invalid argument: #{
|
31
|
+
raise DataFrameTypeError, "invalid argument: #{arg}"
|
34
32
|
end
|
35
33
|
end
|
36
34
|
end
|
@@ -69,10 +67,15 @@ module RedAmber
|
|
69
67
|
alias_method :keys, :column_names
|
70
68
|
alias_method :header, :column_names
|
71
69
|
|
72
|
-
def types
|
70
|
+
def types
|
73
71
|
@table.columns.map do |column|
|
74
|
-
|
75
|
-
|
72
|
+
column.data_type.to_s.to_sym
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def data_types
|
77
|
+
@table.columns.map do |column|
|
78
|
+
column.data_type.class
|
76
79
|
end
|
77
80
|
end
|
78
81
|
|
@@ -35,7 +35,7 @@ module RedAmber
|
|
35
35
|
"#{self.class} : #{nrow} observation#{r}(row#{r}) of #{ncol} variable#{c}(column#{c})"
|
36
36
|
|
37
37
|
# 2nd row: show var counts by type
|
38
|
-
type_groups =
|
38
|
+
type_groups = data_types.map { |t| type_group(t) }
|
39
39
|
|
40
40
|
stringio.puts "Variable#{pl(ncol)} : #{var_type_count(type_groups).join(', ')}"
|
41
41
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
# mix-
|
4
|
+
# mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
6
|
# select columns: [symbol] or [string]
|
7
7
|
# select rows: [array of index], [range]
|
@@ -12,25 +12,25 @@ module RedAmber
|
|
12
12
|
# expand Range like [1..3, 4] to [1, 2, 3, 4]
|
13
13
|
expanded =
|
14
14
|
args.each_with_object([]) do |e, a|
|
15
|
-
e.is_a?(Range) ? a.concat(e
|
15
|
+
e.is_a?(Range) ? a.concat(normalized_array(e)) : a.append(e)
|
16
16
|
end
|
17
17
|
|
18
18
|
return select_rows(expanded) if integers?(expanded)
|
19
19
|
return select_columns(expanded.map(&:to_sym)) if sym_or_str?(expanded)
|
20
20
|
|
21
|
-
raise DataFrameArgumentError, "
|
21
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
22
22
|
end
|
23
23
|
|
24
24
|
def head(n_rows = 5)
|
25
|
-
raise DataFrameArgumentError, "
|
25
|
+
raise DataFrameArgumentError, "Index is out of range #{n_rows}" if n_rows.negative?
|
26
26
|
|
27
27
|
self[0...[n_rows, size].min]
|
28
28
|
end
|
29
29
|
|
30
30
|
def tail(n_rows = 5)
|
31
|
-
raise DataFrameArgumentError, "
|
31
|
+
raise DataFrameArgumentError, "Index is out of range #{n_rows}" if n_rows.negative?
|
32
32
|
|
33
|
-
self[-[n_rows, size].min
|
33
|
+
self[-[n_rows, size].min..]
|
34
34
|
end
|
35
35
|
|
36
36
|
def first(n_rows = 1)
|
@@ -52,14 +52,27 @@ module RedAmber
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def select_rows(indeces)
|
55
|
-
|
56
|
-
raise DataFrameArgumentError, "invalid index: #{indeces} for [0..#{size - 1}]"
|
57
|
-
end
|
55
|
+
out_of_range?(indeces) && raise(DataFrameArgumentError, "Invalid index: #{indeces} for 0..#{size - 1}")
|
58
56
|
|
59
57
|
a = indeces.map { |i| @table.slice(i).to_a }
|
60
58
|
DataFrame.new(@table.schema, a)
|
61
59
|
end
|
62
60
|
|
61
|
+
def normalized_array(range)
|
62
|
+
both_end = [range.begin, range.end]
|
63
|
+
both_end[1] -= 1 if range.exclude_end? && range.end.is_a?(Integer)
|
64
|
+
|
65
|
+
if both_end.any?(Integer) || both_end.all?(&:nil?)
|
66
|
+
if both_end.any? { |e| e&.>=(size) || e&.<(-size) }
|
67
|
+
raise DataFrameArgumentError, "Index out of range: #{range} for 0..#{size - 1}"
|
68
|
+
end
|
69
|
+
|
70
|
+
(0...size).to_a[range]
|
71
|
+
else
|
72
|
+
range.to_a
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
63
76
|
def out_of_range?(indeces)
|
64
77
|
indeces.max >= size || indeces.min < -size
|
65
78
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -164,7 +164,7 @@ module RedAmber
|
|
164
164
|
when Rover::Vector
|
165
165
|
func.execute([data, other.to_a])
|
166
166
|
else
|
167
|
-
raise ArgumentError, "
|
167
|
+
raise ArgumentError, "Operand is not supported: #{other.class}"
|
168
168
|
end
|
169
169
|
options[:aggregate] ? output.value : Vector.new(output.value)
|
170
170
|
end
|
data/lib/red_amber/version.rb
CHANGED
data/red_amber.gemspec
CHANGED
@@ -6,10 +6,10 @@ Gem::Specification.new do |spec|
|
|
6
6
|
spec.name = 'red_amber'
|
7
7
|
spec.version = RedAmber::VERSION
|
8
8
|
spec.authors = ['Hirokazu SUZUKI (heronshoes)']
|
9
|
-
spec.email = ['
|
9
|
+
spec.email = ['heronshoes877@gmail.com']
|
10
10
|
|
11
|
-
spec.summary = 'Simple
|
12
|
-
spec.description = '
|
11
|
+
spec.summary = 'Simple dataframe library for Ruby'
|
12
|
+
spec.description = 'RedAmber is a simple dataframe library powered by Red Arrow with simple API similar to Rover-df.'
|
13
13
|
spec.homepage = 'https://github.com/heronshoes/red_amber'
|
14
14
|
spec.license = 'MIT'
|
15
15
|
spec.required_ruby_version = '>= 2.7'
|
@@ -30,8 +30,8 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
|
-
spec.add_dependency 'red-arrow', '
|
34
|
-
spec.add_dependency 'red-parquet', '
|
33
|
+
spec.add_dependency 'red-arrow', '>= 7.0.0'
|
34
|
+
spec.add_dependency 'red-parquet', '>= 7.0.0'
|
35
35
|
spec.add_dependency 'rover-df', '~> 0.3.0'
|
36
36
|
|
37
37
|
# Development dependency has gone to the Gemfile (rubygems/bundler#7237)
|
metadata
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 7.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 7.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: red-parquet
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 7.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 7.0.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -52,9 +52,10 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.3.0
|
55
|
-
description:
|
55
|
+
description: RedAmber is a simple dataframe library powered by Red Arrow with simple
|
56
|
+
API similar to Rover-df.
|
56
57
|
email:
|
57
|
-
-
|
58
|
+
- heronshoes877@gmail.com
|
58
59
|
executables: []
|
59
60
|
extensions: []
|
60
61
|
extra_rdoc_files: []
|
@@ -102,5 +103,5 @@ requirements: []
|
|
102
103
|
rubygems_version: 3.3.7
|
103
104
|
signing_key:
|
104
105
|
specification_version: 4
|
105
|
-
summary: Simple
|
106
|
+
summary: Simple dataframe library for Ruby
|
106
107
|
test_files: []
|