red_amber 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -3
- data/.rubocop_todo.yml +1 -7
- data/CHANGELOG.md +16 -4
- data/README.md +48 -13
- data/lib/red_amber/data_frame.rb +12 -9
- data/lib/red_amber/data_frame_output.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +22 -9
- data/lib/red_amber/vector.rb +4 -0
- data/lib/red_amber/vector_functions.rb +1 -1
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +5 -5
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54de345111ab7c3918e119abe820d2ff207007f1ce9731e2f8954513d47c76a9
|
4
|
+
data.tar.gz: 75e4251c6d6be8eab05739f75e064a2e65cbe3abdafaa574c559d9356fe93a20
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60c2d11d30b91947b67e608864e5e4fe13e544662f671789256e6e2e624a892577f616572e4ba55be4de99affd528d020060b4be56f8820250697db2a80132a2
|
7
|
+
data.tar.gz: 19170b7cd3d6b1174b7de44c0b8841d47acc4d1832fe72fdc8adc7171245e031c922614aca979755ae035566deae0a711644a3e483cecbceabdcfc411efb2263
|
data/.rubocop.yml
CHANGED
@@ -45,7 +45,7 @@ Lint/BinaryOperatorWithIdenticalOperands:
|
|
45
45
|
|
46
46
|
# Max: 120
|
47
47
|
Layout/LineLength:
|
48
|
-
Max:
|
48
|
+
Max: 118
|
49
49
|
Exclude:
|
50
50
|
- 'test/**/*'
|
51
51
|
|
@@ -53,7 +53,7 @@ Layout/LineLength:
|
|
53
53
|
# 18..30 unsatisfactory
|
54
54
|
# > 30 dangerous
|
55
55
|
Metrics/AbcSize:
|
56
|
-
Max:
|
56
|
+
Max: 23
|
57
57
|
Exclude:
|
58
58
|
- 'lib/red_amber/data_frame_output.rb' # Max: 78
|
59
59
|
|
@@ -84,6 +84,11 @@ Metrics/MethodLength:
|
|
84
84
|
|
85
85
|
# Max: 8
|
86
86
|
Metrics/PerceivedComplexity:
|
87
|
-
Max:
|
87
|
+
Max: 11
|
88
88
|
Exclude:
|
89
89
|
- 'lib/red_amber/data_frame_output.rb' # Max: 12
|
90
|
+
|
91
|
+
# Necessary to test when range.end == -1
|
92
|
+
Style/SlicingWithRange:
|
93
|
+
Exclude:
|
94
|
+
- 'test/test_data_frame_selectable.rb'
|
data/.rubocop_todo.yml
CHANGED
@@ -1,17 +1,11 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2022-
|
3
|
+
# on 2022-05-08 02:37:36 UTC using RuboCop version 1.27.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count: 1
|
10
|
-
# This cop supports unsafe auto-correction (--auto-correct-all).
|
11
|
-
Style/SlicingWithRange:
|
12
|
-
Exclude:
|
13
|
-
- 'lib/red_amber/data_frame_selectable.rb'
|
14
|
-
|
15
9
|
# Offense count: 1
|
16
10
|
# This cop supports unsafe auto-correction (--auto-correct-all).
|
17
11
|
# Configuration parameters: EnforcedStyle.
|
data/CHANGELOG.md
CHANGED
@@ -1,17 +1,29 @@
|
|
1
|
-
## [0.1.
|
1
|
+
## [0.1.3] - Unreleased
|
2
2
|
|
3
|
-
- Add support for Arrow 8.0.0
|
4
3
|
- `DataFrame`
|
5
|
-
- Introduce updating
|
4
|
+
- Introduce updating capabilities
|
6
5
|
- Introduce NA support
|
7
6
|
- Add slice method
|
8
7
|
- `Vector`
|
9
8
|
- Add NaN support for functions
|
10
9
|
- More functions
|
11
10
|
|
11
|
+
## [0.1.2] - 2022-05-08 (experimental)
|
12
|
+
|
13
|
+
- Bug fixes:
|
14
|
+
- `DataFrame`
|
15
|
+
- Fix bug in `#[]` with end-less Range
|
16
|
+
- New features and improvements
|
17
|
+
- Add support for Arrow 8.0.0
|
18
|
+
- `DataFrame`
|
19
|
+
- `types` and `data_types`
|
20
|
+
- Range is usable to specify columns in `#[]`
|
21
|
+
- `Vector`
|
22
|
+
- `type` and `data_type`
|
23
|
+
|
12
24
|
## [0.1.1] - 2022-05-06 (experimental)
|
13
25
|
|
14
|
-
- Release on
|
26
|
+
- Release on rubygems.org
|
15
27
|
- Introduce class `DataFrame`
|
16
28
|
- New from Hash, schema/rows, `Arrow::Table`, `Rover::DataFrame`
|
17
29
|
- Load from file, string, URI
|
data/README.md
CHANGED
@@ -8,8 +8,8 @@ A simple dataframe library for Ruby (experimental)
|
|
8
8
|
## Requirements
|
9
9
|
|
10
10
|
```ruby
|
11
|
-
gem 'red-arrow', '
|
12
|
-
gem 'red-parquet', '
|
11
|
+
gem 'red-arrow', '>= 7.0.0'
|
12
|
+
gem 'red-parquet', '>= 7.0.0' # if you use IO from/to parquet
|
13
13
|
gem 'rover-df', '~> 0.3.0' # if you use IO from/to Rover::DataFrame
|
14
14
|
```
|
15
15
|
|
@@ -89,10 +89,13 @@ Or install it yourself as:
|
|
89
89
|
|
90
90
|
Returns num of column names by an Array.
|
91
91
|
|
92
|
-
- [x] `types
|
92
|
+
- [x] `types`
|
93
93
|
|
94
|
-
Returns types of columns by an Array.
|
95
|
-
|
94
|
+
Returns types of columns by an Array of Symbols.
|
95
|
+
|
96
|
+
- [x] `data_types`
|
97
|
+
|
98
|
+
Returns types of columns by an Array of `Arrow::DataType`.
|
96
99
|
|
97
100
|
- [x] `vectors`
|
98
101
|
|
@@ -128,20 +131,50 @@ Or install it yourself as:
|
|
128
131
|
|
129
132
|
Shows some information about self.
|
130
133
|
|
134
|
+
```ruby
|
135
|
+
hash = {a: [1, 2, 3], b: %w[A B C], c: [1.0, 2, 3]}
|
136
|
+
RedAmber::DataFrame.new(hash)
|
137
|
+
# =>
|
138
|
+
RedAmber::DataFrame : 3 observations(rows) of 3 variables(columns)
|
139
|
+
Variables : 2 numeric, 1 string
|
140
|
+
# key type level data_preview
|
141
|
+
1 :a uint8 3 [1, 2, 3]
|
142
|
+
2 :b string 3 [A, B, C]
|
143
|
+
3 :c double 3 [1.0, 2.0, 3.0]
|
144
|
+
```
|
145
|
+
|
131
146
|
- tally_level: max level to use tally mode
|
132
147
|
- max_element: max num of element to show values in each row
|
133
148
|
|
134
149
|
### Selecting
|
135
150
|
|
136
|
-
- [x]
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
-
|
151
|
+
- [x] Select columns by `[]` as `[key]`, `[keys]`, `[keys[index]]`
|
152
|
+
- Key in a Symbol: `df[:symbol]`
|
153
|
+
- Key in a String: `df["string"]`
|
154
|
+
- Keys in an Array: `df[:symbol1, "string", :symbol2]`
|
155
|
+
- Keys by indices: `df[df.keys[0]]`, `df[df.keys[1,2]]`, `df[df.keys[1..]]`
|
156
|
+
- Keys in a Range:
|
157
|
+
An end-less Range can be used to represent keys.
|
158
|
+
```ruby
|
159
|
+
hash = {a: [1, 2, 3], b: %w[A B C], c: [1.0, 2, 3]}
|
160
|
+
df = RedAmber::DataFrame.new(hash)
|
161
|
+
df[:b..:c, "a"]
|
162
|
+
# =>
|
163
|
+
RedAmber::DataFrame : 3 observations(rows) of 3 variables(columns)
|
164
|
+
Variables : 2 numeric, 1 string
|
165
|
+
# key type level data_preview
|
166
|
+
1 :b string 3 [A, B, C]
|
167
|
+
2 :c double 3 [1.0, 2.0, 3.0]
|
168
|
+
3 :a uint8 3 [1, 2, 3]
|
169
|
+
```
|
141
170
|
|
142
|
-
|
171
|
+
- [x] Select rows by `[]` as `[index]`, `[range]`, `[array]`
|
172
|
+
- Select a row by index: `df[0]`
|
173
|
+
- Select rows by indices in a Range: `df[1..2]`
|
174
|
+
- Select rows by indices in an Array: `df[1, 2]`
|
175
|
+
- Mixed case: `df[2, 0..]`
|
143
176
|
|
144
|
-
- [x]
|
177
|
+
- [x] Select rows from top or bottom
|
145
178
|
|
146
179
|
`head(n=5)`, `tail(n=5)`, `first(n=1)`, `last(n=1)`
|
147
180
|
|
@@ -213,6 +246,8 @@ Or install it yourself as:
|
|
213
246
|
|
214
247
|
- [x] `type`
|
215
248
|
|
249
|
+
- [x] `data_type`
|
250
|
+
|
216
251
|
- [ ] `each`
|
217
252
|
|
218
253
|
- [ ] `chunked?`
|
@@ -324,7 +359,7 @@ Or install it yourself as:
|
|
324
359
|
|
325
360
|
## Development
|
326
361
|
|
327
|
-
```
|
362
|
+
```shell
|
328
363
|
git clone https://github.com/heronshoes/red_amber.git
|
329
364
|
cd red_amber
|
330
365
|
bundle install
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -9,13 +9,13 @@ module RedAmber
|
|
9
9
|
include DataFrameOutput
|
10
10
|
|
11
11
|
def initialize(*args)
|
12
|
-
#
|
12
|
+
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
13
13
|
# returns empty DataFrame
|
14
14
|
@table = Arrow::Table.new({}, [])
|
15
15
|
# bug in gobject-introspection: ruby-gnome/ruby-gnome#1472
|
16
16
|
# [Arrow::Table] == [nil] shows ArgumentError
|
17
17
|
# temporary use yoda condition to workaround
|
18
|
-
return if args.empty? || args == [[]] || [nil] == args
|
18
|
+
return if args.empty? || args == [[]] || args == [{}] || [nil] == args
|
19
19
|
|
20
20
|
if args.size > 1
|
21
21
|
@table = Arrow::Table.new(*args)
|
@@ -26,11 +26,9 @@ module RedAmber
|
|
26
26
|
when Arrow::Table then arg
|
27
27
|
when DataFrame then arg.table
|
28
28
|
when Rover::DataFrame then Arrow::Table.new(arg.to_h)
|
29
|
-
when Hash
|
30
|
-
args << [] if arg.empty? # create empty df from DataFrame.new({})
|
31
|
-
Arrow::Table.new(*args)
|
29
|
+
when Hash then Arrow::Table.new(arg)
|
32
30
|
else
|
33
|
-
raise DataFrameTypeError, "invalid argument: #{
|
31
|
+
raise DataFrameTypeError, "invalid argument: #{arg}"
|
34
32
|
end
|
35
33
|
end
|
36
34
|
end
|
@@ -69,10 +67,15 @@ module RedAmber
|
|
69
67
|
alias_method :keys, :column_names
|
70
68
|
alias_method :header, :column_names
|
71
69
|
|
72
|
-
def types
|
70
|
+
def types
|
73
71
|
@table.columns.map do |column|
|
74
|
-
|
75
|
-
|
72
|
+
column.data_type.to_s.to_sym
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def data_types
|
77
|
+
@table.columns.map do |column|
|
78
|
+
column.data_type.class
|
76
79
|
end
|
77
80
|
end
|
78
81
|
|
@@ -35,7 +35,7 @@ module RedAmber
|
|
35
35
|
"#{self.class} : #{nrow} observation#{r}(row#{r}) of #{ncol} variable#{c}(column#{c})"
|
36
36
|
|
37
37
|
# 2nd row: show var counts by type
|
38
|
-
type_groups =
|
38
|
+
type_groups = data_types.map { |t| type_group(t) }
|
39
39
|
|
40
40
|
stringio.puts "Variable#{pl(ncol)} : #{var_type_count(type_groups).join(', ')}"
|
41
41
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
# mix-
|
4
|
+
# mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
6
|
# select columns: [symbol] or [string]
|
7
7
|
# select rows: [array of index], [range]
|
@@ -12,25 +12,25 @@ module RedAmber
|
|
12
12
|
# expand Range like [1..3, 4] to [1, 2, 3, 4]
|
13
13
|
expanded =
|
14
14
|
args.each_with_object([]) do |e, a|
|
15
|
-
e.is_a?(Range) ? a.concat(e
|
15
|
+
e.is_a?(Range) ? a.concat(normalized_array(e)) : a.append(e)
|
16
16
|
end
|
17
17
|
|
18
18
|
return select_rows(expanded) if integers?(expanded)
|
19
19
|
return select_columns(expanded.map(&:to_sym)) if sym_or_str?(expanded)
|
20
20
|
|
21
|
-
raise DataFrameArgumentError, "
|
21
|
+
raise DataFrameArgumentError, "Invalid argument #{args}"
|
22
22
|
end
|
23
23
|
|
24
24
|
def head(n_rows = 5)
|
25
|
-
raise DataFrameArgumentError, "
|
25
|
+
raise DataFrameArgumentError, "Index is out of range #{n_rows}" if n_rows.negative?
|
26
26
|
|
27
27
|
self[0...[n_rows, size].min]
|
28
28
|
end
|
29
29
|
|
30
30
|
def tail(n_rows = 5)
|
31
|
-
raise DataFrameArgumentError, "
|
31
|
+
raise DataFrameArgumentError, "Index is out of range #{n_rows}" if n_rows.negative?
|
32
32
|
|
33
|
-
self[-[n_rows, size].min
|
33
|
+
self[-[n_rows, size].min..]
|
34
34
|
end
|
35
35
|
|
36
36
|
def first(n_rows = 1)
|
@@ -52,14 +52,27 @@ module RedAmber
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def select_rows(indeces)
|
55
|
-
|
56
|
-
raise DataFrameArgumentError, "invalid index: #{indeces} for [0..#{size - 1}]"
|
57
|
-
end
|
55
|
+
out_of_range?(indeces) && raise(DataFrameArgumentError, "Invalid index: #{indeces} for 0..#{size - 1}")
|
58
56
|
|
59
57
|
a = indeces.map { |i| @table.slice(i).to_a }
|
60
58
|
DataFrame.new(@table.schema, a)
|
61
59
|
end
|
62
60
|
|
61
|
+
def normalized_array(range)
|
62
|
+
both_end = [range.begin, range.end]
|
63
|
+
both_end[1] -= 1 if range.exclude_end? && range.end.is_a?(Integer)
|
64
|
+
|
65
|
+
if both_end.any?(Integer) || both_end.all?(&:nil?)
|
66
|
+
if both_end.any? { |e| e&.>=(size) || e&.<(-size) }
|
67
|
+
raise DataFrameArgumentError, "Index out of range: #{range} for 0..#{size - 1}"
|
68
|
+
end
|
69
|
+
|
70
|
+
(0...size).to_a[range]
|
71
|
+
else
|
72
|
+
range.to_a
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
63
76
|
def out_of_range?(indeces)
|
64
77
|
indeces.max >= size || indeces.min < -size
|
65
78
|
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -164,7 +164,7 @@ module RedAmber
|
|
164
164
|
when Rover::Vector
|
165
165
|
func.execute([data, other.to_a])
|
166
166
|
else
|
167
|
-
raise ArgumentError, "
|
167
|
+
raise ArgumentError, "Operand is not supported: #{other.class}"
|
168
168
|
end
|
169
169
|
options[:aggregate] ? output.value : Vector.new(output.value)
|
170
170
|
end
|
data/lib/red_amber/version.rb
CHANGED
data/red_amber.gemspec
CHANGED
@@ -6,10 +6,10 @@ Gem::Specification.new do |spec|
|
|
6
6
|
spec.name = 'red_amber'
|
7
7
|
spec.version = RedAmber::VERSION
|
8
8
|
spec.authors = ['Hirokazu SUZUKI (heronshoes)']
|
9
|
-
spec.email = ['
|
9
|
+
spec.email = ['heronshoes877@gmail.com']
|
10
10
|
|
11
|
-
spec.summary = 'Simple
|
12
|
-
spec.description = '
|
11
|
+
spec.summary = 'Simple dataframe library for Ruby'
|
12
|
+
spec.description = 'RedAmber is a simple dataframe library powered by Red Arrow with simple API similar to Rover-df.'
|
13
13
|
spec.homepage = 'https://github.com/heronshoes/red_amber'
|
14
14
|
spec.license = 'MIT'
|
15
15
|
spec.required_ruby_version = '>= 2.7'
|
@@ -30,8 +30,8 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
|
-
spec.add_dependency 'red-arrow', '
|
34
|
-
spec.add_dependency 'red-parquet', '
|
33
|
+
spec.add_dependency 'red-arrow', '>= 7.0.0'
|
34
|
+
spec.add_dependency 'red-parquet', '>= 7.0.0'
|
35
35
|
spec.add_dependency 'rover-df', '~> 0.3.0'
|
36
36
|
|
37
37
|
# Development dependency has gone to the Gemfile (rubygems/bundler#7237)
|
metadata
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 7.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 7.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: red-parquet
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 7.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 7.0.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -52,9 +52,10 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.3.0
|
55
|
-
description:
|
55
|
+
description: RedAmber is a simple dataframe library powered by Red Arrow with simple
|
56
|
+
API similar to Rover-df.
|
56
57
|
email:
|
57
|
-
-
|
58
|
+
- heronshoes877@gmail.com
|
58
59
|
executables: []
|
59
60
|
extensions: []
|
60
61
|
extra_rdoc_files: []
|
@@ -102,5 +103,5 @@ requirements: []
|
|
102
103
|
rubygems_version: 3.3.7
|
103
104
|
signing_key:
|
104
105
|
specification_version: 4
|
105
|
-
summary: Simple
|
106
|
+
summary: Simple dataframe library for Ruby
|
106
107
|
test_files: []
|