rover-df 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/rover/data_frame.rb +4 -2
- data/lib/rover/vector.rb +1 -0
- data/lib/rover/version.rb +1 -1
- data/lib/rover.rb +24 -10
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c75bed3989211e806e54c296290e5f7b3af236a15742daac876e211e3ca5a76f
|
4
|
+
data.tar.gz: 5865ff8f1d0036423f18cfee867da63214ee50f79d373b0f0f244853d8efbefa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11718bc8ade75a605e92cabe05c29e55c6d4dfe427cd5ada0a8a216db678b32a88f4a43843d1e7dcda7b7a64adb63b76969f1d958e91ca57c4f71989632e14aa
|
7
|
+
data.tar.gz: 16940236090625bef69cb14d6d9f9f50720314edea1b5892f60443799e5389700ddfb0d79a29ee1e193168097add9d7195799e7f049d85f9c9dc9c443843a678
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 0.2.7 (2022-01-16)
|
2
|
+
|
3
|
+
- Added support for booleans to Parquet methods
|
4
|
+
- Added support for creating data frames from `ActiveRecord::Result`
|
5
|
+
- Added `types` option to `read_parquet` and `parse_parquet` methods
|
6
|
+
|
1
7
|
## 0.2.6 (2021-10-27)
|
2
8
|
|
3
9
|
- Added support for `nil` headers to `read_csv` and `parse_csv`
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -61,7 +61,7 @@ Rover.read_csv("file.csv")
|
|
61
61
|
Rover.parse_csv("CSV,data,string")
|
62
62
|
```
|
63
63
|
|
64
|
-
From Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
|
64
|
+
From Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
|
65
65
|
|
66
66
|
```ruby
|
67
67
|
Rover.read_parquet("file.parquet")
|
@@ -401,7 +401,7 @@ CSV
|
|
401
401
|
df.to_csv
|
402
402
|
```
|
403
403
|
|
404
|
-
Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
|
404
|
+
Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
|
405
405
|
|
406
406
|
```ruby
|
407
407
|
df.to_parquet
|
data/lib/rover/data_frame.rb
CHANGED
@@ -40,8 +40,8 @@ module Rover
|
|
40
40
|
vectors.each do |k, v|
|
41
41
|
@vectors[k] = to_vector(v, type: types[k])
|
42
42
|
end
|
43
|
-
elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base))
|
44
|
-
result = data.connection.select_all(data.all.to_sql)
|
43
|
+
elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base) || data.is_a?(ActiveRecord::Result))
|
44
|
+
result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.all.to_sql)
|
45
45
|
result.columns.each_with_index do |k, i|
|
46
46
|
@vectors[k] = to_vector(result.rows.map { |r| r[i] }, type: types[k])
|
47
47
|
end
|
@@ -250,6 +250,8 @@ module Rover
|
|
250
250
|
:double
|
251
251
|
when :float32
|
252
252
|
:float
|
253
|
+
when :bool
|
254
|
+
:boolean
|
253
255
|
when :object
|
254
256
|
if @vectors[name].all? { |v| v.is_a?(String) }
|
255
257
|
:string
|
data/lib/rover/vector.rb
CHANGED
data/lib/rover/version.rb
CHANGED
data/lib/rover.rb
CHANGED
@@ -19,14 +19,14 @@ module Rover
|
|
19
19
|
csv_to_df(CSV.parse(str, **csv_options(options)), types: types, headers: options[:headers])
|
20
20
|
end
|
21
21
|
|
22
|
-
def read_parquet(path)
|
22
|
+
def read_parquet(path, types: nil)
|
23
23
|
require "parquet"
|
24
|
-
parquet_to_df(Arrow::Table.load(path))
|
24
|
+
parquet_to_df(Arrow::Table.load(path), types: types)
|
25
25
|
end
|
26
26
|
|
27
|
-
def parse_parquet(str)
|
27
|
+
def parse_parquet(str, types: nil)
|
28
28
|
require "parquet"
|
29
|
-
parquet_to_df(Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet))
|
29
|
+
parquet_to_df(Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet), types: types)
|
30
30
|
end
|
31
31
|
|
32
32
|
private
|
@@ -64,6 +64,7 @@ module Rover
|
|
64
64
|
end
|
65
65
|
|
66
66
|
PARQUET_TYPE_MAPPING = {
|
67
|
+
"bool" => Numo::Bit,
|
67
68
|
"float" => Numo::SFloat,
|
68
69
|
"double" => Numo::DFloat,
|
69
70
|
"int8" => Numo::Int8,
|
@@ -77,15 +78,28 @@ module Rover
|
|
77
78
|
"uint64" => Numo::UInt64
|
78
79
|
}
|
79
80
|
|
80
|
-
def parquet_to_df(table)
|
81
|
+
def parquet_to_df(table, types: nil)
|
81
82
|
data = {}
|
83
|
+
types ||= {}
|
82
84
|
table.each_column do |column|
|
83
85
|
k = column.field.name
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
86
|
+
if types[k]
|
87
|
+
data[k] = Vector.new(column.data.values, type: types[k])
|
88
|
+
else
|
89
|
+
type = column.field.data_type.to_s
|
90
|
+
numo_type = PARQUET_TYPE_MAPPING[type]
|
91
|
+
raise "Unknown type: #{type}" unless numo_type
|
92
|
+
|
93
|
+
# TODO automatic conversion?
|
94
|
+
# int => float
|
95
|
+
# bool => object
|
96
|
+
if (type.include?("int") || type == "bool") && column.n_nulls > 0
|
97
|
+
raise "Nulls not supported for #{type} column: #{k}"
|
98
|
+
end
|
99
|
+
|
100
|
+
# TODO improve performance
|
101
|
+
data[k] = numo_type.cast(column.data.values)
|
102
|
+
end
|
89
103
|
end
|
90
104
|
DataFrame.new(data)
|
91
105
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rover-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: '0'
|
60
60
|
requirements: []
|
61
|
-
rubygems_version: 3.
|
61
|
+
rubygems_version: 3.3.3
|
62
62
|
signing_key:
|
63
63
|
specification_version: 4
|
64
64
|
summary: Simple, powerful data frames for Ruby
|