rover-df 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/rover/data_frame.rb +4 -2
- data/lib/rover/vector.rb +1 -0
- data/lib/rover/version.rb +1 -1
- data/lib/rover.rb +24 -10
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c75bed3989211e806e54c296290e5f7b3af236a15742daac876e211e3ca5a76f
|
4
|
+
data.tar.gz: 5865ff8f1d0036423f18cfee867da63214ee50f79d373b0f0f244853d8efbefa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11718bc8ade75a605e92cabe05c29e55c6d4dfe427cd5ada0a8a216db678b32a88f4a43843d1e7dcda7b7a64adb63b76969f1d958e91ca57c4f71989632e14aa
|
7
|
+
data.tar.gz: 16940236090625bef69cb14d6d9f9f50720314edea1b5892f60443799e5389700ddfb0d79a29ee1e193168097add9d7195799e7f049d85f9c9dc9c443843a678
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 0.2.7 (2022-01-16)
|
2
|
+
|
3
|
+
- Added support for booleans to Parquet methods
|
4
|
+
- Added support for creating data frames from `ActiveRecord::Result`
|
5
|
+
- Added `types` option to `read_parquet` and `parse_parquet` methods
|
6
|
+
|
1
7
|
## 0.2.6 (2021-10-27)
|
2
8
|
|
3
9
|
- Added support for `nil` headers to `read_csv` and `parse_csv`
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -61,7 +61,7 @@ Rover.read_csv("file.csv")
|
|
61
61
|
Rover.parse_csv("CSV,data,string")
|
62
62
|
```
|
63
63
|
|
64
|
-
From Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
|
64
|
+
From Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
|
65
65
|
|
66
66
|
```ruby
|
67
67
|
Rover.read_parquet("file.parquet")
|
@@ -401,7 +401,7 @@ CSV
|
|
401
401
|
df.to_csv
|
402
402
|
```
|
403
403
|
|
404
|
-
Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
|
404
|
+
Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
|
405
405
|
|
406
406
|
```ruby
|
407
407
|
df.to_parquet
|
data/lib/rover/data_frame.rb
CHANGED
@@ -40,8 +40,8 @@ module Rover
|
|
40
40
|
vectors.each do |k, v|
|
41
41
|
@vectors[k] = to_vector(v, type: types[k])
|
42
42
|
end
|
43
|
-
elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base))
|
44
|
-
result = data.connection.select_all(data.all.to_sql)
|
43
|
+
elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base) || data.is_a?(ActiveRecord::Result))
|
44
|
+
result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.all.to_sql)
|
45
45
|
result.columns.each_with_index do |k, i|
|
46
46
|
@vectors[k] = to_vector(result.rows.map { |r| r[i] }, type: types[k])
|
47
47
|
end
|
@@ -250,6 +250,8 @@ module Rover
|
|
250
250
|
:double
|
251
251
|
when :float32
|
252
252
|
:float
|
253
|
+
when :bool
|
254
|
+
:boolean
|
253
255
|
when :object
|
254
256
|
if @vectors[name].all? { |v| v.is_a?(String) }
|
255
257
|
:string
|
data/lib/rover/vector.rb
CHANGED
data/lib/rover/version.rb
CHANGED
data/lib/rover.rb
CHANGED
@@ -19,14 +19,14 @@ module Rover
|
|
19
19
|
csv_to_df(CSV.parse(str, **csv_options(options)), types: types, headers: options[:headers])
|
20
20
|
end
|
21
21
|
|
22
|
-
def read_parquet(path)
|
22
|
+
def read_parquet(path, types: nil)
|
23
23
|
require "parquet"
|
24
|
-
parquet_to_df(Arrow::Table.load(path))
|
24
|
+
parquet_to_df(Arrow::Table.load(path), types: types)
|
25
25
|
end
|
26
26
|
|
27
|
-
def parse_parquet(str)
|
27
|
+
def parse_parquet(str, types: nil)
|
28
28
|
require "parquet"
|
29
|
-
parquet_to_df(Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet))
|
29
|
+
parquet_to_df(Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet), types: types)
|
30
30
|
end
|
31
31
|
|
32
32
|
private
|
@@ -64,6 +64,7 @@ module Rover
|
|
64
64
|
end
|
65
65
|
|
66
66
|
PARQUET_TYPE_MAPPING = {
|
67
|
+
"bool" => Numo::Bit,
|
67
68
|
"float" => Numo::SFloat,
|
68
69
|
"double" => Numo::DFloat,
|
69
70
|
"int8" => Numo::Int8,
|
@@ -77,15 +78,28 @@ module Rover
|
|
77
78
|
"uint64" => Numo::UInt64
|
78
79
|
}
|
79
80
|
|
80
|
-
def parquet_to_df(table)
|
81
|
+
def parquet_to_df(table, types: nil)
|
81
82
|
data = {}
|
83
|
+
types ||= {}
|
82
84
|
table.each_column do |column|
|
83
85
|
k = column.field.name
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
86
|
+
if types[k]
|
87
|
+
data[k] = Vector.new(column.data.values, type: types[k])
|
88
|
+
else
|
89
|
+
type = column.field.data_type.to_s
|
90
|
+
numo_type = PARQUET_TYPE_MAPPING[type]
|
91
|
+
raise "Unknown type: #{type}" unless numo_type
|
92
|
+
|
93
|
+
# TODO automatic conversion?
|
94
|
+
# int => float
|
95
|
+
# bool => object
|
96
|
+
if (type.include?("int") || type == "bool") && column.n_nulls > 0
|
97
|
+
raise "Nulls not supported for #{type} column: #{k}"
|
98
|
+
end
|
99
|
+
|
100
|
+
# TODO improve performance
|
101
|
+
data[k] = numo_type.cast(column.data.values)
|
102
|
+
end
|
89
103
|
end
|
90
104
|
DataFrame.new(data)
|
91
105
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rover-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: '0'
|
60
60
|
requirements: []
|
61
|
-
rubygems_version: 3.
|
61
|
+
rubygems_version: 3.3.3
|
62
62
|
signing_key:
|
63
63
|
specification_version: 4
|
64
64
|
summary: Simple, powerful data frames for Ruby
|