rover-df 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01e2a90ba133ae07ad6ad482bdca985df806d6a073fa2d93029b2b7e1b55dc49
4
- data.tar.gz: 96f4171420dea68b38cffdd5a365657bc464f6d1f0c4f6bf1aefb20377c56179
3
+ metadata.gz: c75bed3989211e806e54c296290e5f7b3af236a15742daac876e211e3ca5a76f
4
+ data.tar.gz: 5865ff8f1d0036423f18cfee867da63214ee50f79d373b0f0f244853d8efbefa
5
5
  SHA512:
6
- metadata.gz: 2451d6844c7ece459e61c8e1499047f8efd6472a0d57317b7e2e1110527d843e8177c16ccbb1aeb0fd61e647fdd4291ebf73d4bfe008560eff7b963b1ac22ee6
7
- data.tar.gz: 18ad0cfb8fc22aeb63d2e1b11333b1a5989c7bcc0f2b5fbebedb11acf3d3dc26e7235109e1501d0e4b9a06b5aa7e47b71bdb23de6d6fcd87c9fb53d2bf0be330
6
+ metadata.gz: 11718bc8ade75a605e92cabe05c29e55c6d4dfe427cd5ada0a8a216db678b32a88f4a43843d1e7dcda7b7a64adb63b76969f1d958e91ca57c4f71989632e14aa
7
+ data.tar.gz: 16940236090625bef69cb14d6d9f9f50720314edea1b5892f60443799e5389700ddfb0d79a29ee1e193168097add9d7195799e7f049d85f9c9dc9c443843a678
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.2.7 (2022-01-16)
2
+
3
+ - Added support for booleans to Parquet methods
4
+ - Added support for creating data frames from `ActiveRecord::Result`
5
+ - Added `types` option to `read_parquet` and `parse_parquet` methods
6
+
1
7
  ## 0.2.6 (2021-10-27)
2
8
 
3
9
  - Added support for `nil` headers to `read_csv` and `parse_csv`
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2020-2021 Andrew Kane
1
+ Copyright (c) 2020-2022 Andrew Kane
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -61,7 +61,7 @@ Rover.read_csv("file.csv")
61
61
  Rover.parse_csv("CSV,data,string")
62
62
  ```
63
63
 
64
- From Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem) [unreleased]
64
+ From Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
65
65
 
66
66
  ```ruby
67
67
  Rover.read_parquet("file.parquet")
@@ -401,7 +401,7 @@ CSV
401
401
  df.to_csv
402
402
  ```
403
403
 
404
- Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem) [unreleased]
404
+ Parquet (requires the [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet) gem)
405
405
 
406
406
  ```ruby
407
407
  df.to_parquet
@@ -40,8 +40,8 @@ module Rover
40
40
  vectors.each do |k, v|
41
41
  @vectors[k] = to_vector(v, type: types[k])
42
42
  end
43
- elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base))
44
- result = data.connection.select_all(data.all.to_sql)
43
+ elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base) || data.is_a?(ActiveRecord::Result))
44
+ result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.all.to_sql)
45
45
  result.columns.each_with_index do |k, i|
46
46
  @vectors[k] = to_vector(result.rows.map { |r| r[i] }, type: types[k])
47
47
  end
@@ -250,6 +250,8 @@ module Rover
250
250
  :double
251
251
  when :float32
252
252
  :float
253
+ when :bool
254
+ :boolean
253
255
  when :object
254
256
  if @vectors[name].all? { |v| v.is_a?(String) }
255
257
  :string
data/lib/rover/vector.rb CHANGED
@@ -359,6 +359,7 @@ module Rover
359
359
  data = data.to_a
360
360
 
361
361
  if type
362
+ data = data.map { |v| v || Float::NAN } if [:float, :float32].include?(type)
362
363
  data = numo_type.cast(data)
363
364
  else
364
365
  data =
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.2.6"
2
+ VERSION = "0.2.7"
3
3
  end
data/lib/rover.rb CHANGED
@@ -19,14 +19,14 @@ module Rover
19
19
  csv_to_df(CSV.parse(str, **csv_options(options)), types: types, headers: options[:headers])
20
20
  end
21
21
 
22
- def read_parquet(path)
22
+ def read_parquet(path, types: nil)
23
23
  require "parquet"
24
- parquet_to_df(Arrow::Table.load(path))
24
+ parquet_to_df(Arrow::Table.load(path), types: types)
25
25
  end
26
26
 
27
- def parse_parquet(str)
27
+ def parse_parquet(str, types: nil)
28
28
  require "parquet"
29
- parquet_to_df(Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet))
29
+ parquet_to_df(Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet), types: types)
30
30
  end
31
31
 
32
32
  private
@@ -64,6 +64,7 @@ module Rover
64
64
  end
65
65
 
66
66
  PARQUET_TYPE_MAPPING = {
67
+ "bool" => Numo::Bit,
67
68
  "float" => Numo::SFloat,
68
69
  "double" => Numo::DFloat,
69
70
  "int8" => Numo::Int8,
@@ -77,15 +78,28 @@ module Rover
77
78
  "uint64" => Numo::UInt64
78
79
  }
79
80
 
80
- def parquet_to_df(table)
81
+ def parquet_to_df(table, types: nil)
81
82
  data = {}
83
+ types ||= {}
82
84
  table.each_column do |column|
83
85
  k = column.field.name
84
- type = column.field.data_type.to_s
85
- numo_type = PARQUET_TYPE_MAPPING[type]
86
- raise "Unknown type: #{type}" unless numo_type
87
- # TODO improve performance
88
- data[k] = numo_type.cast(column.data.values)
86
+ if types[k]
87
+ data[k] = Vector.new(column.data.values, type: types[k])
88
+ else
89
+ type = column.field.data_type.to_s
90
+ numo_type = PARQUET_TYPE_MAPPING[type]
91
+ raise "Unknown type: #{type}" unless numo_type
92
+
93
+ # TODO automatic conversion?
94
+ # int => float
95
+ # bool => object
96
+ if (type.include?("int") || type == "bool") && column.n_nulls > 0
97
+ raise "Nulls not supported for #{type} column: #{k}"
98
+ end
99
+
100
+ # TODO improve performance
101
+ data[k] = numo_type.cast(column.data.values)
102
+ end
89
103
  end
90
104
  DataFrame.new(data)
91
105
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-27 00:00:00.000000000 Z
11
+ date: 2022-01-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  - !ruby/object:Gem::Version
59
59
  version: '0'
60
60
  requirements: []
61
- rubygems_version: 3.2.22
61
+ rubygems_version: 3.3.3
62
62
  signing_key:
63
63
  specification_version: 4
64
64
  summary: Simple, powerful data frames for Ruby