red-arrow-activerecord 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +33 -0
- data/Rakefile +18 -0
- data/doc/text/news.md +5 -0
- data/lib/arrow-activerecord/arrowable.rb +104 -0
- data/lib/arrow-activerecord/version.rb +3 -0
- data/lib/arrow-activerecord.rb +4 -0
- metadata +136 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5ac5974438fd59eeab35b60221680f207c80acfc
|
4
|
+
data.tar.gz: 3bac786c5c9058ad3fcac6bbdaff8e6b3f2ff731
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c5c20bb8254c395ea9eafafcf9996e7dac7df8110c49685211fec1edecd82299f2de96c20d67f0f62c339e5e153c255be8e1e09349413891f0fc327422c2bb5d
|
7
|
+
data.tar.gz: e39eb1ea4ec0914dc0360b81c81b0b89ec9c848b2eafd3151574bf442795c24c138387fc91248c3df68ebdff823cd4504bb4165ac98b9ee203dfd2a10701094b
|
data/README.md
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# README
|
2
|
+
|
3
|
+
## Name
|
4
|
+
Red Arrow ActiveRecord
|
5
|
+
|
6
|
+
## Usage
|
7
|
+
|
8
|
+
```rb
|
9
|
+
User.all.select(:id, :name).to_arrow
|
10
|
+
```
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'red-arrow-activerecord', require: 'arrow-activerecord'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
```bash
|
21
|
+
$ bundle
|
22
|
+
```
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
```bash
|
26
|
+
$ gem install red-arrow-activerecord
|
27
|
+
```
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
Contribution directions go here.
|
31
|
+
|
32
|
+
## License
|
33
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "rubygems"
require "bundler/gem_helper"

# Directory that contains this Rakefile; handed to Bundler::GemHelper below.
base_dir = File.dirname(__FILE__)

helper = Bundler::GemHelper.new(base_dir)

# Singleton override on this helper instance: the value reported as the
# version tag is the bare version string.
def helper.version_tag
  version
end

helper.install

desc "Run tests"
task :test do
  ruby("test/run-test.rb")
end

# Running `rake` without arguments runs the test task.
task default: :test
|
data/lib/arrow-activerecord/arrowable.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'arrow'
|
2
|
+
|
3
|
+
module ArrowActiveRecord
  # Mixin that converts the receiver's rows into an Arrow::Table.
  #
  # The including object must respond to +select_values+, +column_names+,
  # +columns+ and +in_batches+, and each batch must respond to +pluck+
  # (presumably an ActiveRecord::Relation -- confirm against the file that
  # includes this module).
  module Arrowable
    # Builds an Arrow::Table holding the selected columns of the receiver.
    #
    # Columns come from +select_values+; when nothing was selected explicitly
    # every name in +column_names+ is used. Rows are read in batches and each
    # batch becomes one chunk of every column's Arrow::ChunkedArray.
    #
    # batch_size:: number of rows requested per batch (default: 10000).
    #
    # Returns an Arrow::Table.
    # Raises NotImplementedError when a column is unknown or has a type that
    # has no Arrow mapping.
    def to_arrow(batch_size: 10000)
      target_column_names = select_values
      target_column_names = column_names if target_column_names.empty?

      # Index the column metadata once instead of scanning it per column.
      # `||=` keeps the first column for a given name, like `find` would.
      columns_by_name = columns.each_with_object({}) do |column, index|
        index[column.name] ||= column
      end

      data_types = target_column_names.map do |name|
        extract_arrow_data_type(columns_by_name[name.to_s])
      end
      fields = target_column_names.zip(data_types).map do |name, data_type|
        Arrow::Field.new(name.to_s, data_type)
      end
      schema = Arrow::Schema.new(fields)

      # One list of per-batch Arrow arrays for each target column.
      # NOTE(review): when no batch is yielded these lists stay empty --
      # confirm that Arrow::ChunkedArray accepts an empty list.
      arrow_array_batches = data_types.map { [] }
      single_column = target_column_names.size == 1

      in_batches(of: batch_size).each do |relation|
        rows = relation.pluck(*target_column_names)
        # pluck with a single column returns a flat array of values instead
        # of an array of rows; wrap each value so transpose still works.
        rows = rows.map { |value| [value] } if single_column
        column_values_set = rows.transpose

        data_types.each_with_index do |data_type, i|
          arrow_array_batches[i] <<
            build_arrow_array(column_values_set[i], data_type)
        end
      end

      arrow_columns = fields.each_with_index.map do |field, i|
        chunked_array = Arrow::ChunkedArray.new(arrow_array_batches[i])
        Arrow::Column.new(field, chunked_array)
      end

      Arrow::Table.new(schema, arrow_columns)
    end

    private

    # Maps a column description to a new Arrow data type instance.
    #
    # column:: object responding to +bigint?+ and +type+, or nil when the
    #          requested name matched no known column.
    #
    # Raises NotImplementedError for nil and for types without a mapping.
    def extract_arrow_data_type(column)
      type = nil
      # bigint? is checked first so such columns map to Int64 rather than
      # going through the plain :integer (Int32) mapping.
      type = column.bigint? ? :bigint : column.type if column

      case type
      when :bigint
        Arrow::Int64DataType.new
      when :binary
        Arrow::BinaryDataType.new
      when :boolean
        Arrow::BooleanDataType.new
      when :date
        Arrow::Date32DataType.new
      when :datetime, :time, :timestamp
        # All time-like types share one nanosecond timestamp mapping.
        Arrow::TimestampDataType.new(:nano)
      # :decimal is not supported yet
      when :float
        Arrow::FloatDataType.new
      when :integer
        Arrow::Int32DataType.new
      # :json is not supported yet
      when :string, :text
        Arrow::StringDataType.new
      else
        message = "unsupported data type: #{type}: #{column.inspect}"
        raise NotImplementedError, message
      end
    end

    # Builds the Arrow array that matches +data_type+ from +column_values+.
    #
    # column_values:: values of one column for one batch.
    # data_type::     Arrow data type instance chosen for that column.
    #
    # Raises NotImplementedError when +data_type+ has no array mapping.
    def build_arrow_array(column_values, data_type)
      case data_type
      when Arrow::Int64DataType
        Arrow::Int64Array.new(column_values)
      when Arrow::BinaryDataType
        Arrow::BinaryArray.new(column_values)
      when Arrow::BooleanDataType
        Arrow::BooleanArray.new(column_values)
      when Arrow::Date32DataType
        Arrow::Date32Array.new(column_values)
      when Arrow::TimestampDataType
        # Timestamp arrays go through a builder constructed from the data
        # type rather than through a direct array constructor.
        Arrow::TimestampArrayBuilder.new(data_type).build(column_values)
      when Arrow::FloatDataType
        Arrow::FloatArray.new(column_values)
      when Arrow::Int32DataType
        Arrow::Int32Array.new(column_values)
      when Arrow::StringDataType
        Arrow::StringArray.new(column_values)
      else
        message = "unsupported data type: #{data_type.inspect}"
        raise NotImplementedError, message
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: red-arrow-activerecord
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- hatappi
|
8
|
+
- Kouhei Sutou
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2018-02-04 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: activerecord
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: red-arrow
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: bundler
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rake
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: sqlite3
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: test-unit
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
description: A library that provides conversion method between Apache Arrow and ActiveRecord
|
99
|
+
email:
|
100
|
+
- hatappi@hatappi.me
|
101
|
+
- kou@clear-code.com
|
102
|
+
executables: []
|
103
|
+
extensions: []
|
104
|
+
extra_rdoc_files: []
|
105
|
+
files:
|
106
|
+
- README.md
|
107
|
+
- Rakefile
|
108
|
+
- doc/text/news.md
|
109
|
+
- lib/arrow-activerecord.rb
|
110
|
+
- lib/arrow-activerecord/arrowable.rb
|
111
|
+
- lib/arrow-activerecord/version.rb
|
112
|
+
homepage: https://github.com/red-data-tools/red-arrow-activerecord
|
113
|
+
licenses:
|
114
|
+
- MIT
|
115
|
+
metadata: {}
|
116
|
+
post_install_message:
|
117
|
+
rdoc_options: []
|
118
|
+
require_paths:
|
119
|
+
- lib
|
120
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
requirements: []
|
131
|
+
rubyforge_project:
|
132
|
+
rubygems_version: 2.5.2.2
|
133
|
+
signing_key:
|
134
|
+
specification_version: 4
|
135
|
+
summary: A library that provides conversion method between Apache Arrow and ActiveRecord
|
136
|
+
test_files: []
|