red-arrow-activerecord 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +33 -0
- data/Rakefile +18 -0
- data/doc/text/news.md +5 -0
- data/lib/arrow-activerecord/arrowable.rb +104 -0
- data/lib/arrow-activerecord/version.rb +3 -0
- data/lib/arrow-activerecord.rb +4 -0
- metadata +136 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5ac5974438fd59eeab35b60221680f207c80acfc
|
4
|
+
data.tar.gz: 3bac786c5c9058ad3fcac6bbdaff8e6b3f2ff731
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c5c20bb8254c395ea9eafafcf9996e7dac7df8110c49685211fec1edecd82299f2de96c20d67f0f62c339e5e153c255be8e1e09349413891f0fc327422c2bb5d
|
7
|
+
data.tar.gz: e39eb1ea4ec0914dc0360b81c81b0b89ec9c848b2eafd3151574bf442795c24c138387fc91248c3df68ebdff823cd4504bb4165ac98b9ee203dfd2a10701094b
|
data/README.md
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# README
|
2
|
+
|
3
|
+
## Name
|
4
|
+
Red Arrow ActiveRecord
|
5
|
+
|
6
|
+
## Usage
|
7
|
+
|
8
|
+
```rb
|
9
|
+
User.all.select(:id, :name).to_arrow
|
10
|
+
```
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'red-arrow-activerecord', require: 'arrow-activerecord'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
```bash
|
21
|
+
$ bundle
|
22
|
+
```
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
```bash
|
26
|
+
$ gem install red-arrow-activerecord
|
27
|
+
```
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
Contribution directions go here.
|
31
|
+
|
32
|
+
## License
|
33
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "bundler/gem_helper"
|
3
|
+
|
4
|
+
# Directory that contains this Rakefile; used as the gem helper's base directory.
gem_root = File.join(File.dirname(__FILE__))

# Override version_tag on this one helper instance so that it returns the
# helper's version value unchanged.
gem_helper = Bundler::GemHelper.new(gem_root)
gem_helper.define_singleton_method(:version_tag) { version }

# Register the helper's Rake tasks.
gem_helper.install

desc "Run tests"
task(:test) { ruby("test/run-test.rb") }

# Running rake without arguments runs the test task.
task :default => :test
|
data/doc/text/news.md
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'arrow'
|
2
|
+
|
3
|
+
module ArrowActiveRecord
  # Mixin that adds +to_arrow+ to any object exposing the relation-style
  # methods used below: +select_values+, +column_names+, +columns+,
  # +in_batches(of:)+ and, on every yielded batch, +pluck+.
  module Arrowable
    # Builds an Arrow::Table from the receiver's rows.
    #
    # The columns exported are +select_values+ when any are present, otherwise
    # all of +column_names+. Rows are read batch by batch and each batch
    # becomes one chunk of every column.
    #
    # @param batch_size [Integer] number of rows requested per batch
    # @return [Arrow::Table] table with one column per exported name
    # @raise [NotImplementedError] when a name has no matching column or the
    #   column's type has no Arrow mapping
    def to_arrow(batch_size: 10000)
      target_column_names = select_values
      target_column_names = column_names if target_column_names.empty?

      fields = []
      data_types = []
      target_column_names.each do |name|
        name = name.to_s
        target_column = columns.find { |column| column.name == name }
        arrow_data_type = extract_arrow_data_type(target_column)
        fields << Arrow::Field.new(name, arrow_data_type)
        data_types << arrow_data_type
      end
      schema = Arrow::Schema.new(fields)

      arrow_array_batches = data_types.map { [] }
      # pluck returns a flat list of values (not a list of rows) when it is
      # given exactly one column, so those values must be wrapped into
      # one-element rows before transposing.
      single_column = target_column_names.size == 1
      in_batches(of: batch_size).each do |relation|
        rows = relation.pluck(*target_column_names)
        rows = rows.map { |value| [value] } if single_column
        column_values_set = rows.transpose
        data_types.each_with_index do |data_type, i|
          # An empty batch transposes to [], leaving no entry for column i.
          column_values = column_values_set[i] || []
          arrow_array_batches[i] << build_arrow_array(column_values, data_type)
        end
      end

      # Named arrow_columns so the local does not shadow the +columns+ method
      # that is called above.
      arrow_columns = fields.each_with_index.map do |field, i|
        chunks = arrow_array_batches[i]
        # Guarantee one (empty) typed chunk per column when no batch was read.
        chunks << build_arrow_array([], data_types[i]) if chunks.empty?
        Arrow::Column.new(field, Arrow::ChunkedArray.new(chunks))
      end

      Arrow::Table.new(schema, arrow_columns)
    end

    private

    # Maps a column description to a new Arrow data type instance.
    #
    # @param column [#bigint?, #type, nil] column description, or nil when no
    #   column matched the requested name
    # @return [Arrow::DataType] the Arrow data type chosen for the column
    # @raise [NotImplementedError] when +column+ is nil or its type is not
    #   handled below
    def extract_arrow_data_type(column)
      type = nil
      type = column.bigint? ? :bigint : column.type if column

      case type
      when :bigint
        Arrow::Int64DataType.new
      when :binary
        Arrow::BinaryDataType.new
      when :boolean
        Arrow::BooleanDataType.new
      when :date
        Arrow::Date32DataType.new
      when :datetime, :time, :timestamp
        Arrow::TimestampDataType.new(:nano)
      # when :decimal
      when :float
        # NOTE(review): presumably a 32-bit float type - confirm it is wide
        # enough for the database's float columns.
        Arrow::FloatDataType.new
      when :integer
        Arrow::Int32DataType.new
      # when :json
      when :string, :text
        Arrow::StringDataType.new
      else
        message = "unsupported data type: #{type}: #{column.inspect}"
        raise NotImplementedError, message
      end
    end

    # Builds one Arrow array holding +column_values+ for the given data type.
    #
    # @param column_values [Array] values of a single column for one batch
    # @param data_type [Arrow::DataType] a type returned by
    #   extract_arrow_data_type
    # @return [Arrow::Array] array whose class corresponds to +data_type+
    # @raise [NotImplementedError] when +data_type+ is not handled below
    def build_arrow_array(column_values, data_type)
      case data_type
      when Arrow::Int64DataType
        Arrow::Int64Array.new(column_values)
      when Arrow::BinaryDataType
        Arrow::BinaryArray.new(column_values)
      when Arrow::BooleanDataType
        Arrow::BooleanArray.new(column_values)
      when Arrow::Date32DataType
        Arrow::Date32Array.new(column_values)
      when Arrow::TimestampDataType
        # Timestamp arrays go through a builder that is given the data type.
        builder = Arrow::TimestampArrayBuilder.new(data_type)
        builder.build(column_values)
      when Arrow::FloatDataType
        Arrow::FloatArray.new(column_values)
      when Arrow::Int32DataType
        Arrow::Int32Array.new(column_values)
      when Arrow::StringDataType
        Arrow::StringArray.new(column_values)
      else
        message = "unsupported data type: #{data_type.inspect}"
        raise NotImplementedError, message
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: red-arrow-activerecord
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- hatappi
|
8
|
+
- Kouhei Sutou
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2018-02-04 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: activerecord
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: red-arrow
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: bundler
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rake
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: sqlite3
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: test-unit
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
description: A library that provides conversion method between Apache Arrow and ActiveRecord
|
99
|
+
email:
|
100
|
+
- hatappi@hatappi.me
|
101
|
+
- kou@clear-code.com
|
102
|
+
executables: []
|
103
|
+
extensions: []
|
104
|
+
extra_rdoc_files: []
|
105
|
+
files:
|
106
|
+
- README.md
|
107
|
+
- Rakefile
|
108
|
+
- doc/text/news.md
|
109
|
+
- lib/arrow-activerecord.rb
|
110
|
+
- lib/arrow-activerecord/arrowable.rb
|
111
|
+
- lib/arrow-activerecord/version.rb
|
112
|
+
homepage: https://github.com/red-data-tools/red-arrow-activerecord
|
113
|
+
licenses:
|
114
|
+
- MIT
|
115
|
+
metadata: {}
|
116
|
+
post_install_message:
|
117
|
+
rdoc_options: []
|
118
|
+
require_paths:
|
119
|
+
- lib
|
120
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
requirements: []
|
131
|
+
rubyforge_project:
|
132
|
+
rubygems_version: 2.5.2.2
|
133
|
+
signing_key:
|
134
|
+
specification_version: 4
|
135
|
+
summary: A library that provides conversion method between Apache Arrow and ActiveRecord
|
136
|
+
test_files: []
|