red-arrow-activerecord 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5ac5974438fd59eeab35b60221680f207c80acfc
4
+ data.tar.gz: 3bac786c5c9058ad3fcac6bbdaff8e6b3f2ff731
5
+ SHA512:
6
+ metadata.gz: c5c20bb8254c395ea9eafafcf9996e7dac7df8110c49685211fec1edecd82299f2de96c20d67f0f62c339e5e153c255be8e1e09349413891f0fc327422c2bb5d
7
+ data.tar.gz: e39eb1ea4ec0914dc0360b81c81b0b89ec9c848b2eafd3151574bf442795c24c138387fc91248c3df68ebdff823cd4504bb4165ac98b9ee203dfd2a10701094b
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # README
2
+
3
+ ## Name
4
+ Red Arrow ActiveRecord
5
+
6
+ ## Usage
7
+
8
+ ```rb
9
+ User.all.select(:id, :name).to_arrow
10
+ ```
11
+
12
+ ## Installation
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'red-arrow-activerecord', require: 'arrow-activerecord'
17
+ ```
18
+
19
+ And then execute:
20
+ ```bash
21
+ $ bundle
22
+ ```
23
+
24
+ Or install it yourself as:
25
+ ```bash
26
+ $ gem install red-arrow-activerecord
27
+ ```
28
+
29
+ ## Contributing
30
+ Bug reports and pull requests are welcome on GitHub at https://github.com/red-data-tools/red-arrow-activerecord.
31
+
32
+ ## License
33
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ require "rubygems"
2
+ require "bundler/gem_helper"
3
+
4
# Directory that contains this Rakefile; the gem helper is rooted here.
base_dir = File.dirname(__FILE__)

helper = Bundler::GemHelper.new(base_dir)

# Override version_tag on this one helper instance so that it returns the
# helper's own version value unchanged.
helper.define_singleton_method(:version_tag) { version }

helper.install

desc "Run tests"
task :test do
  # Runs the test entry point through Rake's `ruby` helper.
  ruby("test/run-test.rb")
end

# A bare `rake` runs the test task.
task :default => :test
data/doc/text/news.md ADDED
@@ -0,0 +1,5 @@
1
+ # News
2
+
3
+ ## 0.1.0 - 2018-02-05
4
+
5
+ Initial release!!!
@@ -0,0 +1,104 @@
1
+ require 'arrow'
2
+
3
module ArrowActiveRecord
  # Mixin that adds #to_arrow to the object including it. The host must
  # respond to select_values, column_names, columns and in_batches, and each
  # batch yielded by in_batches must respond to pluck.
  module Arrowable
    # Converts the rows of the receiver into an Arrow::Table.
    #
    # The exported columns are select_values when that list is non-empty and
    # column_names otherwise. Rows are fetched batch by batch; every batch
    # contributes one chunk to each column's Arrow::ChunkedArray.
    #
    # @param batch_size [Integer] number of rows requested per batch
    # @return [Arrow::Table]
    # @raise [NotImplementedError] when a column is unknown or its type has
    #   no Arrow mapping
    def to_arrow(batch_size: 10000)
      target_column_names = select_values.empty? ? column_names : select_values

      # Index the column metadata once instead of scanning it per name.
      # `||=` keeps the first column for a given name, like `find` would.
      columns_by_name = columns.each_with_object({}) do |column, index|
        index[column.name] ||= column
      end

      data_types = target_column_names.map do |name|
        extract_arrow_data_type(columns_by_name[name.to_s])
      end
      fields = target_column_names.zip(data_types).map do |name, data_type|
        Arrow::Field.new(name.to_s, data_type)
      end

      single_column = target_column_names.size == 1
      arrow_array_batches = data_types.map { [] }
      in_batches(of: batch_size).each do |relation|
        rows = relation.pluck(*target_column_names)
        # With a single column, pluck yields a flat list of values rather
        # than a list of rows; wrap each value so transpose still works.
        rows = rows.map { |value| [value] } if single_column
        rows.transpose.each_with_index do |column_values, i|
          arrow_array_batches[i] << build_arrow_array(column_values, data_types[i])
        end
      end

      # When no batch was yielded, give every column one empty chunk so the
      # chunked arrays are never built from an empty chunk list.
      # NOTE(review): assumes Arrow::ChunkedArray needs at least one chunk to
      # determine its type - confirm against the red-arrow version in use.
      arrow_array_batches.each_with_index do |batches, i|
        batches << build_arrow_array([], data_types[i]) if batches.empty?
      end

      # Deliberately not named `columns`: that would shadow the `columns`
      # reader used above.
      arrow_columns = fields.each_with_index.map do |field, i|
        Arrow::Column.new(field, Arrow::ChunkedArray.new(arrow_array_batches[i]))
      end

      Arrow::Table.new(Arrow::Schema.new(fields), arrow_columns)
    end

    private

    # Maps a column description to the Arrow data type used to store it.
    #
    # @param column [#bigint?, #type, nil] column metadata, or nil when the
    #   requested name matched no known column
    # @return [Object] an Arrow::*DataType instance
    # @raise [NotImplementedError] for nil columns and unmapped types
    def extract_arrow_data_type(column)
      type = nil
      # bigint? takes precedence over the generic type symbol.
      type = column.bigint? ? :bigint : column.type if column

      case type
      when :bigint
        Arrow::Int64DataType.new
      when :binary
        Arrow::BinaryDataType.new
      when :boolean
        Arrow::BooleanDataType.new
      when :date
        Arrow::Date32DataType.new
      when :datetime, :time, :timestamp
        Arrow::TimestampDataType.new(:nano)
      # :decimal and :json are not mapped yet.
      when :float
        Arrow::FloatDataType.new
      when :integer
        Arrow::Int32DataType.new
      when :string, :text
        Arrow::StringDataType.new
      else
        message = "unsupported data type: #{type}: #{column.inspect}"
        raise NotImplementedError, message
      end
    end

    # Builds the Arrow array that holds one batch of values for one column.
    #
    # @param column_values [Array] raw values of a single column
    # @param data_type [Object] a data type from #extract_arrow_data_type
    # @return [Object] the Arrow array matching data_type
    # @raise [NotImplementedError] when data_type has no array builder here
    def build_arrow_array(column_values, data_type)
      case data_type
      when Arrow::Int64DataType
        Arrow::Int64Array.new(column_values)
      when Arrow::BinaryDataType
        Arrow::BinaryArray.new(column_values)
      when Arrow::BooleanDataType
        Arrow::BooleanArray.new(column_values)
      when Arrow::Date32DataType
        Arrow::Date32Array.new(column_values)
      when Arrow::TimestampDataType
        # Timestamps go through a builder that is given the data type itself.
        Arrow::TimestampArrayBuilder.new(data_type).build(column_values)
      when Arrow::FloatDataType
        Arrow::FloatArray.new(column_values)
      when Arrow::Int32DataType
        Arrow::Int32Array.new(column_values)
      when Arrow::StringDataType
        Arrow::StringArray.new(column_values)
      else
        message = "unsupported data type: #{data_type.inspect}"
        raise NotImplementedError, message
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Gem version, kept in the same ArrowActiveRecord namespace as the rest of
# the library.
module ArrowActiveRecord
  VERSION = '0.1.0'
end

# The constant was originally spelled ArrowActiverecord; keep that name as an
# alias so existing references to ArrowActiverecord::VERSION still resolve.
ArrowActiverecord = ArrowActiveRecord
@@ -0,0 +1,4 @@
1
+ require "active_record"
2
+ require "arrow-activerecord/arrowable"
3
+
4
# Mix Arrowable into ActiveRecord::Relation so its instances gain the
# methods that module defines.
ActiveRecord::Relation.include(ArrowActiveRecord::Arrowable)
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: red-arrow-activerecord
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - hatappi
8
+ - Kouhei Sutou
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2018-02-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: activerecord
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: red-arrow
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: bundler
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rake
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: sqlite3
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: test-unit
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ description: A library that provides conversion method between Apache Arrow and ActiveRecord
99
+ email:
100
+ - hatappi@hatappi.me
101
+ - kou@clear-code.com
102
+ executables: []
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - README.md
107
+ - Rakefile
108
+ - doc/text/news.md
109
+ - lib/arrow-activerecord.rb
110
+ - lib/arrow-activerecord/arrowable.rb
111
+ - lib/arrow-activerecord/version.rb
112
+ homepage: https://github.com/red-data-tools/red-arrow-activerecord
113
+ licenses:
114
+ - MIT
115
+ metadata: {}
116
+ post_install_message:
117
+ rdoc_options: []
118
+ require_paths:
119
+ - lib
120
+ required_ruby_version: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ required_rubygems_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ requirements: []
131
+ rubyforge_project:
132
+ rubygems_version: 2.5.2.2
133
+ signing_key:
134
+ specification_version: 4
135
+ summary: A library that provides conversion method between Apache Arrow and ActiveRecord
136
+ test_files: []