red-arrow-activerecord 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5ac5974438fd59eeab35b60221680f207c80acfc
4
+ data.tar.gz: 3bac786c5c9058ad3fcac6bbdaff8e6b3f2ff731
5
+ SHA512:
6
+ metadata.gz: c5c20bb8254c395ea9eafafcf9996e7dac7df8110c49685211fec1edecd82299f2de96c20d67f0f62c339e5e153c255be8e1e09349413891f0fc327422c2bb5d
7
+ data.tar.gz: e39eb1ea4ec0914dc0360b81c81b0b89ec9c848b2eafd3151574bf442795c24c138387fc91248c3df68ebdff823cd4504bb4165ac98b9ee203dfd2a10701094b
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # README
2
+
3
+ ## Name
4
+ Red ActiveRecord
5
+
6
+ ## Usage
7
+
8
+ ```ruby
9
+ User.all.select(:id, :name).to_arrow
10
+ ```
11
+
12
+ ## Installation
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'red-arrow-activerecord', require: 'arrow-activerecord'
17
+ ```
18
+
19
+ And then execute:
20
+ ```bash
21
+ $ bundle
22
+ ```
23
+
24
+ Or install it yourself as:
25
+ ```bash
26
+ $ gem install red-arrow-activerecord
27
+ ```
28
+
29
+ ## Contributing
30
+ Contribution directions go here.
31
+
32
+ ## License
33
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ require "rubygems"
2
+ require "bundler/gem_helper"
3
+
4
# Directory that holds this Rakefile; handed to Bundler::GemHelper as its base.
base_dir = File.dirname(__FILE__)

helper = Bundler::GemHelper.new(base_dir)

# Override version_tag on this one helper instance so that it returns the
# helper's own version value unchanged.
class << helper
  def version_tag
    version
  end
end

helper.install

desc "Run tests"
task(:test) { ruby("test/run-test.rb") }

task default: :test
data/doc/text/news.md ADDED
@@ -0,0 +1,5 @@
1
+ # News
2
+
3
+ ## 0.1.0 - 2018-02-05
4
+
5
+ Initial release!!!
@@ -0,0 +1,104 @@
1
+ require 'arrow'
2
+
3
module ArrowActiveRecord
  # Mixin that adds +to_arrow+ to a relation-like object.
  #
  # The including object is expected to respond to +select_values+,
  # +column_names+, +columns+ and +in_batches+, and every batch yielded by
  # +in_batches+ must respond to +pluck+.
  module Arrowable
    # Converts the rows matched by this relation into an Arrow::Table.
    #
    # Only the columns named in +select_values+ are exported; when nothing
    # was selected explicitly, every entry of +column_names+ is exported.
    # Rows are read with +in_batches+ and each batch becomes one chunk of
    # every exported column. When there are no rows at all, each column
    # still gets a single empty chunk of the right type.
    #
    # @param batch_size [Integer] number of rows requested per batch.
    # @return [Arrow::Table] table with one column per exported column name.
    # @raise [NotImplementedError] if an exported name matches no entry of
    #   +columns+, or the matched column's type has no Arrow mapping.
    def to_arrow(batch_size: 10000)
      target_column_names = select_values
      target_column_names = column_names if target_column_names.empty?

      fields = []
      data_types = []
      target_column_names.each do |name|
        name = name.to_s
        target_column = columns.find { |column| column.name == name }
        arrow_data_type = extract_arrow_data_type(target_column)
        fields << Arrow::Field.new(name, arrow_data_type)
        data_types << arrow_data_type
      end
      schema = Arrow::Schema.new(fields)

      # One list of per-batch Arrow arrays for every exported column.
      arrow_array_batches = data_types.collect { [] }
      single_column = target_column_names.size == 1
      in_batches(of: batch_size).each do |relation|
        rows = relation.pluck(*target_column_names)
        # Nothing to transpose; also avoids indexing into an empty result.
        next if rows.empty?

        # pluck returns a flat list of values (not a list of rows) when it is
        # asked for exactly one column, and a flat list cannot be transposed.
        column_values_set = single_column ? [rows] : rows.transpose
        data_types.each_with_index do |data_type, i|
          arrow_array_batches[i] << build_arrow_array(column_values_set[i],
                                                      data_type)
        end
      end

      # Named arrow_columns so that it does not shadow the +columns+ method
      # used above.
      arrow_columns = fields.each_with_index.collect do |field, i|
        chunks = arrow_array_batches[i]
        # Never build a chunked array out of zero chunks: give an empty
        # relation one empty, correctly typed chunk per column instead.
        chunks << build_arrow_array([], data_types[i]) if chunks.empty?
        Arrow::Column.new(field, Arrow::ChunkedArray.new(chunks))
      end

      Arrow::Table.new(schema, arrow_columns)
    end

    private

    # Maps a column description to a new Arrow data type instance.
    #
    # @param column [#bigint?, #type, nil] column description, or nil when
    #   the requested name matched no column.
    # @return [Object] an instance of one of the Arrow::*DataType classes.
    # @raise [NotImplementedError] if +column+ is nil or its type is not
    #   handled below (:decimal and :json are not handled).
    def extract_arrow_data_type(column)
      type = nil
      if column
        # bigint? is checked first and takes precedence over column.type.
        type = column.bigint? ? :bigint : column.type
      end

      case type
      when :bigint
        Arrow::Int64DataType.new
      when :binary
        Arrow::BinaryDataType.new
      when :boolean
        Arrow::BooleanDataType.new
      when :date
        Arrow::Date32DataType.new
      when :datetime, :time, :timestamp
        Arrow::TimestampDataType.new(:nano)
      when :float
        # NOTE(review): presumably a 32-bit float type; confirm that this is
        # wide enough for the database's float columns.
        Arrow::FloatDataType.new
      when :integer
        Arrow::Int32DataType.new
      when :string, :text
        Arrow::StringDataType.new
      else
        message = "unsupported data type: #{type}: #{column.inspect}"
        raise NotImplementedError, message
      end
    end

    # Builds the Arrow array that holds one batch of values of one column.
    #
    # @param column_values [Array] values of a single column.
    # @param data_type [Object] data type returned by
    #   +extract_arrow_data_type+ for that column.
    # @return [Object] the Arrow array matching +data_type+.
    # @raise [NotImplementedError] if +data_type+ is not handled below.
    def build_arrow_array(column_values, data_type)
      case data_type
      when Arrow::Int64DataType
        Arrow::Int64Array.new(column_values)
      when Arrow::BinaryDataType
        Arrow::BinaryArray.new(column_values)
      when Arrow::BooleanDataType
        Arrow::BooleanArray.new(column_values)
      when Arrow::Date32DataType
        Arrow::Date32Array.new(column_values)
      when Arrow::TimestampDataType
        # Built through a builder that is given the data type instance,
        # unlike the other branches which call the array class directly.
        builder = Arrow::TimestampArrayBuilder.new(data_type)
        builder.build(column_values)
      when Arrow::FloatDataType
        Arrow::FloatArray.new(column_values)
      when Arrow::Int32DataType
        Arrow::Int32Array.new(column_values)
      when Arrow::StringDataType
        Arrow::StringArray.new(column_values)
      else
        message = "unsupported data type: #{data_type.inspect}"
        raise NotImplementedError, message
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module ArrowActiveRecord
  # Version of the red-arrow-activerecord gem.
  VERSION = '0.1.0'
end

# The version constant was originally published under the differently cased
# name ArrowActiverecord; keep that spelling resolvable for existing callers.
ArrowActiverecord = ArrowActiveRecord unless defined?(ArrowActiverecord)
@@ -0,0 +1,4 @@
1
+ require "active_record"
2
+ require "arrow-activerecord/arrowable"
3
+
4
# Mix Arrowable into ActiveRecord::Relation so that relations respond to
# to_arrow.
ActiveRecord::Relation.include(ArrowActiveRecord::Arrowable)
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: red-arrow-activerecord
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - hatappi
8
+ - Kouhei Sutou
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2018-02-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: activerecord
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: red-arrow
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: bundler
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rake
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: sqlite3
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: test-unit
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ description: A library that provides conversion method between Apache Arrow and ActiveRecord
99
+ email:
100
+ - hatappi@hatappi.me
101
+ - kou@clear-code.com
102
+ executables: []
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - README.md
107
+ - Rakefile
108
+ - doc/text/news.md
109
+ - lib/arrow-activerecord.rb
110
+ - lib/arrow-activerecord/arrowable.rb
111
+ - lib/arrow-activerecord/version.rb
112
+ homepage: https://github.com/red-data-tools/red-arrow-activerecord
113
+ licenses:
114
+ - MIT
115
+ metadata: {}
116
+ post_install_message:
117
+ rdoc_options: []
118
+ require_paths:
119
+ - lib
120
+ required_ruby_version: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ required_rubygems_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ requirements: []
131
+ rubyforge_project:
132
+ rubygems_version: 2.5.2.2
133
+ signing_key:
134
+ specification_version: 4
135
+ summary: A library that provides conversion method between Apache Arrow and ActiveRecord
136
+ test_files: []