embulk-input-big-query-async 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +83 -0
- data/Rakefile +1 -0
- data/embulk-input-big-query-async.gemspec +24 -0
- data/lib/embulk/input/big-query-async.rb +146 -0
- data/lib/embulk/input/big-query-async/version.rb +7 -0
- data/pkg/embulk-input-big-query-async-0.0.1.gem +0 -0
- metadata +93 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e1e61ab475f7f94c2420064083bb8faf71e03ca29c31342cd4f288fd0af323f6
|
4
|
+
data.tar.gz: a88983894e55334d9a09a45c239a493ed362ab71d0ecf6d46c6df5887b915eef
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7fd62497ffc4bbbd314368abe870a62ea3cf2746133361d572704ef906050f30907380436f640d7941ac71a4c15af41ebc40d94adfdd2140fcdd0cb2bc9aaa58
|
7
|
+
data.tar.gz: 0e2ff2c50ceba30a3c0c352898452257a6fb3b2a11d3df8695fad6a5703adbd5fab04979ef703df0cd1206200ef13ab3d0bd5f79ee36ebb268d2ebb3549ffcce
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2017 TODO: Write your name
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
# Embulk::Input::Bigquery
|
2
|
+
|
3
|
+
This is an Embulk input plugin for BigQuery.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Install it yourself as:
|
8
|
+
|
9
|
+
$ embulk gem install embulk-input-big-query-async
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
```
|
14
|
+
in:
|
15
|
+
type: big-query-async
|
16
|
+
project: 'project-name'
|
17
|
+
keyfile: '/home/hogehoge/bigquery-keyfile.json'
|
18
|
+
sql: 'SELECT price,category_id FROM [ecsite.products] GROUP BY category_id'
|
19
|
+
columns:
|
20
|
+
- {name: price, type: long}
|
21
|
+
- {name: category_id, type: string}
|
22
|
+
max: 2000
|
23
|
+
synchronous_method: true
|
24
|
+
out:
|
25
|
+
type: stdout
|
26
|
+
```
|
27
|
+
|
28
|
+
If the table name varies (for example, by date), build the query with `sql_erb` instead:
|
29
|
+
|
30
|
+
```
|
31
|
+
in:
|
32
|
+
type: big-query-async
|
33
|
+
project: 'project-name'
|
34
|
+
keyfile: '/home/hogehoge/bigquery-keyfile.json'
|
35
|
+
sql_erb: 'SELECT price,category_id FROM [ecsite.products_<%= params["date"].strftime("%Y%m") %>] GROUP BY category_id'
|
36
|
+
erb_params:
|
37
|
+
date: "require 'date'; (Date.today - 1)"
|
38
|
+
columns:
|
39
|
+
- {name: price, type: long}
|
40
|
+
- {name: category_id, type: long}
|
41
|
+
- {name: month, type: timestamp, format: '%Y-%m', eval: 'require "time"; Time.parse(params["date"]).to_i'}
|
42
|
+
```
|
43
|
+
|
44
|
+
## Optional Configuration
|
45
|
+
This plugin uses the gem [`google-cloud(Google Cloud Client Library for Ruby)`](https://github.com/GoogleCloudPlatform/google-cloud-ruby) and queries data using the synchronous method or the asynchronous method.
|
46
|
+
Therefore some optional configuration items comply with the Google Cloud Client Library.
|
47
|
+
|
48
|
+
### optional bigquery parameter
|
49
|
+
|
50
|
+
Details of the following parameters are documented [here](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/google-cloud-bigquery/lib/google/cloud/bigquery/project.rb).
|
51
|
+
|
52
|
+
- max :
|
53
|
+
- default value : **null** and null value is interpreted as no maximum row count in the Google Cloud Client Library. This param is supported only by the synchronous method.
|
54
|
+
- cache :
|
55
|
+
- default value : **null** and null value is interpreted as true in the Google Cloud Client Library.
|
56
|
+
- timeout :
|
57
|
+
- default value : **null** and null value is interpreted as 10000 milliseconds in the Google Cloud Client Library. This param is supported only by the synchronous method.
|
58
|
+
- dryrun :
|
59
|
+
- default value : **null** and null value is interpreted as false in the Google Cloud Client Library. This param is supported only by the synchronous method.
|
60
|
+
- standard_sql :
|
61
|
+
- default value : **null** and null value is interpreted as true in the Google Cloud Client Library.
|
62
|
+
- legacy_sql :
|
63
|
+
- default value : **null** and null value is interpreted as false in the Google Cloud Client Library.
|
64
|
+
- large_results :
|
65
|
+
- default value : **null** and null value is interpreted as false in the Google Cloud Client Library. This param is supported only by the asynchronous method.
|
66
|
+
- write :
|
67
|
+
- default value : **null** and null value is interpreted as empty in the Google Cloud Client Library. This param is supported only by the asynchronous method.
|
68
|
+
|
69
|
+
### the bigquery method
|
70
|
+
The BigQuery library in the Google Cloud Client Library has [two methods](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/google-cloud-bigquery/lib/google/cloud/bigquery/project.rb) for running queries.
|
71
|
+
|
72
|
+
The default method in this plugin is synchronous_method.
|
73
|
+
The logic that selects the query method is [here](https://github.com/ykoyano/embulk-input-bigquery/blob/master/lib/embulk/input/bigquery.rb#L41).
|
74
|
+
|
75
|
+
- synchronous_method:
|
76
|
+
- type : boolean
|
77
|
+
- default value : **null**
|
78
|
+
- This method uses `query` method in the Google Cloud Client Library.
|
79
|
+
- It should be noted that the number of records for `query` method is **limited**. Therefore, if you get many records, you should use `query_job` method with asynchronous_method option.
|
80
|
+
- asynchronous_method:
|
81
|
+
- type : boolean
|
82
|
+
- default value : **null**
|
83
|
+
- This method uses `query_job` method in the Google Cloud Client Library.
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for the embulk-input-big-query-async plugin.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'embulk/input/big-query-async/version'

Gem::Specification.new do |spec|
  spec.name          = "embulk-input-big-query-async"
  spec.version       = Embulk::Input::Bigqueryasync::VERSION
  spec.authors       = ["Angelos Alexopoulos"]
  spec.email         = ["alexopoulos7@gmail.com"]
  spec.description   = %q{embulk input plugin from bigquery.}
  spec.summary       = %q{Embulk input plugin from bigquery.}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every git-tracked file except previously built gems under pkg/.
  # Without the filter an old release (pkg/*.gem) is shipped inside each new
  # gem, bloating it and nesting stale artifacts.
  spec.files         = `git ls-files`.split($/).reject { |f| f.start_with?('pkg/') }
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_dependency "google-cloud-bigquery", '~> 0.26.0'
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# Load this gem's own version file. The path must match the file shipped in
# this gem (lib/embulk/input/big-query-async/version.rb); the former
# "embulk/input/bigquery/version" path is not part of this gem and raises
# LoadError unless some other gem happens to provide it.
require "embulk/input/big-query-async/version"
require "google/cloud/bigquery"
require 'erb'
|
4
|
+
|
5
|
+
module Embulk
|
6
|
+
module Input
|
7
|
+
class InputBigquery < InputPlugin
|
8
|
+
Plugin.register_input('big-query-async', self)
|
9
|
+
|
10
|
+
# Builds the task definition and the output schema, then hands both to the
# given block for a single-task run.
#
# config  - plugin configuration. Reads :sql, or :sql_erb together with
#           :erb_params, plus :project, :keyfile, :columns,
#           :synchronous_method, :asynchronous_method, :dataset, :table and
#           the query options (:cache, :standard_sql, :legacy_sql, :max,
#           :timeout, :dryrun, :large_results, :write).
# control - block receiving (task, columns, task_count).
#
# Returns an empty Hash.
# Raises ArgumentError when neither :sql nor :sql_erb is configured.
def self.transaction(config, &control)
  sql = config[:sql]
  # Keep this local named `params`: ERB templates are rendered against this
  # method's binding and read it by that name (e.g. params["date"]).
  params = {}
  unless sql
    sql_erb = config[:sql_erb]
    if sql_erb.nil?
      raise ArgumentError, "either 'sql' or 'sql_erb' must be configured"
    end

    erb = ERB.new(sql_erb)
    # erb_params is optional; a template without parameters is still valid.
    erb_params = config[:erb_params] || {}
    # SECURITY: each erb_params value (and the template itself) is executed
    # as Ruby code. Only run configuration files from trusted sources.
    erb_params.each do |k, v|
      params[k] = eval(v)
    end

    sql = erb.result(binding)
  end

  task = {
    project: config[:project],
    keyfile: config[:keyfile],
    sql: sql,
    columns: config[:columns],
    params: params,
    synchronous_method: config[:synchronous_method],
    asynchronous_method: config[:asynchronous_method],
    dataset: config[:dataset],
    table: config[:table],
    option: {
      cache: config[:cache],
      standard_sql: config[:standard_sql],
      legacy_sql: config[:legacy_sql],
    }
  }

  # Synchronous is the default: it is used unless only asynchronous_method
  # is set to a truthy value.
  if task[:synchronous_method] || !task[:asynchronous_method]
    # Options accepted only by the synchronous query call.
    task[:option].merge!(
      max: config[:max],
      timeout: config[:timeout],
      dryrun: config[:dryrun]
    )
  else
    # Options accepted only by the asynchronous query job. large_results
    # previously read config[:legacy_sql] by mistake, so the user's
    # large_results setting was ignored.
    task[:option].merge!(
      large_results: config[:large_results],
      write: config[:write]
    )
  end

  columns = config[:columns].each_with_index.map do |c, i|
    Column.new(i, c['name'], c['type'].to_sym)
  end

  yield(task, columns, 1)

  return {}
end
|
67
|
+
|
68
|
+
# Executes the configured query and feeds every result row to the page
# builder.
#
# The synchronous `query` path is used unless only `asynchronous_method` is
# truthy. On the asynchronous path, when `dataset` is configured, the
# destination table is looked up and created if it does not exist yet.
#
# Returns an empty Hash.
# Raises RuntimeError when the configured dataset cannot be found.
def run
  bq = Google::Cloud::Bigquery.new(project: @task[:project], keyfile: @task[:keyfile])
  @task[:columns] = values_to_sym(@task[:columns], 'name')
  option = keys_to_sym(@task[:option])

  # This test must match the one in `transaction`, which decides whether
  # `option` carries the synchronous-only keys (max/timeout/dryrun) or the
  # asynchronous-only ones. The former `.nil?` check sent an explicit
  # `asynchronous_method: false` down the asynchronous path with
  # synchronous-only options.
  if @task[:synchronous_method] || !@task[:asynchronous_method]
    run_synchronous_query(bq, option)
  else
    if @task[:dataset]
      dataset = bq.dataset(@task[:dataset])
      # Fail with a readable message instead of a NoMethodError on nil.
      raise "BigQuery dataset '#{@task[:dataset]}' was not found" if dataset.nil?

      option[:table] = dataset.table(@task[:table]) || dataset.create_table(@task[:table])
    end
    run_asynchronous_query(bq, option)
  end
  @page_builder.finish
  return {}
end
|
88
|
+
|
89
|
+
# Runs the task's SQL through the client's `query` call and appends every
# returned row, converted by `extract_record`, to the page builder.
#
# bq     - the BigQuery client.
# option - Hash of keyword options forwarded to `query`.
def run_synchronous_query(bq, option)
  bq.query(@task[:sql], **option).each do |result_row|
    @page_builder.add(extract_record(result_row))
  end
end
|
96
|
+
|
97
|
+
# Runs the task's SQL as a query job, waits for it to finish and appends
# every row of every result page to the page builder.
#
# bq     - the BigQuery client.
# option - Hash of keyword options forwarded to `query_job`.
#
# Raises RuntimeError when the job finishes in a failed state.
def run_asynchronous_query(bq, option)
  job = bq.query_job(@task[:sql], **option)
  job.wait_until_done!
  # A failed job used to return silently, so the run reported success with
  # zero rows. Surface the failure instead.
  raise "BigQuery query job failed: #{job.error.inspect}" if job.failed?

  results = job.query_results
  # `next` yields the following result page, or nil after the last one.
  while results
    results.each do |row|
      @page_builder.add(extract_record(row))
    end
    results = results.next
  end
end
|
110
|
+
|
111
|
+
# Converts one result row into the ordered value array handed to the page
# builder.
#
# For each configured column the value is read from `row` by column name.
# When the column defines an `eval` expression, that Ruby snippet is
# evaluated against this method's binding and its result replaces the value.
#
# row - a result row indexable by column name.
#
# Returns an Array of values in column order.
def extract_record(row)
  # Read only through `binding` by column `eval` expressions (for example
  # params["date"]). Without this local such expressions raise NameError,
  # so do not remove it even though it looks unused.
  params = @task[:params]
  columns = []
  @task[:columns].each do |c|
    val = row[c['name']]
    # SECURITY: the `eval` string comes from the configuration file and runs
    # as Ruby code; only use configuration from trusted sources. The local
    # names row, val, c and params are visible to it and must stay stable.
    val = eval(c['eval'], binding) if c['eval']
    columns << val
  end
  columns
end
|
122
|
+
|
123
|
+
# Converts the value stored under `key` in every hash to a Symbol.
#
# The hashes are updated in place; the returned Array contains the same
# hash objects in the same order.
#
# hashs - Array of hashes.
# key   - key whose value is converted with `to_sym`.
def values_to_sym(hashs, key)
  hashs.map do |h|
    h[key] = h[key].to_sym
    h
  end
end

# Returns a new Hash holding the entries of `hash` with every key converted
# to a Symbol; values are left untouched.
def keys_to_sym(hash)
  hash.each_with_object({}) do |(key, value), ret|
    ret[key.to_sym] = value
  end
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
Binary file
|
metadata
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-input-big-query-async
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Angelos Alexopoulos
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-04-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: google-cloud-bigquery
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.26.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.26.0
|
55
|
+
description: embulk input plugin from bigquery.
|
56
|
+
email:
|
57
|
+
- alexopoulos7@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- Gemfile
|
63
|
+
- LICENSE.txt
|
64
|
+
- README.md
|
65
|
+
- Rakefile
|
66
|
+
- embulk-input-big-query-async.gemspec
|
67
|
+
- lib/embulk/input/big-query-async.rb
|
68
|
+
- lib/embulk/input/big-query-async/version.rb
|
69
|
+
- pkg/embulk-input-big-query-async-0.0.1.gem
|
70
|
+
homepage: ''
|
71
|
+
licenses:
|
72
|
+
- MIT
|
73
|
+
metadata: {}
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
require_paths:
|
77
|
+
- lib
|
78
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
requirements: []
|
89
|
+
rubygems_version: 3.0.3
|
90
|
+
signing_key:
|
91
|
+
specification_version: 4
|
92
|
+
summary: Embulk input plugin from bigquery.
|
93
|
+
test_files: []
|