embulk-input-big-query-async 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e1e61ab475f7f94c2420064083bb8faf71e03ca29c31342cd4f288fd0af323f6
4
+ data.tar.gz: a88983894e55334d9a09a45c239a493ed362ab71d0ecf6d46c6df5887b915eef
5
+ SHA512:
6
+ metadata.gz: 7fd62497ffc4bbbd314368abe870a62ea3cf2746133361d572704ef906050f30907380436f640d7941ac71a4c15af41ebc40d94adfdd2140fcdd0cb2bc9aaa58
7
+ data.tar.gz: 0e2ff2c50ceba30a3c0c352898452257a6fb3b2a11d3df8695fad6a5703adbd5fab04979ef703df0cd1206200ef13ab3d0bd5f79ee36ebb268d2ebb3549ffcce
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in embulk-input-big-query-async.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2017 TODO: Write your name
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,83 @@
1
+ # Embulk::Input::Bigquery
2
+
3
+ This is an Embulk input plugin that reads data from BigQuery.
4
+
5
+ ## Installation
6
+
7
+ Install it yourself as:
8
+
9
+ $ embulk gem install embulk-input-big-query-async
10
+
11
+ ## Usage
12
+
13
+ ```
14
+ in:
15
+ type: big-query-async
16
+ project: 'project-name'
17
+ keyfile: '/home/hogehoge/bigquery-keyfile.json'
18
+ sql: 'SELECT price,category_id FROM [ecsite.products] GROUP BY category_id'
19
+ columns:
20
+ - {name: price, type: long}
21
+ - {name: category_id, type: string}
22
+ max: 2000
23
+ synchronous_method: true
24
+ out:
25
+ type: stdout
26
+ ```
27
+
28
+ If the table name changes (for example, it contains a date), build the query with `sql_erb` and `erb_params`:
29
+
30
+ ```
31
+ in:
32
+ type: big-query-async
33
+ project: 'project-name'
34
+ keyfile: '/home/hogehoge/bigquery-keyfile.json'
35
+ sql_erb: 'SELECT price,category_id FROM [ecsite.products_<%= params["date"].strftime("%Y%m") %>] GROUP BY category_id'
36
+ erb_params:
37
+ date: "require 'date'; (Date.today - 1)"
38
+ columns:
39
+ - {name: price, type: long}
40
+ - {name: category_id, type: long}
41
+ - {name: month, type: timestamp, format: '%Y-%m', eval: 'require "time"; Time.parse(params["date"]).to_i'}
42
+ ```
43
+
44
+ ## Optional Configuration
45
+ This plugin uses the gem [`google-cloud(Google Cloud Client Library for Ruby)`](https://github.com/GoogleCloudPlatform/google-cloud-ruby) and queries data using the synchronous method or the asynchronous method.
46
+ Therefore some optional configuration items comply with the Google Cloud Client Library.
47
+
48
+ ### optional bigquery parameter
49
+
50
+ Details of the following parameters are documented [here](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/google-cloud-bigquery/lib/google/cloud/bigquery/project.rb).
51
+
52
+ - max :
53
+ - default value : **null** and null value is interpreted as no maximum row count in the Google Cloud Client Library. This parameter is supported only by the synchronous method.
54
+ - cache :
55
+ - default value : **null** and null value is interpreted as true in the Google Cloud Client Library.
56
+ - timeout :
57
+ - default value : **null** and null value is interpreted as 10000 milliseconds in the Google Cloud Client Library. This parameter is supported only by the synchronous method.
58
+ - dryrun :
59
+ - default value : **null** and null value is interpreted as false in the Google Cloud Client Library. This parameter is supported only by the synchronous method.
60
+ - standard_sql :
61
+ - default value : **null** and null value is interpreted as true in the Google Cloud Client Library.
62
+ - legacy_sql :
63
+ - default value : **null** and null value is interpreted as false in the Google Cloud Client Library.
64
+ - large_results :
65
+ - default value : **null** and null value is interpreted as false in the Google Cloud Client Library. This parameter is supported only by the asynchronous method.
66
+ - write :
67
+ - default value : **null** and null value is interpreted as empty in the Google Cloud Client Library. This parameter is supported only by the asynchronous method.
68
+
69
+ ### the bigquery method
70
+ The BigQuery library in the Google Cloud Client Library provides [two methods](https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/master/google-cloud-bigquery/lib/google/cloud/bigquery/project.rb) for running queries.
71
+
72
+ The default method in this plugin is synchronous_method.
73
+ The logic that selects the query method is [here](https://github.com/ykoyano/embulk-input-bigquery/blob/master/lib/embulk/input/bigquery.rb#L41).
74
+
75
+ - synchronous_method:
76
+ - type : boolean
77
+ - default value : **null**
78
+ - This method uses `query` method in the Google Cloud Client Library.
79
+ - Note that the number of records the `query` method can return is **limited**. If you expect many records, use the `query_job` method by setting the `asynchronous_method` option.
80
+ - asynchronous_method:
81
+ - type : boolean
82
+ - default value : **null**
83
+ - This method uses `query_job` method in the Google Cloud Client Library.
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'embulk/input/big-query-async/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "embulk-input-big-query-async"
8
+ spec.version = Embulk::Input::Bigqueryasync::VERSION
9
+ spec.authors = ["Angelos Alexopoulos"]
10
+ spec.email = ["alexopoulos7@gmail.com"]
11
+ spec.description = %q{embulk input plugin from bigquery.}
12
+ spec.summary = %q{Embulk input plugin from bigquery.}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_dependency "google-cloud-bigquery", '~> 0.26.0'
24
+ end
@@ -0,0 +1,146 @@
1
+ require "embulk/input/bigquery/version"
2
+ require "google/cloud/bigquery"
3
+ require 'erb'
4
+
5
+ module Embulk
6
+ module Input
7
+ class InputBigquery < InputPlugin
8
+ Plugin.register_input('big-query-async', self)
9
+
10
+ def self.transaction(config, &control)
11
+ sql = config[:sql]
12
+ params = {}
13
+ unless sql
14
+ sql_erb = config[:sql_erb]
15
+ erb = ERB.new(sql_erb)
16
+ erb_params = config[:erb_params]
17
+ erb_params.each do |k, v|
18
+ params[k] = eval(v)
19
+ end
20
+
21
+ sql = erb.result(binding)
22
+ end
23
+
24
+ task = {
25
+ project: config[:project],
26
+ keyfile: config[:keyfile],
27
+ sql: sql,
28
+ columns: config[:columns],
29
+ params: params,
30
+ synchronous_method: config[:synchronous_method],
31
+ asynchronous_method: config[:asynchronous_method],
32
+ dataset: config[:dataset],
33
+ table: config[:table],
34
+ option: {
35
+ cache: config[:cache],
36
+ standard_sql: config[:standard_sql],
37
+ legacy_sql: config[:legacy_sql],
38
+ }
39
+ }
40
+
41
+ if task[:synchronous_method] || !task[:asynchronous_method]
42
+ task[:option].merge!(
43
+ {
44
+ max: config[:max],
45
+ timeout: config[:timeout],
46
+ dryrun: config[:dryrun],
47
+ }
48
+ )
49
+ else
50
+ task[:option].merge!(
51
+ {
52
+ large_results: config[:legacy_sql],
53
+ write: config[:write],
54
+ }
55
+ )
56
+ end
57
+
58
+ columns = []
59
+ config[:columns].each_with_index do |c, i|
60
+ columns << Column.new(i, c['name'], c['type'].to_sym)
61
+ end
62
+
63
+ yield(task, columns, 1)
64
+
65
+ return {}
66
+ end
67
+
68
+ def run
69
+ bq = Google::Cloud::Bigquery.new(project: @task[:project], keyfile: @task[:keyfile])
70
+ params = @task[:params]
71
+ @task[:columns] = values_to_sym(@task[:columns], 'name')
72
+ option = keys_to_sym(@task[:option])
73
+ if @task[:synchronous_method] || @task[:asynchronous_method].nil?
74
+ run_synchronous_query(bq, option)
75
+ else
76
+ if @task[:dataset]
77
+ dataset = bq.dataset(@task[:dataset])
78
+ option[:table] = dataset.table(@task[:table])
79
+ if option[:table].nil?
80
+ option[:table] = dataset.create_table(@task[:table])
81
+ end
82
+ end
83
+ run_asynchronous_query(bq, option)
84
+ end
85
+ @page_builder.finish
86
+ return {}
87
+ end
88
+
89
+ def run_synchronous_query(bq, option)
90
+ rows = bq.query(@task[:sql], **option)
91
+ rows.each do |row|
92
+ record = extract_record(row)
93
+ @page_builder.add(record)
94
+ end
95
+ end
96
+
97
+ def run_asynchronous_query(bq, option)
98
+ job = bq.query_job(@task[:sql], **option)
99
+ job.wait_until_done!
100
+ return {} if job.failed?
101
+ results = job.query_results
102
+ while results
103
+ results.each do |row|
104
+ record = extract_record(row)
105
+ @page_builder.add(record)
106
+ end
107
+ results = results.next
108
+ end
109
+ end
110
+
111
+ def extract_record(row)
112
+ columns = []
113
+ @task[:columns].each do |c|
114
+ val = row[c['name']]
115
+ if c['eval']
116
+ val = eval(c['eval'], binding)
117
+ end
118
+ columns << val
119
+ end
120
+ return columns
121
+ end
122
+
123
+ def values_to_sym(hashs, key)
124
+ hashs.map do |h|
125
+ h[key] = h[key].to_sym
126
+ h
127
+ end
128
+ end
129
+
130
+ def keys_to_sym(hash)
131
+ ret = {}
132
+ hash.each do |key, value|
133
+ ret[key.to_sym] = value
134
+ end
135
+ ret
136
+ end
137
+
138
+ def values_to_sym(hashs, key)
139
+ hashs.map do |h|
140
+ h[key] = h[key].to_sym
141
+ h
142
+ end
143
+ end
144
+ end
145
+ end
146
+ end
@@ -0,0 +1,7 @@
1
+ module Embulk
2
+ module Input
3
+ module Bigqueryasync # namespace that holds the gem's version constant
4
+ VERSION = "0.0.2" # release version; the gemspec assigns it to spec.version
5
+ end
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-big-query-async
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Angelos Alexopoulos
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-04-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: google-cloud-bigquery
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.26.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.26.0
55
+ description: embulk input plugin from bigquery.
56
+ email:
57
+ - alexopoulos7@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - Gemfile
63
+ - LICENSE.txt
64
+ - README.md
65
+ - Rakefile
66
+ - embulk-input-big-query-async.gemspec
67
+ - lib/embulk/input/big-query-async.rb
68
+ - lib/embulk/input/big-query-async/version.rb
69
+ - pkg/embulk-input-big-query-async-0.0.1.gem
70
+ homepage: ''
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubygems_version: 3.0.3
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: Embulk input plugin from bigquery.
93
+ test_files: []