patriot-gcp 0.1.0
- checksums.yaml +7 -0
- data/init.rb +2 -0
- data/lib/patriot_gcp.rb +2 -0
- data/lib/patriot_gcp/command.rb +1 -0
- data/lib/patriot_gcp/command/load_to_bigquery.rb +64 -0
- data/lib/patriot_gcp/ext.rb +1 -0
- data/lib/patriot_gcp/ext/bigquery.rb +162 -0
- data/lib/patriot_gcp/version.rb +4 -0
- metadata +66 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 40974fda52b072f2713f5af1fdeb0d40007b12bc
+  data.tar.gz: 444fd93b08537f1d1279100bbe73c1b2d773b6aa
+SHA512:
+  metadata.gz: 8645578eb374795e8bd9aae1398dca46cc257e0652a883b3d31cb604f534c88c70f872471b810c2084b5538593cb517824018cb1c6acfecfba56fe6aadf026c5
+  data.tar.gz: f16cca37d6e4ce03a9ea8728cded822dec6d4b25d2584885183de77a50dbef6ef7c9c5555135a46fb6479951bddf8540aeb52c5a1b0e2a2f2864ae8957456353
data/init.rb
ADDED
data/lib/patriot_gcp.rb
ADDED
@@ -0,0 +1 @@
+require 'patriot_gcp/command/load_to_bigquery'
data/lib/patriot_gcp/command/load_to_bigquery.rb
ADDED
@@ -0,0 +1,64 @@
+module PatriotGCP
+  module Command
+    class LoadToBigQueryCommand < Patriot::Command::Base
+      declare_command_name :load_to_bigquery
+      include PatriotGCP::Ext::BigQuery
+
+      command_attr :inifile, :dataset, :table, :schema, :options, :input_file, :name_suffix, :polling_interval
+
+      class BigQueryException < Exception; end
+      class GoogleCloudPlatformException < Exception; end
+
+      def job_id
+        job_id = "#{command_name}_#{@dataset}_#{@table}"
+        job_id = "#{job_id}_#{@name_suffix}" unless @name_suffix.nil?
+        return job_id
+      end
+
+
+      def execute
+        @logger.info "start load_to_bigquery"
+
+        unless File.exist?(@input_file)
+          raise Exception, "The given file doesn't exist."
+        end
+
+        unless File.size?(@input_file)
+          @logger.warn "The target file is empty"
+          return
+        end
+
+        ini = IniFile.load(@inifile)
+        if ini.nil?
+          raise Exception, "inifile not found"
+        end
+
+        service_account = ini["gcp"]["service_account"]
+        private_key = ini["gcp"]["private_key"]
+        key_pass = ini["gcp"]["key_pass"]
+        project_id = ini["bigquery"]["project_id"]
+
+        if service_account.nil? or private_key.nil?
+          raise GoogleCloudPlatformException, "configuration for GCP is not enough."
+        elsif project_id.nil?
+          raise BigQueryException, "configuration for BigQuery is not enough."
+        end
+
+        @logger.info "start uploading"
+        stat_info = bq_load(@input_file,
+                            private_key,
+                            key_pass,
+                            service_account,
+                            project_id,
+                            @dataset,
+                            @table,
+                            @schema,
+                            @options,
+                            @polling_interval)
+
+        @logger.info "upload succeeded: #{stat_info}"
+        @logger.info "end load_to_bigquery"
+      end
+    end
+  end
+end
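The load_to_bigquery command above reads its credentials from the file named by the :inifile attribute: a [gcp] section providing service_account, private_key, and key_pass, and a [bigquery] section providing project_id. A minimal sketch of such a file follows; the key names come from the code above, while every value is an illustrative placeholder (private_key is assumed to point at a PKCS#12 service-account key, which is what bq_load ultimately loads).

[gcp]
; placeholder values only
service_account = my-service-account@developer.gserviceaccount.com
private_key = /path/to/service_account.p12
key_pass = notasecret

[bigquery]
project_id = my-gcp-project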
data/lib/patriot_gcp/ext.rb
ADDED
@@ -0,0 +1 @@
+require "patriot_gcp/ext/bigquery"
data/lib/patriot_gcp/ext/bigquery.rb
ADDED
@@ -0,0 +1,162 @@
+require 'google/api_client'
+require 'patriot_gcp/version'
+
+
+module PatriotGCP
+  module Ext
+    module BigQuery
+
+      def self.included(cls)
+        cls.send(:include, Patriot::Util::System)
+      end
+
+      class BigQueryException < Exception; end
+
+      def _get_auth_client(p12_key, key_pass, email)
+        key = Google::APIClient::KeyUtils.load_from_pkcs12(p12_key, key_pass)
+        auth_client = Signet::OAuth2::Client.new(
+          :token_credential_uri => 'https://accounts.google.com/o/oauth2/token',
+          :audience => 'https://accounts.google.com/o/oauth2/token',
+          :scope => 'https://www.googleapis.com/auth/bigquery',
+          :issuer => email,
+          :signing_key => key)
+        auth_client.fetch_access_token!
+        return auth_client
+      end
+
+
+      def _get_api_client()
+        Google::APIClient.new(
+          :application_name => VERSION::PROJECT_NAME,
+          :application_version => VERSION::VERSION)
+      end
+
+
+      def _make_body(project_id, dataset_id, table_id, schema, options)
+        body = {
+          'configuration' => {
+            'load' => {
+              'schema' => schema,
+              'destinationTable' => {
+                'projectId' => project_id,
+                'datasetId' => dataset_id,
+                'tableId' => table_id
+              }
+            }
+          }
+        }
+        if options
+          options.each{|key, value|
+            body['configuration']['load'][key] = value
+          }
+        end
+
+        return body
+      end
+
+
+      def _poll(bq_client,
+                api_client,
+                auth_client,
+                project_id,
+                job_id,
+                polling_interval)
+
+        polling_interval.times{
+          response = JSON.parse(api_client.execute(
+            :api_method => bq_client.jobs.get,
+            :parameters => {
+              'jobId' => job_id,
+              'projectId' => project_id
+            },
+            :headers => {'Content-Type' => 'application/json; charset=UTF-8'},
+            :authorization => auth_client
+          ).response.body)
+          state = response["status"]["state"]
+
+          if state == 'DONE'
+            if response["status"]["errors"]
+              raise BigQueryException, "upload failed: #{response['status']['errors']}"
+            else
+              return response["statistics"]
+            end
+          end
+
+          sleep 60
+        }
+
+        raise BigQueryException, "registered job didn't finish within: #{polling_interval} mins. please check if it will finish later on. jobId: #{job_id}"
+      end
+
+
+      def _bq_load(filename,
+                   project_id,
+                   dataset_id,
+                   table_id,
+                   auth_client,
+                   api_client,
+                   schema,
+                   options,
+                   polling_interval)
+
+        bq_client = api_client.discovered_api('bigquery', 'v2')
+        body = _make_body(project_id, dataset_id, table_id, schema, options)
+        media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")
+
+        result = api_client.execute(
+          :api_method => bq_client.jobs.insert,
+          :parameters => {
+            'projectId' => project_id,
+            'uploadType' => 'multipart'
+          },
+          :body_object => body,
+          :authorization => auth_client,
+          :media => media
+        )
+
+        begin
+          job_id = JSON.parse(result.response.body)['jobReference']['jobId']
+        rescue
+          raise BigQueryException, "failed to register job: #{result.response.body}"
+        end
+
+        return _poll(bq_client,
+                     api_client,
+                     auth_client,
+                     project_id,
+                     job_id,
+                     polling_interval)
+      end
+
+
+      def bq_load(filename,
+                  p12_key,
+                  key_pass,
+                  email,
+                  project_id,
+                  dataset_id,
+                  table_id,
+                  schema,
+                  options=nil,
+                  polling_interval=nil)
+
+        options ||= {}
+        polling_interval ||= 60
+
+        api_client = _get_api_client()
+        auth_client = _get_auth_client(p12_key, key_pass, email)
+
+        return _bq_load(filename,
+                        project_id,
+                        dataset_id,
+                        table_id,
+                        auth_client,
+                        api_client,
+                        schema,
+                        options,
+                        polling_interval)
+      end
+
+    end
+  end
+end
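bq_load is the module's public entry point: it authenticates with the PKCS#12 key via Signet OAuth2, registers a BigQuery load job as a multipart upload through jobs.insert, and then polls jobs.get once a minute until the job reports DONE or polling_interval attempts run out. A minimal sketch of calling it directly is shown below, assuming the patriot-workflow-scheduler gem is already loaded (including this module also mixes in Patriot::Util::System); all paths, identifiers, schema fields, and option values are illustrative placeholders, and the options hash is simply merged into the job's load configuration by _make_body.

require 'patriot_gcp/ext/bigquery'

include PatriotGCP::Ext::BigQuery

# Illustrative schema in BigQuery's load-job format (configuration.load.schema).
schema = {
  'fields' => [
    {'name' => 'user_id', 'type' => 'STRING'},
    {'name' => 'count',   'type' => 'INTEGER'}
  ]
}

# Extra load-job settings; merged verbatim into configuration.load by _make_body.
options = {'sourceFormat' => 'CSV', 'writeDisposition' => 'WRITE_APPEND'}

stat_info = bq_load('/tmp/daily_counts.csv',                              # file to upload
                    '/path/to/service_account.p12',                       # p12_key
                    'notasecret',                                         # key_pass
                    'my-service-account@developer.gserviceaccount.com',   # email (OAuth issuer)
                    'my-gcp-project',                                     # project_id
                    'my_dataset',                                         # dataset_id
                    'daily_counts',                                       # table_id
                    schema,
                    options,
                    30)                                                   # poll up to ~30 minutes
puts stat_info   # job statistics returned when the load job finishes successfully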
metadata
ADDED
@@ -0,0 +1,66 @@
+--- !ruby/object:Gem::Specification
+name: patriot-gcp
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Hitoshi Tsuda
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-11-19 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: patriot-workflow-scheduler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '0.7'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '0.7'
+description: plugins for Patriot Workflow Scheduler, which deal with GCP such as BigQuery.
+email:
+- tsuda_hitoshi@cyberagent.co.jp
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/patriot_gcp/command/load_to_bigquery.rb
+- lib/patriot_gcp/command.rb
+- lib/patriot_gcp/ext/bigquery.rb
+- lib/patriot_gcp/ext.rb
+- lib/patriot_gcp/version.rb
+- lib/patriot_gcp.rb
+- init.rb
+homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
+licenses:
+- Apache License, Version 2.0
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project: patriot-gcp
+rubygems_version: 2.0.14
+signing_key:
+specification_version: 4
+summary: GCP plugin for Patriot Workflow Scheduler
+test_files: []
+has_rdoc:
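As a closing note on installation (not stated in the gemspec itself, so treat the commands below as an assumption based on the dependency and require_paths entries above): the gem installs like any other RubyGem, pulls in patriot-workflow-scheduler ~> 0.7 as a runtime dependency, and requiring patriot_gcp loads the load_to_bigquery command shown earlier.

$ gem install patriot-gcp        # also installs patriot-workflow-scheduler ~> 0.7
# then, from Ruby:
require 'patriot_gcp'            # loads PatriotGCP::Command::LoadToBigQueryCommand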