patriot-gcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/init.rb +2 -0
- data/lib/patriot_gcp.rb +2 -0
- data/lib/patriot_gcp/command.rb +1 -0
- data/lib/patriot_gcp/command/load_to_bigquery.rb +64 -0
- data/lib/patriot_gcp/ext.rb +1 -0
- data/lib/patriot_gcp/ext/bigquery.rb +162 -0
- data/lib/patriot_gcp/version.rb +4 -0
- metadata +66 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 40974fda52b072f2713f5af1fdeb0d40007b12bc
|
4
|
+
data.tar.gz: 444fd93b08537f1d1279100bbe73c1b2d773b6aa
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8645578eb374795e8bd9aae1398dca46cc257e0652a883b3d31cb604f534c88c70f872471b810c2084b5538593cb517824018cb1c6acfecfba56fe6aadf026c5
|
7
|
+
data.tar.gz: f16cca37d6e4ce03a9ea8728cded822dec6d4b25d2584885183de77a50dbef6ef7c9c5555135a46fb6479951bddf8540aeb52c5a1b0e2a2f2864ae8957456353
|
data/init.rb
ADDED
data/lib/patriot_gcp.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'patriot_gcp/command/load_to_bigquery'
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module PatriotGCP
  module Command
    # Patriot workflow command that loads a local file into a Google BigQuery
    # table. Credentials and project configuration are read from an ini file
    # ([gcp] service_account / private_key / key_pass, [bigquery] project_id);
    # the actual upload is delegated to PatriotGCP::Ext::BigQuery#bq_load.
    class LoadToBigQueryCommand < Patriot::Command::Base
      declare_command_name :load_to_bigquery
      include PatriotGCP::Ext::BigQuery

      command_attr :inifile, :dataset, :table, :schema, :options, :input_file, :name_suffix, :polling_interval

      # Subclass StandardError (not Exception) so these are caught by a plain
      # `rescue` and never mask signals or system exits.
      class BigQueryException < StandardError; end
      class GoogleCloudPlatformException < StandardError; end

      # Unique job identifier built from the command name, dataset and table,
      # plus the optional name suffix when one is configured.
      def job_id
        job_id = "#{command_name}_#{@dataset}_#{@table}"
        job_id = "#{job_id}_#{@name_suffix}" unless @name_suffix.nil?
        return job_id
      end


      # Validate the input file and ini configuration, then upload the file to
      # BigQuery. An empty input file is skipped with a warning instead of
      # being treated as an error.
      #
      # @raise [RuntimeError] when the input file or ini file is missing
      # @raise [GoogleCloudPlatformException] when GCP credentials are missing
      # @raise [BigQueryException] when the BigQuery project id is missing
      def execute
        @logger.info "start load_to_bigquery"

        unless File.exist?(@input_file)
          # RuntimeError instead of raising bare Exception, which a plain
          # `rescue` in the scheduler would not catch.
          raise "The given file doesn't exist."
        end

        # File.size? returns nil for empty (or missing) files.
        unless File.size?(@input_file)
          @logger.warn "The target file is empty"
          return
        end

        ini = IniFile.load(@inifile)
        raise "inifile not found" if ini.nil?

        service_account = ini["gcp"]["service_account"]
        private_key     = ini["gcp"]["private_key"]
        key_pass        = ini["gcp"]["key_pass"]
        project_id      = ini["bigquery"]["project_id"]

        if service_account.nil? || private_key.nil?
          raise GoogleCloudPlatformException, "configuration for GCP is not enough."
        elsif project_id.nil?
          raise BigQueryException, "configuration for BigQuery is not enough."
        end

        @logger.info "start uploading"
        stat_info = bq_load(@input_file,
                            private_key,
                            key_pass,
                            service_account,
                            project_id,
                            @dataset,
                            @table,
                            @schema,
                            @options,
                            @polling_interval)

        @logger.info "upload succeeded: #{stat_info}"
        @logger.info "end load_to_bigquery"
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require "patriot_gcp/ext/bigquery"
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'google/api_client'
|
2
|
+
require 'patriot_gcp/version'
|
3
|
+
|
4
|
+
|
5
|
+
module PatriotGCP
  module Ext
    # Mixin that uploads a local file to Google BigQuery through the
    # google-api-client gem: it registers a multipart load job and then polls
    # the job status until completion. Entry point is #bq_load.
    module BigQuery

      # When mixed into a command class, also pull in the Patriot system
      # utilities expected by the surrounding framework.
      def self.included(cls)
        cls.send(:include, Patriot::Util::System)
      end

      # StandardError subclass (not Exception) so a plain `rescue` catches
      # upload failures without masking signals or system exits.
      class BigQueryException < StandardError; end

      # Build an OAuth2 client authorized for the BigQuery scope from a
      # PKCS#12 service-account key and fetch an access token immediately.
      def _get_auth_client(p12_key, key_pass, email)
        key = Google::APIClient::KeyUtils.load_from_pkcs12(p12_key, key_pass)
        auth_client = Signet::OAuth2::Client.new(
          :token_credential_uri => 'https://accounts.google.com/o/oauth2/token',
          :audience             => 'https://accounts.google.com/o/oauth2/token',
          :scope                => 'https://www.googleapis.com/auth/bigquery',
          :issuer               => email,
          :signing_key          => key)
        auth_client.fetch_access_token!
        return auth_client
      end


      # Google API client identified by this plugin's name and version.
      def _get_api_client()
        Google::APIClient.new(
          :application_name    => VERSION::PROJECT_NAME,
          :application_version => VERSION::VERSION)
      end


      # Request body for a BigQuery load job. Any extra load options
      # (e.g. sourceFormat, writeDisposition) are merged into the 'load'
      # section, and may overwrite the prebuilt keys.
      def _make_body(project_id, dataset_id, table_id, schema, options)
        body = {
          'configuration' => {
            'load' => {
              'schema' => schema,
              'destinationTable' => {
                'projectId' => project_id,
                'datasetId' => dataset_id,
                'tableId'   => table_id
              }
            }
          }
        }
        if options
          options.each{|key, value|
            body['configuration']['load'][key] = value
          }
        end

        return body
      end


      # Poll the load job once a minute until it reaches state DONE, or until
      # polling_interval attempts (i.e. roughly polling_interval minutes)
      # have elapsed.
      #
      # @return [Hash] the job's "statistics" section on success
      # @raise [BigQueryException] when the job reports errors or times out
      def _poll(bq_client,
                api_client,
                auth_client,
                project_id,
                job_id,
                polling_interval)

        polling_interval.times{
          response = JSON.parse(api_client.execute(
            :api_method => bq_client.jobs.get,
            :parameters => {
              'jobId'     => job_id,
              'projectId' => project_id
            },
            :headers => {'Content-Type' => 'application/json; charset=UTF-8'},
            :authorization => auth_client
          ).response.body)
          state = response["status"]["state"]

          if state == 'DONE'
            if response["status"]["errors"]
              raise BigQueryException, "upload failed: #{response['status']['errors']}"
            else
              return response["statistics"]
            end
          end

          sleep 60
        }

        raise BigQueryException,"registered job didn't finish within: #{polling_interval} mins. please check if it will finish later on. jobId: #{job_id}"
      end


      # Register the multipart load job for the given file and delegate to
      # _poll for completion. The uploaded file is sent as the media part of
      # the request; the job configuration as the body.
      #
      # @raise [BigQueryException] when the insert response has no job id
      def _bq_load(filename,
                   project_id,
                   dataset_id,
                   table_id,
                   auth_client,
                   api_client,
                   schema,
                   options,
                   polling_interval)

        bq_client = api_client.discovered_api('bigquery', 'v2')
        body = _make_body(project_id, dataset_id, table_id, schema, options)
        media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")

        result = api_client.execute(
          :api_method => bq_client.jobs.insert,
          :parameters => {
            'projectId'  => project_id,
            'uploadType' => 'multipart'
          },
          :body_object => body,
          :authorization => auth_client,
          :media => media
        )

        begin
          # Any parse/shape failure here means the job was not registered;
          # surface the raw response body for diagnosis.
          job_id = JSON.parse(result.response.body)['jobReference']['jobId']
        rescue
          raise BigQueryException, "failed to register job: #{result.response.body}"
        end

        return _poll(bq_client,
                     api_client,
                     auth_client,
                     project_id,
                     job_id,
                     polling_interval)
      end


      # Public entry point: authenticate, then load `filename` into
      # project_id.dataset_id.table_id with the given schema.
      #
      # @param options [Hash, nil] extra BigQuery load options (default {})
      # @param polling_interval [Integer, nil] max polling attempts, one per
      #   minute (default 60)
      # @return [Hash] job statistics on success
      def bq_load(filename,
                  p12_key,
                  key_pass,
                  email,
                  project_id,
                  dataset_id,
                  table_id,
                  schema,
                  options=nil,
                  polling_interval=nil)

        options ||= {}
        polling_interval ||= 60

        api_client = _get_api_client()
        auth_client = _get_auth_client(p12_key, key_pass, email)

        return _bq_load(filename,
                        project_id,
                        dataset_id,
                        table_id,
                        auth_client,
                        api_client,
                        schema,
                        options,
                        polling_interval)
      end

    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: patriot-gcp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Hitoshi Tsuda
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: patriot-workflow-scheduler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.7'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.7'
|
27
|
+
description: plugins for Patriot Workflow Scheduler, which deal with GCP services such as BigQuery.
|
28
|
+
email:
|
29
|
+
- tsuda_hitoshi@cyberagent.co.jp
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- lib/patriot_gcp/command/load_to_bigquery.rb
|
35
|
+
- lib/patriot_gcp/command.rb
|
36
|
+
- lib/patriot_gcp/ext/bigquery.rb
|
37
|
+
- lib/patriot_gcp/ext.rb
|
38
|
+
- lib/patriot_gcp/version.rb
|
39
|
+
- lib/patriot_gcp.rb
|
40
|
+
- init.rb
|
41
|
+
homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
|
42
|
+
licenses:
|
43
|
+
- Apache License, Version 2.0
|
44
|
+
metadata: {}
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options: []
|
47
|
+
require_paths:
|
48
|
+
- lib
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubyforge_project: patriot-gcp
|
61
|
+
rubygems_version: 2.0.14
|
62
|
+
signing_key:
|
63
|
+
specification_version: 4
|
64
|
+
summary: GCP plugin for Patriot Workflow Scheduler
|
65
|
+
test_files: []
|
66
|
+
has_rdoc:
|