patriot-gcp 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 40974fda52b072f2713f5af1fdeb0d40007b12bc
4
+ data.tar.gz: 444fd93b08537f1d1279100bbe73c1b2d773b6aa
5
+ SHA512:
6
+ metadata.gz: 8645578eb374795e8bd9aae1398dca46cc257e0652a883b3d31cb604f534c88c70f872471b810c2084b5538593cb517824018cb1c6acfecfba56fe6aadf026c5
7
+ data.tar.gz: f16cca37d6e4ce03a9ea8728cded822dec6d4b25d2584885183de77a50dbef6ef7c9c5555135a46fb6479951bddf8540aeb52c5a1b0e2a2f2864ae8957456353
data/init.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'patriot'
2
+ require 'patriot_gcp'
@@ -0,0 +1,2 @@
1
+ require 'patriot_gcp/ext'
2
+ require 'patriot_gcp/command'
@@ -0,0 +1 @@
1
+ require 'patriot_gcp/command/load_to_bigquery'
@@ -0,0 +1,64 @@
1
module PatriotGCP
  module Command
    # Patriot command (registered as :load_to_bigquery) that uploads a local
    # file to a BigQuery table through PatriotGCP::Ext::BigQuery#bq_load.
    #
    # Attributes declared through command_attr and read here as instance
    # variables:
    #   inifile          - path handed to IniFile.load; the "gcp" section is read
    #                      for service_account / private_key / key_pass and the
    #                      "bigquery" section for project_id
    #   dataset, table   - destination dataset and table ids
    #   schema           - schema passed through unchanged to bq_load
    #   options          - extra load options passed through to bq_load (may be nil)
    #   input_file       - path of the local file to upload
    #   name_suffix      - optional suffix appended to the job id
    #   polling_interval - passed through to bq_load (may be nil)
    class LoadToBigQueryCommand < Patriot::Command::Base
      declare_command_name :load_to_bigquery
      include PatriotGCP::Ext::BigQuery

      command_attr :inifile, :dataset, :table, :schema, :options, :input_file, :name_suffix, :polling_interval

      # Derive from StandardError (not Exception) so that an ordinary
      # `rescue => e` catches them; `rescue Exception` still catches them too.
      class BigQueryException < StandardError; end
      class GoogleCloudPlatformException < StandardError; end

      # Builds the job identifier "<command_name>_<dataset>_<table>", with
      # "_<name_suffix>" appended when a suffix is set.
      #
      # @return [String]
      def job_id
        base = "#{command_name}_#{@dataset}_#{@table}"
        @name_suffix.nil? ? base : "#{base}_#{@name_suffix}"
      end

      # Validates the input file and the ini configuration, then uploads the
      # file to BigQuery and logs the statistics returned by bq_load.
      #
      # An existing but empty input file is not an error: a warning is logged
      # and nothing is uploaded.
      #
      # @raise [ArgumentError] when the input file does not exist or the ini
      #   file could not be loaded
      # @raise [GoogleCloudPlatformException] when service_account or
      #   private_key is missing from the "gcp" section
      # @raise [BigQueryException] when project_id is missing from the
      #   "bigquery" section
      def execute
        @logger.info "start load_to_bigquery"

        raise ArgumentError, "The given file doesn't exist." unless File.exist?(@input_file)

        # File.size? is nil for a zero-byte file.
        unless File.size?(@input_file)
          @logger.warn "The target file is empty"
          return
        end

        ini = IniFile.load(@inifile)
        raise ArgumentError, "inifile not found" if ini.nil?

        gcp_conf        = ini["gcp"]
        service_account = gcp_conf["service_account"]
        private_key     = gcp_conf["private_key"]
        key_pass        = gcp_conf["key_pass"]
        project_id      = ini["bigquery"]["project_id"]

        if service_account.nil? || private_key.nil?
          raise GoogleCloudPlatformException, "configuration for GCP is not enough."
        elsif project_id.nil?
          raise BigQueryException, "configuration for BigQuery is not enough."
        end

        @logger.info "start uploading"
        stat_info = bq_load(@input_file,
                            private_key,
                            key_pass,
                            service_account,
                            project_id,
                            @dataset,
                            @table,
                            @schema,
                            @options,
                            @polling_interval)

        @logger.info "upload succeeded: #{stat_info}"
        @logger.info "end load_to_bigquery"
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ require "patriot_gcp/ext/bigquery"
@@ -0,0 +1,162 @@
1
+ require 'google/api_client'
2
+ require 'patriot_gcp/version'
3
+
4
+
5
module PatriotGCP
  module Ext
    # Mixin that uploads a local file to BigQuery as a load job and waits for
    # the job to finish. The public entry point is #bq_load; the underscore
    # prefixed methods are its building blocks.
    module BigQuery

      # Seconds to wait between two consecutive job status checks.
      POLL_SLEEP_SECONDS = 60

      # Also mixes Patriot::Util::System into every class that includes this
      # module.
      def self.included(cls)
        cls.send(:include, Patriot::Util::System)
      end

      # Derives from StandardError (not Exception) so that an ordinary
      # `rescue => e` catches it; `rescue Exception` still catches it too.
      class BigQueryException < StandardError; end

      # Builds a Signet OAuth2 client scoped to BigQuery and fetches an access
      # token for it.
      #
      # @param p12_key  handed to Google::APIClient::KeyUtils.load_from_pkcs12
      #                 (presumably the path of a PKCS#12 key file - confirm)
      # @param key_pass passphrase handed to load_from_pkcs12
      # @param email    used as the token issuer (service account address)
      # @return [Signet::OAuth2::Client] client on which fetch_access_token!
      #   has already been called
      def _get_auth_client(p12_key, key_pass, email)
        signing_key = Google::APIClient::KeyUtils.load_from_pkcs12(p12_key, key_pass)
        auth_client = Signet::OAuth2::Client.new(
          :token_credential_uri => 'https://accounts.google.com/o/oauth2/token',
          :audience             => 'https://accounts.google.com/o/oauth2/token',
          :scope                => 'https://www.googleapis.com/auth/bigquery',
          :issuer               => email,
          :signing_key          => signing_key)
        auth_client.fetch_access_token!
        auth_client
      end


      # Creates a Google::APIClient identified by this gem's project name and
      # version (taken from the VERSION class).
      def _get_api_client
        Google::APIClient.new(
          :application_name    => VERSION::PROJECT_NAME,
          :application_version => VERSION::VERSION)
      end


      # Builds the request body of a load job.
      #
      # @param project_id [String] destination project id
      # @param dataset_id [String] destination dataset id
      # @param table_id   [String] destination table id
      # @param schema     value stored under configuration.load.schema
      # @param options    [Hash, nil] extra entries copied into
      #   configuration.load; an entry whose key matches a default
      #   ('schema', 'destinationTable') replaces that default
      # @return [Hash]
      def _make_body(project_id, dataset_id, table_id, schema, options)
        load_config = {
          'schema' => schema,
          'destinationTable' => {
            'projectId' => project_id,
            'datasetId' => dataset_id,
            'tableId'   => table_id
          }
        }
        options.each { |key, value| load_config[key] = value } if options

        { 'configuration' => { 'load' => load_config } }
      end


      # Polls the job status until the job reports the state 'DONE'.
      #
      # Despite its name, polling_interval is the maximum NUMBER of status
      # checks; consecutive checks are POLL_SLEEP_SECONDS apart. No sleep
      # happens after the final check, because nothing would be re-checked
      # afterwards.
      #
      # @param bq_client   discovered BigQuery API (must provide jobs.get)
      # @param api_client  client used to execute the request
      # @param auth_client authorization passed along with each request
      # @param project_id  [String]
      # @param job_id      [String]
      # @param polling_interval [Integer] maximum number of status checks
      # @return the "statistics" entry of the job response
      # @raise [BigQueryException] when the finished job reports errors, when
      #   the response carries no status object, or when the job is not done
      #   after the last check
      def _poll(bq_client,
                api_client,
                auth_client,
                project_id,
                job_id,
                polling_interval)

        polling_interval.times do |attempt|
          result = api_client.execute(
            :api_method => bq_client.jobs.get,
            :parameters => {
              'jobId'     => job_id,
              'projectId' => project_id
            },
            :headers       => {'Content-Type' => 'application/json; charset=UTF-8'},
            :authorization => auth_client
          )
          response = JSON.parse(result.response.body)
          status   = response["status"]

          # A payload without a status object (e.g. an API error) is reported
          # explicitly instead of failing with NoMethodError on nil.
          unless status.is_a?(Hash)
            raise BigQueryException, "unexpected response while polling job #{job_id}: #{response}"
          end

          if status["state"] == 'DONE'
            # NOTE(review): BigQuery also reports status.errorResult; confirm
            # whether entries in status.errors should always fail the load.
            if status["errors"]
              raise BigQueryException, "upload failed: #{status['errors']}"
            end
            return response["statistics"]
          end

          sleep POLL_SLEEP_SECONDS unless attempt == polling_interval - 1
        end

        raise BigQueryException,"registered job didn't finish within: #{polling_interval} mins. please check if it will finish later on. jobId: #{job_id}"
      end


      # Registers a load job that uploads +filename+ as a multipart request
      # and then waits for the job through #_poll.
      #
      # @param filename [String] path of the file to upload
      # @param schema   see #_make_body
      # @param options  see #_make_body
      # @param polling_interval see #_poll
      # @return the job statistics returned by #_poll
      # @raise [BigQueryException] when no job id can be read from the insert
      #   response, or for any failure reported by #_poll
      def _bq_load(filename,
                   project_id,
                   dataset_id,
                   table_id,
                   auth_client,
                   api_client,
                   schema,
                   options,
                   polling_interval)

        bq_client = api_client.discovered_api('bigquery', 'v2')
        body      = _make_body(project_id, dataset_id, table_id, schema, options)
        # NOTE(review): the UploadIO is created from a path and never closed
        # here - confirm whether the library releases the underlying file.
        media     = Google::APIClient::UploadIO.new(filename, "application/octet-stream")

        result = api_client.execute(
          :api_method => bq_client.jobs.insert,
          :parameters => {
            'projectId'  => project_id,
            'uploadType' => 'multipart'
          },
          :body_object   => body,
          :authorization => auth_client,
          :media         => media
        )

        # Any failure to extract the job id (invalid JSON, missing
        # jobReference) means the job was not registered.
        job_id = begin
          JSON.parse(result.response.body)['jobReference']['jobId']
        rescue
          raise BigQueryException, "failed to register job: #{result.response.body}"
        end

        _poll(bq_client, api_client, auth_client, project_id, job_id, polling_interval)
      end


      # Uploads a local file to a BigQuery table and waits for the load job.
      #
      # @param filename   [String] path of the file to upload
      # @param p12_key    see #_get_auth_client
      # @param key_pass   see #_get_auth_client
      # @param email      service account address used as token issuer
      # @param project_id [String]
      # @param dataset_id [String]
      # @param table_id   [String]
      # @param schema     see #_make_body
      # @param options    [Hash, nil] extra load options; nil means none
      # @param polling_interval [Integer, nil] maximum number of status
      #   checks (see #_poll); nil means 60
      # @return the job statistics returned by #_poll
      # @raise [BigQueryException] see #_bq_load and #_poll
      def bq_load(filename,
                  p12_key,
                  key_pass,
                  email,
                  project_id,
                  dataset_id,
                  table_id,
                  schema,
                  options=nil,
                  polling_interval=nil)

        options          ||= {}
        polling_interval ||= 60

        api_client  = _get_api_client
        auth_client = _get_auth_client(p12_key, key_pass, email)

        _bq_load(filename,
                 project_id,
                 dataset_id,
                 table_id,
                 auth_client,
                 api_client,
                 schema,
                 options,
                 polling_interval)
      end

    end
  end
end
@@ -0,0 +1,4 @@
1
# Version information for the patriot-gcp gem.
class VERSION
  # Gem version string; frozen so the shared constant cannot be mutated.
  VERSION = "0.1.0".freeze
  # Project name; frozen for the same reason.
  PROJECT_NAME = "patriot-gcp".freeze
end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: patriot-gcp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hitoshi Tsuda
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-11-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: patriot-workflow-scheduler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '0.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '0.7'
27
+ description: plugins for Patriot Workflow Scheduler, which deal with GCP such as BigQuery.
28
+ email:
29
+ - tsuda_hitoshi@cyberagent.co.jp
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - lib/patriot_gcp/command/load_to_bigquery.rb
35
+ - lib/patriot_gcp/command.rb
36
+ - lib/patriot_gcp/ext/bigquery.rb
37
+ - lib/patriot_gcp/ext.rb
38
+ - lib/patriot_gcp/version.rb
39
+ - lib/patriot_gcp.rb
40
+ - init.rb
41
+ homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
42
+ licenses:
43
+ - Apache License, Version 2.0
44
+ metadata: {}
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project: patriot-gcp
61
+ rubygems_version: 2.0.14
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: GCP plugin for Patriot Workflow Scheduler
65
+ test_files: []
66
+ has_rdoc: