patriot-gcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 40974fda52b072f2713f5af1fdeb0d40007b12bc
4
+ data.tar.gz: 444fd93b08537f1d1279100bbe73c1b2d773b6aa
5
+ SHA512:
6
+ metadata.gz: 8645578eb374795e8bd9aae1398dca46cc257e0652a883b3d31cb604f534c88c70f872471b810c2084b5538593cb517824018cb1c6acfecfba56fe6aadf026c5
7
+ data.tar.gz: f16cca37d6e4ce03a9ea8728cded822dec6d4b25d2584885183de77a50dbef6ef7c9c5555135a46fb6479951bddf8540aeb52c5a1b0e2a2f2864ae8957456353
data/init.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'patriot'
2
+ require 'patriot_gcp'
@@ -0,0 +1,2 @@
1
+ require 'patriot_gcp/ext'
2
+ require 'patriot_gcp/command'
@@ -0,0 +1 @@
1
+ require 'patriot_gcp/command/load_to_bigquery'
@@ -0,0 +1,64 @@
1
module PatriotGCP
  module Command
    # Patriot command (registered as :load_to_bigquery) that uploads a local
    # file into a BigQuery table using the helpers mixed in from
    # PatriotGCP::Ext::BigQuery.
    class LoadToBigQueryCommand < Patriot::Command::Base
      declare_command_name :load_to_bigquery
      include PatriotGCP::Ext::BigQuery

      # Attributes declared through the command DSL; the methods below read
      # them back as instance variables (@inifile, @dataset, ...).
      command_attr :inifile, :dataset, :table, :schema, :options, :input_file, :name_suffix, :polling_interval

      # Raised when the [bigquery] settings in the inifile are incomplete.
      class BigQueryException < Exception; end
      # Raised when the [gcp] settings in the inifile are incomplete.
      class GoogleCloudPlatformException < Exception; end

      # Builds the job identifier "<command_name>_<dataset>_<table>", with
      # "_<name_suffix>" appended when a name suffix has been set.
      #
      # @return [String]
      def job_id
        base_id = "#{command_name}_#{@dataset}_#{@table}"
        @name_suffix.nil? ? base_id : "#{base_id}_#{@name_suffix}"
      end


      # Validates the input file and the inifile settings, then uploads the
      # file through bq_load and logs the value it returns.
      #
      # Returns early (after a warning) when the input file is empty.
      #
      # @raise [Exception] when the input file or the inifile cannot be found
      # @raise [GoogleCloudPlatformException] when service_account or
      #   private_key is missing from the [gcp] section
      # @raise [BigQueryException] when project_id is missing from the
      #   [bigquery] section
      def execute
        @logger.info "start load_to_bigquery"

        raise Exception, "The given file doesn't exist." unless File.exist?(@input_file)

        # File.size? yields nil for a zero-length file.
        unless File.size?(@input_file)
          @logger.warn "The target file is empty"
          return
        end

        ini = IniFile.load(@inifile)
        raise Exception, "inifile not found" if ini.nil?

        gcp_section = ini["gcp"]
        service_account = gcp_section["service_account"]
        private_key     = gcp_section["private_key"]
        key_pass        = gcp_section["key_pass"]
        project_id      = ini["bigquery"]["project_id"]

        # The GCP credentials are checked first, so their error wins when
        # both sections are incomplete.
        if service_account.nil? || private_key.nil?
          raise GoogleCloudPlatformException, "configuration for GCP is not enough."
        end
        raise BigQueryException, "configuration for BigQuery is not enough." if project_id.nil?

        @logger.info "start uploading"
        stat_info = bq_load(
          @input_file,
          private_key,
          key_pass,
          service_account,
          project_id,
          @dataset,
          @table,
          @schema,
          @options,
          @polling_interval
        )

        @logger.info "upload succeeded: #{stat_info}"
        @logger.info "end load_to_bigquery"
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ require "patriot_gcp/ext/bigquery"
@@ -0,0 +1,162 @@
1
+ require 'google/api_client'
2
+ require 'patriot_gcp/version'
3
+
4
+
5
module PatriotGCP
  module Ext
    # Mixin that loads a local file into a BigQuery table through
    # Google::APIClient. The public entry point is #bq_load; the methods
    # prefixed with an underscore are its building blocks.
    module BigQuery

      # Include hook: also mixes Patriot::Util::System into the including class.
      def self.included(cls)
        cls.send(:include, Patriot::Util::System)
      end

      # Raised when a load job cannot be registered, cannot be inspected,
      # reports errors, or is not DONE after the allowed number of polls.
      # NOTE(review): derives from Exception rather than StandardError, so a
      # bare `rescue` does not catch it. Left unchanged for compatibility.
      class BigQueryException < Exception; end

      # Builds an OAuth2 client for the BigQuery scope from a PKCS#12 key and
      # fetches an access token for it.
      #
      # @param p12_key  PKCS#12 key handed to KeyUtils.load_from_pkcs12
      # @param key_pass passphrase for the key
      # @param email    used as the token issuer
      # @return [Signet::OAuth2::Client] client holding a fetched access token
      def _get_auth_client(p12_key, key_pass, email)
        key = Google::APIClient::KeyUtils.load_from_pkcs12(p12_key, key_pass)
        auth_client = Signet::OAuth2::Client.new(
          :token_credential_uri => 'https://accounts.google.com/o/oauth2/token',
          :audience             => 'https://accounts.google.com/o/oauth2/token',
          :scope                => 'https://www.googleapis.com/auth/bigquery',
          :issuer               => email,
          :signing_key          => key)
        auth_client.fetch_access_token!
        auth_client
      end


      # Creates the API client, identified by the gem's project name and version.
      def _get_api_client()
        Google::APIClient.new(
          :application_name    => VERSION::PROJECT_NAME,
          :application_version => VERSION::VERSION)
      end


      # Builds the request body of a load job.
      #
      # @param project_id [String] project of the destination table
      # @param dataset_id [String] dataset of the destination table
      # @param table_id   [String] destination table
      # @param schema     value stored under the 'schema' key
      # @param options    [Hash, nil] extra entries merged into the 'load'
      #   configuration; an entry whose key already exists replaces it
      # @return [Hash] body of the form {'configuration' => {'load' => {...}}}
      def _make_body(project_id, dataset_id, table_id, schema, options)
        load_config = {
          'schema' => schema,
          'destinationTable' => {
            'projectId' => project_id,
            'datasetId' => dataset_id,
            'tableId'   => table_id
          }
        }
        (options || {}).each { |key, value| load_config[key] = value }

        { 'configuration' => { 'load' => load_config } }
      end


      # Polls a registered job until it reaches the DONE state.
      #
      # The job is inspected up to +polling_interval+ times, with a 60 second
      # sleep after every check that did not find the job DONE (including the
      # last one, right before the timeout error is raised).
      #
      # @param polling_interval [Integer] maximum number of status checks
      # @return the 'statistics' entry of the job once it is DONE without errors
      # @raise [BigQueryException] when the response carries no status object,
      #   when the finished job reports errors, or when the job is still not
      #   DONE after the last check
      def _poll(bq_client,
                api_client,
                auth_client,
                project_id,
                job_id,
                polling_interval)

        polling_interval.times do
          raw_body = api_client.execute(
            :api_method    => bq_client.jobs.get,
            :parameters    => {
              'jobId'     => job_id,
              'projectId' => project_id
            },
            :headers       => {'Content-Type' => 'application/json; charset=UTF-8'},
            :authorization => auth_client
          ).response.body
          response = JSON.parse(raw_body)
          status = response.is_a?(Hash) ? response['status'] : nil

          # A payload without a status object (e.g. an API error document)
          # used to surface as NoMethodError on nil; report it explicitly.
          unless status.is_a?(Hash)
            raise BigQueryException, "failed to get job status: #{raw_body}"
          end

          if status['state'] == 'DONE'
            if status['errors']
              raise BigQueryException, "upload failed: #{status['errors']}"
            end
            return response['statistics']
          end

          sleep 60
        end

        raise BigQueryException,"registered job didn't finish within: #{polling_interval} mins. please check if it will finish later on. jobId: #{job_id}"
      end


      # Registers a load job that uploads +filename+ as a multipart request,
      # then polls the job until it finishes.
      #
      # @return the value returned by #_poll for the registered job
      # @raise [BigQueryException] when no job id can be read from the
      #   registration response, or when polling fails
      def _bq_load(filename,
                   project_id,
                   dataset_id,
                   table_id,
                   auth_client,
                   api_client,
                   schema,
                   options,
                   polling_interval)

        bq_client = api_client.discovered_api('bigquery', 'v2')
        body      = _make_body(project_id, dataset_id, table_id, schema, options)
        media     = Google::APIClient::UploadIO.new(filename, "application/octet-stream")

        begin
          result = api_client.execute(
            :api_method    => bq_client.jobs.insert,
            :parameters    => {
              'projectId'  => project_id,
              'uploadType' => 'multipart'
            },
            :body_object   => body,
            :authorization => auth_client,
            :media         => media
          )
        ensure
          # The upload IO is not needed once the request has been made;
          # release it so repeated loads do not accumulate open handles.
          media.close if media.respond_to?(:close)
        end

        begin
          job_id = JSON.parse(result.response.body)['jobReference']['jobId']
        rescue
          raise BigQueryException, "failed to register job: #{result.response.body}"
        end

        _poll(bq_client,
              api_client,
              auth_client,
              project_id,
              job_id,
              polling_interval)
      end


      # Loads a local file into a BigQuery table and waits for the job.
      #
      # @param filename   [String] file to upload
      # @param p12_key    PKCS#12 key used for authentication
      # @param key_pass   passphrase for the key
      # @param email      used as the token issuer
      # @param project_id [String] project of the destination table
      # @param dataset_id [String] dataset of the destination table
      # @param table_id   [String] destination table
      # @param schema     schema placed in the load configuration
      # @param options    [Hash, nil] extra load configuration (default: {})
      # @param polling_interval [Integer, nil] maximum number of status
      #   checks, 60 seconds apart (default: 60)
      # @return the value returned by #_bq_load
      def bq_load(filename,
                  p12_key,
                  key_pass,
                  email,
                  project_id,
                  dataset_id,
                  table_id,
                  schema,
                  options=nil,
                  polling_interval=nil)

        options ||= {}
        polling_interval ||= 60

        api_client  = _get_api_client()
        auth_client = _get_auth_client(p12_key, key_pass, email)

        _bq_load(filename,
                 project_id,
                 dataset_id,
                 table_id,
                 auth_client,
                 api_client,
                 schema,
                 options,
                 polling_interval)
      end

    end
  end
end
@@ -0,0 +1,4 @@
1
# Holder for the gem's version and project-name constants.
class VERSION
  # Version string of this release.
  VERSION = '0.1.0'
  # Name of the project.
  PROJECT_NAME = 'patriot-gcp'
end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: patriot-gcp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hitoshi Tsuda
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-11-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: patriot-workflow-scheduler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '0.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '0.7'
27
+ description: plugins for Patriot Workflow Scheduler, which deal with GCP such as BigQuery.
28
+ email:
29
+ - tsuda_hitoshi@cyberagent.co.jp
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - lib/patriot_gcp/command/load_to_bigquery.rb
35
+ - lib/patriot_gcp/command.rb
36
+ - lib/patriot_gcp/ext/bigquery.rb
37
+ - lib/patriot_gcp/ext.rb
38
+ - lib/patriot_gcp/version.rb
39
+ - lib/patriot_gcp.rb
40
+ - init.rb
41
+ homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
42
+ licenses:
43
+ - Apache License, Version 2.0
44
+ metadata: {}
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project: patriot-gcp
61
+ rubygems_version: 2.0.14
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: GCP plugin for Patriot Workflow Scheduler
65
+ test_files: []
66
+ has_rdoc: