patriot-gcp 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/lib/patriot_gcp/command.rb +1 -0
- data/lib/patriot_gcp/command/bq.rb +47 -0
- data/lib/patriot_gcp/command/load_to_bigquery.rb +2 -10
- data/lib/patriot_gcp/ext/bigquery.rb +73 -130
- data/lib/patriot_gcp/version.rb +1 -1
- metadata +29 -15
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
Njk2ZDkwZWJmY2IyNjM0ZGNmNzk5YTQ3MWQ3ZWIyOGM4NTMwNWI3OQ==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 46d7bbfe5645c62e3ee60694ff6571d069159545
|
4
|
+
data.tar.gz: 9da277d13eb9daa09173c082adf58fc0722500bc
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
MzU5ODNkZjhhNWE5OTMxZmNhZmZlZWQwMjljZjM4NjhkZWNmYTYyZWFhNmMw
|
11
|
-
MjdmNjliZGY1MmM4ZWIyN2JhZGM4MTdjMmZjZWI0NGQ2YWFlOWI=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
NzRlYzRmYmFjMDU3ODIxNmQzOGRjOGRkMDc0OTVkZWE3ZDg2OWU3YjM5M2M2
|
14
|
-
Zjg2OTdlZTdlMTVhOTlmOTdkMzhiNDdkZDMyMmMyNzRkZDMwYjRmMjE0OTA3
|
15
|
-
YjYwMDA2ZmE5ZjY5ZjcxOTdhOTYyMDYwNDk5NWRhYTg3YzQyY2Y=
|
6
|
+
metadata.gz: 84952419fe93476b17e6b46e1957cb1d04461cf5749e4b4aa25d8612f2f2799c4279c63bb21da9845ae37ee537d5e0ce6a380e58524ea1f530de0b535c221a07
|
7
|
+
data.tar.gz: dd7f6a1b3e08c27b1108b25afe1530a5b3c244531e5e11c1f67d0f490ec10263e4f86dabfa45423174a6bfef0d98d7a4d9d27d2b72a579d99dd969044e3e9690
|
data/lib/patriot_gcp/command.rb
CHANGED
data/lib/patriot_gcp/command/bq.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module PatriotGCP
|
2
|
+
module Command
|
3
|
+
class BQCommand < Patriot::Command::Base
|
4
|
+
declare_command_name :bq
|
5
|
+
include PatriotGCP::Ext::BigQuery
|
6
|
+
|
7
|
+
command_attr :inifile, :project_id, :statement, :name_suffix
|
8
|
+
validate_existence :inifile, :project_id, :statement, :name_suffix
|
9
|
+
|
10
|
+
class BigQueryException < Exception; end
|
11
|
+
class GoogleCloudPlatformException < Exception; end
|
12
|
+
|
13
|
+
def job_id
|
14
|
+
"#{command_name}_#{@project_id}_#{@name_suffix}"
|
15
|
+
end
|
16
|
+
|
17
|
+
# @see Patriot::Command::Base#configure
|
18
|
+
def configure
|
19
|
+
if @name_suffix == _date_
|
20
|
+
raise ArgumentError, 'To set _date_ only is not allowed here to avoid job name duplication.'
|
21
|
+
end
|
22
|
+
@statement = eval_attr(@statement)
|
23
|
+
self
|
24
|
+
end
|
25
|
+
|
26
|
+
def execute
|
27
|
+
@logger.info "start bq"
|
28
|
+
|
29
|
+
ini = IniFile.load(@inifile)
|
30
|
+
if ini.nil?
|
31
|
+
raise Exception, "inifile not found"
|
32
|
+
end
|
33
|
+
|
34
|
+
bigquery_keyfile = ini["gcp"]["bigquery_keyfile"]
|
35
|
+
|
36
|
+
stat_info = bq(
|
37
|
+
bigquery_keyfile,
|
38
|
+
@project_id,
|
39
|
+
@statement
|
40
|
+
)
|
41
|
+
|
42
|
+
@logger.info "statement execution succeeded: #{stat_info}"
|
43
|
+
@logger.info "end bq"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/patriot_gcp/command/load_to_bigquery.rb
CHANGED
@@ -28,9 +28,7 @@ module PatriotGCP
|
|
28
28
|
raise Exception, "inifile not found"
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
|
-
private_key = ini["gcp"]["private_key"]
|
33
|
-
key_pass = ini["gcp"]["key_pass"]
|
31
|
+
bigquery_keyfile = ini["gcp"]["bigquery_keyfile"]
|
34
32
|
|
35
33
|
unless File.exist?(@input_file)
|
36
34
|
raise Exception, "The given file doesn't exist."
|
@@ -41,15 +39,9 @@ module PatriotGCP
|
|
41
39
|
return
|
42
40
|
end
|
43
41
|
|
44
|
-
if service_account.nil? or private_key.nil?
|
45
|
-
raise GoogleCloudPlatformException, "configuration for GCP is not enough."
|
46
|
-
end
|
47
|
-
|
48
42
|
@logger.info "start uploading"
|
49
43
|
stat_info = bq_load(@input_file,
|
50
|
-
|
51
|
-
key_pass,
|
52
|
-
service_account,
|
44
|
+
bigquery_keyfile,
|
53
45
|
@project_id,
|
54
46
|
@dataset,
|
55
47
|
@table,
|
data/lib/patriot_gcp/ext/bigquery.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'google/api_client'
|
1
|
+
require 'google/cloud/bigquery'
|
2
2
|
require 'patriot_gcp/version'
|
3
3
|
|
4
4
|
|
@@ -12,151 +12,94 @@ module PatriotGCP
|
|
12
12
|
|
13
13
|
class BigQueryException < Exception; end
|
14
14
|
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
auth_client.fetch_access_token!
|
24
|
-
return auth_client
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
def _get_api_client()
|
29
|
-
Google::APIClient.new(
|
30
|
-
:application_name => VERSION::PROJECT_NAME,
|
31
|
-
:application_version => VERSION::VERSION)
|
32
|
-
end
|
33
|
-
|
15
|
+
def bq_load(filename,
|
16
|
+
bigquery_keyfile,
|
17
|
+
project_id,
|
18
|
+
dataset_id,
|
19
|
+
table_id,
|
20
|
+
schema,
|
21
|
+
options=nil,
|
22
|
+
polling_interval=nil)
|
34
23
|
|
35
|
-
|
36
|
-
|
37
|
-
'configuration' => {
|
38
|
-
'load' => {
|
39
|
-
'schema' => schema,
|
40
|
-
'destinationTable' => {
|
41
|
-
'projectId' => project_id,
|
42
|
-
'datasetId' => dataset_id,
|
43
|
-
'tableId' => table_id
|
44
|
-
}
|
45
|
-
}
|
46
|
-
}
|
47
|
-
}
|
48
|
-
if options
|
49
|
-
options.each{|key, value|
|
50
|
-
body['configuration']['load'][key] = value
|
51
|
-
}
|
52
|
-
end
|
24
|
+
options ||= {}
|
25
|
+
polling_interval ||= 60
|
53
26
|
|
54
|
-
|
55
|
-
end
|
27
|
+
ENV['BIGQUERY_KEYFILE'] = bigquery_keyfile
|
56
28
|
|
29
|
+
bigquery = Google::Cloud::Bigquery.new(
|
30
|
+
project: project_id,
|
31
|
+
retries: 3,
|
32
|
+
timeout: polling_interval * 60
|
33
|
+
)
|
57
34
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
35
|
+
# exclude partition string
|
36
|
+
# table_name$YYYYMMDD -> table_name
|
37
|
+
original_table_id = table_id.split('$')[0]
|
38
|
+
|
39
|
+
dataset = bigquery.dataset dataset_id
|
40
|
+
table = dataset.table original_table_id
|
41
|
+
|
42
|
+
if table.nil?
|
43
|
+
# TODO:
|
44
|
+
# schemaとoptionがメソッドやその引数で指定されるようになっており、
|
45
|
+
# 大幅な仕様変更となっているが、旧ライブラリ同様の設定を読み込めるよう
|
46
|
+
# 議論されている。
|
47
|
+
# https://github.com/GoogleCloudPlatform/google-cloud-ruby/issues/1919
|
48
|
+
#
|
49
|
+
# こちらが対応された場合は下記ソースを変更する。
|
50
|
+
dataset.create_table original_table_id do |updater|
|
51
|
+
updater.schema do |scm|
|
52
|
+
schema['fields'].each do |row|
|
53
|
+
name = row['name']
|
54
|
+
type = row['type'].downcase.to_sym
|
55
|
+
mode = row['mode'].downcase.to_sym if row['mode']
|
56
|
+
|
57
|
+
scm.method(type).call(name, mode: mode)
|
58
|
+
end
|
82
59
|
end
|
60
|
+
# 取り込み時間分割テーブルに設定
|
61
|
+
updater.time_partitioning_type = "DAY"
|
83
62
|
end
|
63
|
+
end
|
84
64
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
dataset_id,
|
95
|
-
table_id,
|
96
|
-
auth_client,
|
97
|
-
api_client,
|
98
|
-
schema,
|
99
|
-
options,
|
100
|
-
polling_interval)
|
101
|
-
|
102
|
-
bq_client = api_client.discovered_api('bigquery', 'v2')
|
103
|
-
body = _make_body(project_id, dataset_id, table_id, schema, options)
|
104
|
-
media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")
|
105
|
-
|
106
|
-
result = api_client.execute(
|
107
|
-
:api_method => bq_client.jobs.insert,
|
108
|
-
:parameters => {
|
109
|
-
'projectId' => project_id,
|
110
|
-
'uploadType' => 'multipart'
|
111
|
-
},
|
112
|
-
:body_object => body,
|
113
|
-
:authorization => auth_client,
|
114
|
-
:media => media
|
65
|
+
job = dataset.load_job(
|
66
|
+
table_id,
|
67
|
+
filename,
|
68
|
+
format: options['format'] || nil,
|
69
|
+
quote: options['quote'] || nil,
|
70
|
+
skip_leading: options['skipLeadingRows'] || nil,
|
71
|
+
write: options['writeDisposition'] || nil,
|
72
|
+
delimiter: options['fieldDelimiter'] || nil,
|
73
|
+
null_marker: options['nullMarker'] || nil,
|
115
74
|
)
|
116
75
|
|
117
|
-
|
118
|
-
job_id = JSON.parse(result.response.body)['jobReference']['jobId']
|
119
|
-
rescue
|
120
|
-
raise BigQueryException, "failed to register job: #{result.response.body}"
|
121
|
-
end
|
76
|
+
job.wait_until_done!
|
122
77
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
polling_interval)
|
78
|
+
if job.failed?
|
79
|
+
raise BigQueryException, "upload failed: #{job.errors}"
|
80
|
+
else
|
81
|
+
return job.statistics
|
82
|
+
end
|
129
83
|
end
|
130
84
|
|
85
|
+
def bq(bigquery_keyfile, project_id, statement)
|
86
|
+
ENV['BIGQUERY_KEYFILE'] = bigquery_keyfile
|
131
87
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
project_id,
|
137
|
-
dataset_id,
|
138
|
-
table_id,
|
139
|
-
schema,
|
140
|
-
options=nil,
|
141
|
-
polling_interval=nil)
|
88
|
+
bigquery = Google::Cloud::Bigquery.new(
|
89
|
+
project: project_id,
|
90
|
+
retries: 3
|
91
|
+
)
|
142
92
|
|
143
|
-
|
144
|
-
polling_interval ||= 60
|
93
|
+
job = bigquery.query_job statement
|
145
94
|
|
146
|
-
|
147
|
-
auth_client = _get_auth_client(p12_key, key_pass, email)
|
148
|
-
|
149
|
-
return _bq_load(filename,
|
150
|
-
project_id,
|
151
|
-
dataset_id,
|
152
|
-
table_id,
|
153
|
-
auth_client,
|
154
|
-
api_client,
|
155
|
-
schema,
|
156
|
-
options,
|
157
|
-
polling_interval)
|
158
|
-
end
|
95
|
+
job.wait_until_done!
|
159
96
|
|
97
|
+
if job.failed?
|
98
|
+
raise BigQueryException, "statement execution failed: #{job.errors}"
|
99
|
+
else
|
100
|
+
return job.statistics
|
101
|
+
end
|
102
|
+
end
|
160
103
|
end
|
161
104
|
end
|
162
105
|
end
|
data/lib/patriot_gcp/version.rb
CHANGED
metadata
CHANGED
@@ -1,46 +1,61 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patriot-gcp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hitoshi Tsuda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: google-cloud-bigquery
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
27
|
-
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: patriot-workflow-scheduler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.8.7
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.8.7
|
41
|
+
description: plugins for Patriot Workflow Scheduler, which deal with GCP such as BigQuery.
|
28
42
|
email:
|
29
43
|
- tsuda_hitoshi@cyberagent.co.jp
|
30
44
|
executables: []
|
31
45
|
extensions: []
|
32
46
|
extra_rdoc_files: []
|
33
47
|
files:
|
34
|
-
-
|
35
|
-
- lib/patriot_gcp.rb
|
36
|
-
- lib/patriot_gcp/command.rb
|
48
|
+
- lib/patriot_gcp/command/bq.rb
|
37
49
|
- lib/patriot_gcp/command/load_to_bigquery.rb
|
38
|
-
- lib/patriot_gcp/
|
50
|
+
- lib/patriot_gcp/command.rb
|
39
51
|
- lib/patriot_gcp/ext/bigquery.rb
|
52
|
+
- lib/patriot_gcp/ext.rb
|
40
53
|
- lib/patriot_gcp/version.rb
|
54
|
+
- lib/patriot_gcp.rb
|
55
|
+
- init.rb
|
41
56
|
homepage: https://github.com/CyberAgent/patriot-workflow-scheduler/tree/master/plugins/patriot-gcp
|
42
57
|
licenses:
|
43
|
-
- Apache
|
58
|
+
- Apache-2.0
|
44
59
|
metadata: {}
|
45
60
|
post_install_message:
|
46
61
|
rdoc_options: []
|
@@ -48,19 +63,18 @@ require_paths:
|
|
48
63
|
- lib
|
49
64
|
required_ruby_version: !ruby/object:Gem::Requirement
|
50
65
|
requirements:
|
51
|
-
- -
|
66
|
+
- - '>='
|
52
67
|
- !ruby/object:Gem::Version
|
53
68
|
version: '0'
|
54
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
70
|
requirements:
|
56
|
-
- -
|
71
|
+
- - '>='
|
57
72
|
- !ruby/object:Gem::Version
|
58
73
|
version: '0'
|
59
74
|
requirements: []
|
60
75
|
rubyforge_project: patriot-gcp
|
61
|
-
rubygems_version: 2.
|
76
|
+
rubygems_version: 2.0.14.1
|
62
77
|
signing_key:
|
63
78
|
specification_version: 4
|
64
79
|
summary: GCP plugin for Patriot Workflow Scheduler
|
65
80
|
test_files: []
|
66
|
-
has_rdoc:
|