patriot-gcp 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/lib/patriot_gcp/command.rb +1 -0
- data/lib/patriot_gcp/command/bq.rb +47 -0
- data/lib/patriot_gcp/command/load_to_bigquery.rb +2 -10
- data/lib/patriot_gcp/ext/bigquery.rb +73 -130
- data/lib/patriot_gcp/version.rb +1 -1
- metadata +29 -15
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
Njk2ZDkwZWJmY2IyNjM0ZGNmNzk5YTQ3MWQ3ZWIyOGM4NTMwNWI3OQ==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 46d7bbfe5645c62e3ee60694ff6571d069159545
|
4
|
+
data.tar.gz: 9da277d13eb9daa09173c082adf58fc0722500bc
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
MzU5ODNkZjhhNWE5OTMxZmNhZmZlZWQwMjljZjM4NjhkZWNmYTYyZWFhNmMw
|
11
|
-
MjdmNjliZGY1MmM4ZWIyN2JhZGM4MTdjMmZjZWI0NGQ2YWFlOWI=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
NzRlYzRmYmFjMDU3ODIxNmQzOGRjOGRkMDc0OTVkZWE3ZDg2OWU3YjM5M2M2
|
14
|
-
Zjg2OTdlZTdlMTVhOTlmOTdkMzhiNDdkZDMyMmMyNzRkZDMwYjRmMjE0OTA3
|
15
|
-
YjYwMDA2ZmE5ZjY5ZjcxOTdhOTYyMDYwNDk5NWRhYTg3YzQyY2Y=
|
6
|
+
metadata.gz: 84952419fe93476b17e6b46e1957cb1d04461cf5749e4b4aa25d8612f2f2799c4279c63bb21da9845ae37ee537d5e0ce6a380e58524ea1f530de0b535c221a07
|
7
|
+
data.tar.gz: dd7f6a1b3e08c27b1108b25afe1530a5b3c244531e5e11c1f67d0f490ec10263e4f86dabfa45423174a6bfef0d98d7a4d9d27d2b72a579d99dd969044e3e9690
|
data/lib/patriot_gcp/command.rb
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
module PatriotGCP
|
2
|
+
module Command
|
3
|
+
class BQCommand < Patriot::Command::Base
|
4
|
+
declare_command_name :bq
|
5
|
+
include PatriotGCP::Ext::BigQuery
|
6
|
+
|
7
|
+
command_attr :inifile, :project_id, :statement, :name_suffix
|
8
|
+
validate_existence :inifile, :project_id, :statement, :name_suffix
|
9
|
+
|
10
|
+
class BigQueryException < Exception; end
|
11
|
+
class GoogleCloudPlatformException < Exception; end
|
12
|
+
|
13
|
+
def job_id
|
14
|
+
"#{command_name}_#{@project_id}_#{@name_suffix}"
|
15
|
+
end
|
16
|
+
|
17
|
+
# @see Patriot::Command::Base#configure
|
18
|
+
def configure
|
19
|
+
if @name_suffix == _date_
|
20
|
+
raise ArgumentError, 'To set _date_ only is not allowed here to avoid job name duplication.'
|
21
|
+
end
|
22
|
+
@statement = eval_attr(@statement)
|
23
|
+
self
|
24
|
+
end
|
25
|
+
|
26
|
+
def execute
|
27
|
+
@logger.info "start bq"
|
28
|
+
|
29
|
+
ini = IniFile.load(@inifile)
|
30
|
+
if ini.nil?
|
31
|
+
raise Exception, "inifile not found"
|
32
|
+
end
|
33
|
+
|
34
|
+
bigquery_keyfile = ini["gcp"]["bigquery_keyfile"]
|
35
|
+
|
36
|
+
stat_info = bq(
|
37
|
+
bigquery_keyfile,
|
38
|
+
@project_id,
|
39
|
+
@statement
|
40
|
+
)
|
41
|
+
|
42
|
+
@logger.info "statement execution succeeded: #{stat_info}"
|
43
|
+
@logger.info "end bq"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -28,9 +28,7 @@ module PatriotGCP
|
|
28
28
|
raise Exception, "inifile not found"
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
|
-
private_key = ini["gcp"]["private_key"]
|
33
|
-
key_pass = ini["gcp"]["key_pass"]
|
31
|
+
bigquery_keyfile = ini["gcp"]["bigquery_keyfile"]
|
34
32
|
|
35
33
|
unless File.exist?(@input_file)
|
36
34
|
raise Exception, "The given file doesn't exist."
|
@@ -41,15 +39,9 @@ module PatriotGCP
|
|
41
39
|
return
|
42
40
|
end
|
43
41
|
|
44
|
-
if service_account.nil? or private_key.nil?
|
45
|
-
raise GoogleCloudPlatformException, "configuration for GCP is not enough."
|
46
|
-
end
|
47
|
-
|
48
42
|
@logger.info "start uploading"
|
49
43
|
stat_info = bq_load(@input_file,
|
50
|
-
|
51
|
-
key_pass,
|
52
|
-
service_account,
|
44
|
+
bigquery_keyfile,
|
53
45
|
@project_id,
|
54
46
|
@dataset,
|
55
47
|
@table,
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'google/
|
1
|
+
require 'google/cloud/bigquery'
|
2
2
|
require 'patriot_gcp/version'
|
3
3
|
|
4
4
|
|
@@ -12,151 +12,94 @@ module PatriotGCP
|
|
12
12
|
|
13
13
|
class BigQueryException < Exception; end
|
14
14
|
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
auth_client.fetch_access_token!
|
24
|
-
return auth_client
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
def _get_api_client()
|
29
|
-
Google::APIClient.new(
|
30
|
-
:application_name => VERSION::PROJECT_NAME,
|
31
|
-
:application_version => VERSION::VERSION)
|
32
|
-
end
|
33
|
-
|
15
|
+
def bq_load(filename,
|
16
|
+
bigquery_keyfile,
|
17
|
+
project_id,
|
18
|
+
dataset_id,
|
19
|
+
table_id,
|
20
|
+
schema,
|
21
|
+
options=nil,
|
22
|
+
polling_interval=nil)
|
34
23
|
|
35
|
-
|
36
|
-
|
37
|
-
'configuration' => {
|
38
|
-
'load' => {
|
39
|
-
'schema' => schema,
|
40
|
-
'destinationTable' => {
|
41
|
-
'projectId' => project_id,
|
42
|
-
'datasetId' => dataset_id,
|
43
|
-
'tableId' => table_id
|
44
|
-
}
|
45
|
-
}
|
46
|
-
}
|
47
|
-
}
|
48
|
-
if options
|
49
|
-
options.each{|key, value|
|
50
|
-
body['configuration']['load'][key] = value
|
51
|
-
}
|
52
|
-
end
|
24
|
+
options ||= {}
|
25
|
+
polling_interval ||= 60
|
53
26
|
|
54
|
-
|
55
|
-
end
|
27
|
+
ENV['BIGQUERY_KEYFILE'] = bigquery_keyfile
|
56
28
|
|
29
|
+
bigquery = Google::Cloud::Bigquery.new(
|
30
|
+
project: project_id,
|
31
|
+
retries: 3,
|
32
|
+
timeout: polling_interval * 60
|
33
|
+
)
|
57
34
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
35
|
+
# exclude partition string
|
36
|
+
# table_name$YYYYMMDD -> table_name
|
37
|
+
original_table_id = table_id.split('$')[0]
|
38
|
+
|
39
|
+
dataset = bigquery.dataset dataset_id
|
40
|
+
table = dataset.table original_table_id
|
41
|
+
|
42
|
+
if table.nil?
|
43
|
+
# TODO:
|
44
|
+
# schemaとoptionがメソッドやその引数で指定されるようになっており、
|
45
|
+
# 大幅な仕様変更となっているが、旧ライブラリ同様の設定を読み込めるよう
|
46
|
+
# 議論されている。
|
47
|
+
# https://github.com/GoogleCloudPlatform/google-cloud-ruby/issues/1919
|
48
|
+
#
|
49
|
+
# こちらが対応された場合は下記ソースを変更する。
|
50
|
+
dataset.create_table original_table_id do |updater|
|
51
|
+
updater.schema do |scm|
|
52
|
+
schema['fields'].each do |row|
|
53
|
+
name = row['name']
|
54
|
+
type = row['type'].downcase.to_sym
|
55
|
+
mode = row['mode'].downcase.to_sym if row['mode']
|
56
|
+
|
57
|
+
scm.method(type).call(name, mode: mode)
|
58
|
+
end
|
82
59
|
end
|
60
|
+
# 取り込み時間分割テーブルに設定
|
61
|
+
updater.time_partitioning_type = "DAY"
|
83
62
|
end
|
63
|
+
end
|
84
64
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
dataset_id,
|
95
|
-
table_id,
|
96
|
-
auth_client,
|
97
|
-
api_client,
|
98
|
-
schema,
|
99
|
-
options,
|
100
|
-
polling_interval)
|
101
|
-
|
102
|
-
bq_client = api_client.discovered_api('bigquery', 'v2')
|
103
|
-
body = _make_body(project_id, dataset_id, table_id, schema, options)
|
104
|
-
media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")
|
105
|
-
|
106
|
-
result = api_client.execute(
|
107
|
-
:api_method => bq_client.jobs.insert,
|
108
|
-
:parameters => {
|
109
|
-
'projectId' => project_id,
|
110
|
-
'uploadType' => 'multipart'
|
111
|
-
},
|
112
|
-
:body_object => body,
|
113
|
-
:authorization => auth_client,
|
114
|
-
:media => media
|
65
|
+
job = dataset.load_job(
|
66
|
+
table_id,
|
67
|
+
filename,
|
68
|
+
format: options['format'] || nil,
|
69
|
+
quote: options['quote'] || nil,
|
70
|
+
skip_leading: options['skipLeadingRows'] || nil,
|
71
|
+
write: options['writeDisposition'] || nil,
|
72
|
+
delimiter: options['fieldDelimiter'] || nil,
|
73
|
+
null_marker: options['nullMarker'] || nil,
|
115
74
|
)
|
116
75
|
|
117
|
-
|
118
|
-
job_id = JSON.parse(result.response.body)['jobReference']['jobId']
|
119
|
-
rescue
|
120
|
-
raise BigQueryException, "failed to register job: #{result.response.body}"
|
121
|
-
end
|
76
|
+
job.wait_until_done!
|
122
77
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
polling_interval)
|
78
|
+
if job.failed?
|
79
|
+
raise BigQueryException, "upload failed: #{job.errors}"
|
80
|
+
else
|
81
|
+
return job.statistics
|
82
|
+
end
|
129
83
|
end
|
130
84
|
|
85
|
+
def bq(bigquery_keyfile, project_id, statement)
|
86
|
+
ENV['BIGQUERY_KEYFILE'] = bigquery_keyfile
|
131
87
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
project_id,
|
137
|
-
dataset_id,
|
138
|
-
table_id,
|
139
|
-
schema,
|
140
|
-
options=nil,
|
141
|
-
polling_interval=nil)
|
88
|
+
bigquery = Google::Cloud::Bigquery.new(
|
89
|
+
project: project_id,
|
90
|
+
retries: 3
|
91
|
+
)
|
142
92
|
|
143
|
-
|
144
|
-
polling_interval ||= 60
|
93
|
+
job = bigquery.query_job statement
|
145
94
|
|
146
|
-
|
147
|
-
auth_client = _get_auth_client(p12_key, key_pass, email)
|
148
|
-
|
149
|
-
return _bq_load(filename,
|
150
|
-
project_id,
|
151
|
-
dataset_id,
|
152
|
-
table_id,
|
153
|
-
auth_client,
|
154
|
-
api_client,
|
155
|
-
schema,
|
156
|
-
options,
|
157
|
-
polling_interval)
|
158
|
-
end
|
95
|
+
job.wait_until_done!
|
159
96
|
|
97
|
+
if job.failed?
|
98
|
+
raise BigQueryException, "statement execution failed: #{job.errors}"
|
99
|
+
else
|
100
|
+
return job.statistics
|
101
|
+
end
|
102
|
+
end
|
160
103
|
end
|
161
104
|
end
|
162
105
|
end
|
data/lib/patriot_gcp/version.rb
CHANGED
metadata
CHANGED
@@ -1,46 +1,61 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patriot-gcp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hitoshi Tsuda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: google-cloud-bigquery
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
27
|
-
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: patriot-workflow-scheduler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.8.7
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.8.7
|
41
|
+
description: plugins for Patriot Workflow Scheduler, which deal with GCP such as BigQuery.
|
28
42
|
email:
|
29
43
|
- tsuda_hitoshi@cyberagent.co.jp
|
30
44
|
executables: []
|
31
45
|
extensions: []
|
32
46
|
extra_rdoc_files: []
|
33
47
|
files:
|
34
|
-
-
|
35
|
-
- lib/patriot_gcp.rb
|
36
|
-
- lib/patriot_gcp/command.rb
|
48
|
+
- lib/patriot_gcp/command/bq.rb
|
37
49
|
- lib/patriot_gcp/command/load_to_bigquery.rb
|
38
|
-
- lib/patriot_gcp/
|
50
|
+
- lib/patriot_gcp/command.rb
|
39
51
|
- lib/patriot_gcp/ext/bigquery.rb
|
52
|
+
- lib/patriot_gcp/ext.rb
|
40
53
|
- lib/patriot_gcp/version.rb
|
54
|
+
- lib/patriot_gcp.rb
|
55
|
+
- init.rb
|
41
56
|
homepage: https://github.com/CyberAgent/patriot-workflow-scheduler/tree/master/plugins/patriot-gcp
|
42
57
|
licenses:
|
43
|
-
- Apache
|
58
|
+
- Apache-2.0
|
44
59
|
metadata: {}
|
45
60
|
post_install_message:
|
46
61
|
rdoc_options: []
|
@@ -48,19 +63,18 @@ require_paths:
|
|
48
63
|
- lib
|
49
64
|
required_ruby_version: !ruby/object:Gem::Requirement
|
50
65
|
requirements:
|
51
|
-
- -
|
66
|
+
- - '>='
|
52
67
|
- !ruby/object:Gem::Version
|
53
68
|
version: '0'
|
54
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
70
|
requirements:
|
56
|
-
- -
|
71
|
+
- - '>='
|
57
72
|
- !ruby/object:Gem::Version
|
58
73
|
version: '0'
|
59
74
|
requirements: []
|
60
75
|
rubyforge_project: patriot-gcp
|
61
|
-
rubygems_version: 2.
|
76
|
+
rubygems_version: 2.0.14.1
|
62
77
|
signing_key:
|
63
78
|
specification_version: 4
|
64
79
|
summary: GCP plugin for Patriot Workflow Scheduler
|
65
80
|
test_files: []
|
66
|
-
has_rdoc:
|