patriot-gcp 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- OWU1ZjcyZTZkYjJlNmE2ZjVlNjU1MDVlOTNmOWRlOTAxOTQyZDU5Nw==
5
- data.tar.gz: !binary |-
6
- Njk2ZDkwZWJmY2IyNjM0ZGNmNzk5YTQ3MWQ3ZWIyOGM4NTMwNWI3OQ==
2
+ SHA1:
3
+ metadata.gz: 46d7bbfe5645c62e3ee60694ff6571d069159545
4
+ data.tar.gz: 9da277d13eb9daa09173c082adf58fc0722500bc
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- ZGQxM2E5ZDY2MjgwNGVlZTdlNmEzNmI0NTUyOGNlOTIyNWYwNGFmYmVmZDVk
10
- MzU5ODNkZjhhNWE5OTMxZmNhZmZlZWQwMjljZjM4NjhkZWNmYTYyZWFhNmMw
11
- MjdmNjliZGY1MmM4ZWIyN2JhZGM4MTdjMmZjZWI0NGQ2YWFlOWI=
12
- data.tar.gz: !binary |-
13
- NzRlYzRmYmFjMDU3ODIxNmQzOGRjOGRkMDc0OTVkZWE3ZDg2OWU3YjM5M2M2
14
- Zjg2OTdlZTdlMTVhOTlmOTdkMzhiNDdkZDMyMmMyNzRkZDMwYjRmMjE0OTA3
15
- YjYwMDA2ZmE5ZjY5ZjcxOTdhOTYyMDYwNDk5NWRhYTg3YzQyY2Y=
6
+ metadata.gz: 84952419fe93476b17e6b46e1957cb1d04461cf5749e4b4aa25d8612f2f2799c4279c63bb21da9845ae37ee537d5e0ce6a380e58524ea1f530de0b535c221a07
7
+ data.tar.gz: dd7f6a1b3e08c27b1108b25afe1530a5b3c244531e5e11c1f67d0f490ec10263e4f86dabfa45423174a6bfef0d98d7a4d9d27d2b72a579d99dd969044e3e9690
@@ -1 +1,2 @@
1
1
  require 'patriot_gcp/command/load_to_bigquery'
2
+ require 'patriot_gcp/command/bq'
@@ -0,0 +1,47 @@
1
+ module PatriotGCP
2
+ module Command
3
+ class BQCommand < Patriot::Command::Base
4
+ declare_command_name :bq
5
+ include PatriotGCP::Ext::BigQuery
6
+
7
+ command_attr :inifile, :project_id, :statement, :name_suffix
8
+ validate_existence :inifile, :project_id, :statement, :name_suffix
9
+
10
+ class BigQueryException < Exception; end
11
+ class GoogleCloudPlatformException < Exception; end
12
+
13
+ def job_id
14
+ "#{command_name}_#{@project_id}_#{@name_suffix}"
15
+ end
16
+
17
+ # @see Patriot::Command::Base#configure
18
+ def configure
19
+ if @name_suffix == _date_
20
+ raise ArgumentError, 'To set _date_ only is not allowed here to avoid job name duplication.'
21
+ end
22
+ @statement = eval_attr(@statement)
23
+ self
24
+ end
25
+
26
+ def execute
27
+ @logger.info "start bq"
28
+
29
+ ini = IniFile.load(@inifile)
30
+ if ini.nil?
31
+ raise Exception, "inifile not found"
32
+ end
33
+
34
+ bigquery_keyfile = ini["gcp"]["bigquery_keyfile"]
35
+
36
+ stat_info = bq(
37
+ bigquery_keyfile,
38
+ @project_id,
39
+ @statement
40
+ )
41
+
42
+ @logger.info "statement execution succeeded: #{stat_info}"
43
+ @logger.info "end bq"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -28,9 +28,7 @@ module PatriotGCP
28
28
  raise Exception, "inifile not found"
29
29
  end
30
30
 
31
- service_account = ini["gcp"]["service_account"]
32
- private_key = ini["gcp"]["private_key"]
33
- key_pass = ini["gcp"]["key_pass"]
31
+ bigquery_keyfile = ini["gcp"]["bigquery_keyfile"]
34
32
 
35
33
  unless File.exist?(@input_file)
36
34
  raise Exception, "The given file doesn't exist."
@@ -41,15 +39,9 @@ module PatriotGCP
41
39
  return
42
40
  end
43
41
 
44
- if service_account.nil? or private_key.nil?
45
- raise GoogleCloudPlatformException, "configuration for GCP is not enough."
46
- end
47
-
48
42
  @logger.info "start uploading"
49
43
  stat_info = bq_load(@input_file,
50
- private_key,
51
- key_pass,
52
- service_account,
44
+ bigquery_keyfile,
53
45
  @project_id,
54
46
  @dataset,
55
47
  @table,
@@ -1,4 +1,4 @@
1
- require 'google/api_client'
1
+ require 'google/cloud/bigquery'
2
2
  require 'patriot_gcp/version'
3
3
 
4
4
 
@@ -12,151 +12,94 @@ module PatriotGCP
12
12
 
13
13
  class BigQueryException < Exception; end
14
14
 
15
- def _get_auth_client(p12_key, key_pass, email)
16
- key = Google::APIClient::KeyUtils.load_from_pkcs12(p12_key, key_pass)
17
- auth_client = Signet::OAuth2::Client.new(
18
- :token_credential_uri => 'https://accounts.google.com/o/oauth2/token',
19
- :audience => 'https://accounts.google.com/o/oauth2/token',
20
- :scope => 'https://www.googleapis.com/auth/bigquery',
21
- :issuer => email,
22
- :signing_key => key)
23
- auth_client.fetch_access_token!
24
- return auth_client
25
- end
26
-
27
-
28
- def _get_api_client()
29
- Google::APIClient.new(
30
- :application_name => VERSION::PROJECT_NAME,
31
- :application_version => VERSION::VERSION)
32
- end
33
-
15
+ def bq_load(filename,
16
+ bigquery_keyfile,
17
+ project_id,
18
+ dataset_id,
19
+ table_id,
20
+ schema,
21
+ options=nil,
22
+ polling_interval=nil)
34
23
 
35
- def _make_body(project_id, dataset_id, table_id, schema, options)
36
- body = {
37
- 'configuration' => {
38
- 'load' => {
39
- 'schema' => schema,
40
- 'destinationTable' => {
41
- 'projectId' => project_id,
42
- 'datasetId' => dataset_id,
43
- 'tableId' => table_id
44
- }
45
- }
46
- }
47
- }
48
- if options
49
- options.each{|key, value|
50
- body['configuration']['load'][key] = value
51
- }
52
- end
24
+ options ||= {}
25
+ polling_interval ||= 60
53
26
 
54
- return body
55
- end
27
+ ENV['BIGQUERY_KEYFILE'] = bigquery_keyfile
56
28
 
29
+ bigquery = Google::Cloud::Bigquery.new(
30
+ project: project_id,
31
+ retries: 3,
32
+ timeout: polling_interval * 60
33
+ )
57
34
 
58
- def _poll(bq_client,
59
- api_client,
60
- auth_client,
61
- project_id,
62
- job_id,
63
- polling_interval)
64
-
65
- polling_interval.times{
66
- response = JSON.parse(api_client.execute(
67
- :api_method => bq_client.jobs.get,
68
- :parameters => {
69
- 'jobId' => job_id,
70
- 'projectId' => project_id
71
- },
72
- :headers => {'Content-Type' => 'application/json; charset=UTF-8'},
73
- :authorization => auth_client
74
- ).response.body)
75
- state = response["status"]["state"]
76
-
77
- if state == 'DONE'
78
- if response["status"]["errors"]
79
- raise BigQueryException, "upload failed: #{response['status']['errors']}"
80
- else
81
- return response["statistics"]
35
+ # exclude partition string
36
+ # table_name$YYYYMMDD -> table_name
37
+ original_table_id = table_id.split('$')[0]
38
+
39
+ dataset = bigquery.dataset dataset_id
40
+ table = dataset.table original_table_id
41
+
42
+ if table.nil?
43
+ # TODO:
44
+ # schemaとoptionがメソッドやその引数で指定されるようになっており、
45
+ # 大幅な仕様変更となっているが、旧ライブラリ同様の設定を読み込めるよう
46
+ # 議論されている。
47
+ # https://github.com/GoogleCloudPlatform/google-cloud-ruby/issues/1919
48
+ #
49
+ # こちらが対応された場合は下記ソースを変更する。
50
+ dataset.create_table original_table_id do |updater|
51
+ updater.schema do |scm|
52
+ schema['fields'].each do |row|
53
+ name = row['name']
54
+ type = row['type'].downcase.to_sym
55
+ mode = row['mode'].downcase.to_sym if row['mode']
56
+
57
+ scm.method(type).call(name, mode: mode)
58
+ end
82
59
  end
60
+ # 取り込み時間分割テーブルに設定
61
+ updater.time_partitioning_type = "DAY"
83
62
  end
63
+ end
84
64
 
85
- sleep 60
86
- }
87
-
88
- raise BigQueryException,"registered job didn't finish within: #{polling_interval} mins. please check if it will finish later on. jobId: #{job_id}"
89
- end
90
-
91
-
92
- def _bq_load(filename,
93
- project_id,
94
- dataset_id,
95
- table_id,
96
- auth_client,
97
- api_client,
98
- schema,
99
- options,
100
- polling_interval)
101
-
102
- bq_client = api_client.discovered_api('bigquery', 'v2')
103
- body = _make_body(project_id, dataset_id, table_id, schema, options)
104
- media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")
105
-
106
- result = api_client.execute(
107
- :api_method => bq_client.jobs.insert,
108
- :parameters => {
109
- 'projectId' => project_id,
110
- 'uploadType' => 'multipart'
111
- },
112
- :body_object => body,
113
- :authorization => auth_client,
114
- :media => media
65
+ job = dataset.load_job(
66
+ table_id,
67
+ filename,
68
+ format: options['format'] || nil,
69
+ quote: options['quote'] || nil,
70
+ skip_leading: options['skipLeadingRows'] || nil,
71
+ write: options['writeDisposition'] || nil,
72
+ delimiter: options['fieldDelimiter'] || nil,
73
+ null_marker: options['nullMarker'] || nil,
115
74
  )
116
75
 
117
- begin
118
- job_id = JSON.parse(result.response.body)['jobReference']['jobId']
119
- rescue
120
- raise BigQueryException, "failed to register job: #{result.response.body}"
121
- end
76
+ job.wait_until_done!
122
77
 
123
- return _poll(bq_client,
124
- api_client,
125
- auth_client,
126
- project_id,
127
- job_id,
128
- polling_interval)
78
+ if job.failed?
79
+ raise BigQueryException, "upload failed: #{job.errors}"
80
+ else
81
+ return job.statistics
82
+ end
129
83
  end
130
84
 
85
+ def bq(bigquery_keyfile, project_id, statement)
86
+ ENV['BIGQUERY_KEYFILE'] = bigquery_keyfile
131
87
 
132
- def bq_load(filename,
133
- p12_key,
134
- key_pass,
135
- email,
136
- project_id,
137
- dataset_id,
138
- table_id,
139
- schema,
140
- options=nil,
141
- polling_interval=nil)
88
+ bigquery = Google::Cloud::Bigquery.new(
89
+ project: project_id,
90
+ retries: 3
91
+ )
142
92
 
143
- options ||= {}
144
- polling_interval ||= 60
93
+ job = bigquery.query_job statement
145
94
 
146
- api_client = _get_api_client()
147
- auth_client = _get_auth_client(p12_key, key_pass, email)
148
-
149
- return _bq_load(filename,
150
- project_id,
151
- dataset_id,
152
- table_id,
153
- auth_client,
154
- api_client,
155
- schema,
156
- options,
157
- polling_interval)
158
- end
95
+ job.wait_until_done!
159
96
 
97
+ if job.failed?
98
+ raise BigQueryException, "statement execution failed: #{job.errors}"
99
+ else
100
+ return job.statistics
101
+ end
102
+ end
160
103
  end
161
104
  end
162
105
  end
@@ -1,4 +1,4 @@
1
1
  class VERSION
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  PROJECT_NAME = "patriot-gcp"
4
4
  end
metadata CHANGED
@@ -1,46 +1,61 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patriot-gcp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hitoshi Tsuda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-09 00:00:00.000000000 Z
11
+ date: 2018-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: patriot-workflow-scheduler
14
+ name: google-cloud-bigquery
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ~>
18
18
  - !ruby/object:Gem::Version
19
- version: '0.7'
19
+ version: '1.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
- version: '0.7'
27
- description: plugins for Patriot Worlflow Scheduler, which deal with GCP such as BigQuery.
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: patriot-workflow-scheduler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.8.7
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.7
41
+ description: plugins for Patriot Workflow Scheduler, which deal with GCP such as BigQuery.
28
42
  email:
29
43
  - tsuda_hitoshi@cyberagent.co.jp
30
44
  executables: []
31
45
  extensions: []
32
46
  extra_rdoc_files: []
33
47
  files:
34
- - init.rb
35
- - lib/patriot_gcp.rb
36
- - lib/patriot_gcp/command.rb
48
+ - lib/patriot_gcp/command/bq.rb
37
49
  - lib/patriot_gcp/command/load_to_bigquery.rb
38
- - lib/patriot_gcp/ext.rb
50
+ - lib/patriot_gcp/command.rb
39
51
  - lib/patriot_gcp/ext/bigquery.rb
52
+ - lib/patriot_gcp/ext.rb
40
53
  - lib/patriot_gcp/version.rb
54
+ - lib/patriot_gcp.rb
55
+ - init.rb
41
56
  homepage: https://github.com/CyberAgent/patriot-workflow-scheduler/tree/master/plugins/patriot-gcp
42
57
  licenses:
43
- - Apache License, Version 2.0
58
+ - Apache-2.0
44
59
  metadata: {}
45
60
  post_install_message:
46
61
  rdoc_options: []
@@ -48,19 +63,18 @@ require_paths:
48
63
  - lib
49
64
  required_ruby_version: !ruby/object:Gem::Requirement
50
65
  requirements:
51
- - - ! '>='
66
+ - - '>='
52
67
  - !ruby/object:Gem::Version
53
68
  version: '0'
54
69
  required_rubygems_version: !ruby/object:Gem::Requirement
55
70
  requirements:
56
- - - ! '>='
71
+ - - '>='
57
72
  - !ruby/object:Gem::Version
58
73
  version: '0'
59
74
  requirements: []
60
75
  rubyforge_project: patriot-gcp
61
- rubygems_version: 2.4.7
76
+ rubygems_version: 2.0.14.1
62
77
  signing_key:
63
78
  specification_version: 4
64
79
  summary: GCP plugin for Patriot Workflow Scheduler
65
80
  test_files: []
66
- has_rdoc: