patriot-gcp 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- OWU1ZjcyZTZkYjJlNmE2ZjVlNjU1MDVlOTNmOWRlOTAxOTQyZDU5Nw==
5
- data.tar.gz: !binary |-
6
- Njk2ZDkwZWJmY2IyNjM0ZGNmNzk5YTQ3MWQ3ZWIyOGM4NTMwNWI3OQ==
2
+ SHA1:
3
+ metadata.gz: 46d7bbfe5645c62e3ee60694ff6571d069159545
4
+ data.tar.gz: 9da277d13eb9daa09173c082adf58fc0722500bc
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- ZGQxM2E5ZDY2MjgwNGVlZTdlNmEzNmI0NTUyOGNlOTIyNWYwNGFmYmVmZDVk
10
- MzU5ODNkZjhhNWE5OTMxZmNhZmZlZWQwMjljZjM4NjhkZWNmYTYyZWFhNmMw
11
- MjdmNjliZGY1MmM4ZWIyN2JhZGM4MTdjMmZjZWI0NGQ2YWFlOWI=
12
- data.tar.gz: !binary |-
13
- NzRlYzRmYmFjMDU3ODIxNmQzOGRjOGRkMDc0OTVkZWE3ZDg2OWU3YjM5M2M2
14
- Zjg2OTdlZTdlMTVhOTlmOTdkMzhiNDdkZDMyMmMyNzRkZDMwYjRmMjE0OTA3
15
- YjYwMDA2ZmE5ZjY5ZjcxOTdhOTYyMDYwNDk5NWRhYTg3YzQyY2Y=
6
+ metadata.gz: 84952419fe93476b17e6b46e1957cb1d04461cf5749e4b4aa25d8612f2f2799c4279c63bb21da9845ae37ee537d5e0ce6a380e58524ea1f530de0b535c221a07
7
+ data.tar.gz: dd7f6a1b3e08c27b1108b25afe1530a5b3c244531e5e11c1f67d0f490ec10263e4f86dabfa45423174a6bfef0d98d7a4d9d27d2b72a579d99dd969044e3e9690
@@ -1 +1,2 @@
1
1
  require 'patriot_gcp/command/load_to_bigquery'
2
+ require 'patriot_gcp/command/bq'
@@ -0,0 +1,47 @@
1
+ module PatriotGCP
2
+ module Command
3
+ class BQCommand < Patriot::Command::Base
4
+ declare_command_name :bq
5
+ include PatriotGCP::Ext::BigQuery
6
+
7
+ command_attr :inifile, :project_id, :statement, :name_suffix
8
+ validate_existence :inifile, :project_id, :statement, :name_suffix
9
+
10
+ class BigQueryException < Exception; end
11
+ class GoogleCloudPlatformException < Exception; end
12
+
13
+ def job_id
14
+ "#{command_name}_#{@project_id}_#{@name_suffix}"
15
+ end
16
+
17
+ # @see Patriot::Command::Base#configure
18
+ def configure
19
+ if @name_suffix == _date_
20
+ raise ArgumentError, 'To set _date_ only is not allowed here to avoid job name duplication.'
21
+ end
22
+ @statement = eval_attr(@statement)
23
+ self
24
+ end
25
+
26
+ def execute
27
+ @logger.info "start bq"
28
+
29
+ ini = IniFile.load(@inifile)
30
+ if ini.nil?
31
+ raise Exception, "inifile not found"
32
+ end
33
+
34
+ bigquery_keyfile = ini["gcp"]["bigquery_keyfile"]
35
+
36
+ stat_info = bq(
37
+ bigquery_keyfile,
38
+ @project_id,
39
+ @statement
40
+ )
41
+
42
+ @logger.info "statement execution succeeded: #{stat_info}"
43
+ @logger.info "end bq"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -28,9 +28,7 @@ module PatriotGCP
28
28
  raise Exception, "inifile not found"
29
29
  end
30
30
 
31
- service_account = ini["gcp"]["service_account"]
32
- private_key = ini["gcp"]["private_key"]
33
- key_pass = ini["gcp"]["key_pass"]
31
+ bigquery_keyfile = ini["gcp"]["bigquery_keyfile"]
34
32
 
35
33
  unless File.exist?(@input_file)
36
34
  raise Exception, "The given file doesn't exist."
@@ -41,15 +39,9 @@ module PatriotGCP
41
39
  return
42
40
  end
43
41
 
44
- if service_account.nil? or private_key.nil?
45
- raise GoogleCloudPlatformException, "configuration for GCP is not enough."
46
- end
47
-
48
42
  @logger.info "start uploading"
49
43
  stat_info = bq_load(@input_file,
50
- private_key,
51
- key_pass,
52
- service_account,
44
+ bigquery_keyfile,
53
45
  @project_id,
54
46
  @dataset,
55
47
  @table,
@@ -1,4 +1,4 @@
1
- require 'google/api_client'
1
+ require 'google/cloud/bigquery'
2
2
  require 'patriot_gcp/version'
3
3
 
4
4
 
@@ -12,151 +12,94 @@ module PatriotGCP
12
12
 
13
13
  class BigQueryException < Exception; end
14
14
 
15
- def _get_auth_client(p12_key, key_pass, email)
16
- key = Google::APIClient::KeyUtils.load_from_pkcs12(p12_key, key_pass)
17
- auth_client = Signet::OAuth2::Client.new(
18
- :token_credential_uri => 'https://accounts.google.com/o/oauth2/token',
19
- :audience => 'https://accounts.google.com/o/oauth2/token',
20
- :scope => 'https://www.googleapis.com/auth/bigquery',
21
- :issuer => email,
22
- :signing_key => key)
23
- auth_client.fetch_access_token!
24
- return auth_client
25
- end
26
-
27
-
28
- def _get_api_client()
29
- Google::APIClient.new(
30
- :application_name => VERSION::PROJECT_NAME,
31
- :application_version => VERSION::VERSION)
32
- end
33
-
15
+ def bq_load(filename,
16
+ bigquery_keyfile,
17
+ project_id,
18
+ dataset_id,
19
+ table_id,
20
+ schema,
21
+ options=nil,
22
+ polling_interval=nil)
34
23
 
35
- def _make_body(project_id, dataset_id, table_id, schema, options)
36
- body = {
37
- 'configuration' => {
38
- 'load' => {
39
- 'schema' => schema,
40
- 'destinationTable' => {
41
- 'projectId' => project_id,
42
- 'datasetId' => dataset_id,
43
- 'tableId' => table_id
44
- }
45
- }
46
- }
47
- }
48
- if options
49
- options.each{|key, value|
50
- body['configuration']['load'][key] = value
51
- }
52
- end
24
+ options ||= {}
25
+ polling_interval ||= 60
53
26
 
54
- return body
55
- end
27
+ ENV['BIGQUERY_KEYFILE'] = bigquery_keyfile
56
28
 
29
+ bigquery = Google::Cloud::Bigquery.new(
30
+ project: project_id,
31
+ retries: 3,
32
+ timeout: polling_interval * 60
33
+ )
57
34
 
58
- def _poll(bq_client,
59
- api_client,
60
- auth_client,
61
- project_id,
62
- job_id,
63
- polling_interval)
64
-
65
- polling_interval.times{
66
- response = JSON.parse(api_client.execute(
67
- :api_method => bq_client.jobs.get,
68
- :parameters => {
69
- 'jobId' => job_id,
70
- 'projectId' => project_id
71
- },
72
- :headers => {'Content-Type' => 'application/json; charset=UTF-8'},
73
- :authorization => auth_client
74
- ).response.body)
75
- state = response["status"]["state"]
76
-
77
- if state == 'DONE'
78
- if response["status"]["errors"]
79
- raise BigQueryException, "upload failed: #{response['status']['errors']}"
80
- else
81
- return response["statistics"]
35
+ # exclude partition string
36
+ # table_name$YYYYMMDD -> table_name
37
+ original_table_id = table_id.split('$')[0]
38
+
39
+ dataset = bigquery.dataset dataset_id
40
+ table = dataset.table original_table_id
41
+
42
+ if table.nil?
43
+ # TODO:
44
+ # schemaとoptionがメソッドやその引数で指定されるようになっており、
45
+ # 大幅な仕様変更となっているが、旧ライブラリ同様の設定を読み込めるよう
46
+ # 議論されている。
47
+ # https://github.com/GoogleCloudPlatform/google-cloud-ruby/issues/1919
48
+ #
49
+ # こちらが対応された場合は下記ソースを変更する。
50
+ dataset.create_table original_table_id do |updater|
51
+ updater.schema do |scm|
52
+ schema['fields'].each do |row|
53
+ name = row['name']
54
+ type = row['type'].downcase.to_sym
55
+ mode = row['mode'].downcase.to_sym if row['mode']
56
+
57
+ scm.method(type).call(name, mode: mode)
58
+ end
82
59
  end
60
+ # 取り込み時間分割テーブルに設定
61
+ updater.time_partitioning_type = "DAY"
83
62
  end
63
+ end
84
64
 
85
- sleep 60
86
- }
87
-
88
- raise BigQueryException,"registered job didn't finish within: #{polling_interval} mins. please check if it will finish later on. jobId: #{job_id}"
89
- end
90
-
91
-
92
- def _bq_load(filename,
93
- project_id,
94
- dataset_id,
95
- table_id,
96
- auth_client,
97
- api_client,
98
- schema,
99
- options,
100
- polling_interval)
101
-
102
- bq_client = api_client.discovered_api('bigquery', 'v2')
103
- body = _make_body(project_id, dataset_id, table_id, schema, options)
104
- media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")
105
-
106
- result = api_client.execute(
107
- :api_method => bq_client.jobs.insert,
108
- :parameters => {
109
- 'projectId' => project_id,
110
- 'uploadType' => 'multipart'
111
- },
112
- :body_object => body,
113
- :authorization => auth_client,
114
- :media => media
65
+ job = dataset.load_job(
66
+ table_id,
67
+ filename,
68
+ format: options['format'] || nil,
69
+ quote: options['quote'] || nil,
70
+ skip_leading: options['skipLeadingRows'] || nil,
71
+ write: options['writeDisposition'] || nil,
72
+ delimiter: options['fieldDelimiter'] || nil,
73
+ null_marker: options['nullMarker'] || nil,
115
74
  )
116
75
 
117
- begin
118
- job_id = JSON.parse(result.response.body)['jobReference']['jobId']
119
- rescue
120
- raise BigQueryException, "failed to register job: #{result.response.body}"
121
- end
76
+ job.wait_until_done!
122
77
 
123
- return _poll(bq_client,
124
- api_client,
125
- auth_client,
126
- project_id,
127
- job_id,
128
- polling_interval)
78
+ if job.failed?
79
+ raise BigQueryException, "upload failed: #{job.errors}"
80
+ else
81
+ return job.statistics
82
+ end
129
83
  end
130
84
 
85
+ def bq(bigquery_keyfile, project_id, statement)
86
+ ENV['BIGQUERY_KEYFILE'] = bigquery_keyfile
131
87
 
132
- def bq_load(filename,
133
- p12_key,
134
- key_pass,
135
- email,
136
- project_id,
137
- dataset_id,
138
- table_id,
139
- schema,
140
- options=nil,
141
- polling_interval=nil)
88
+ bigquery = Google::Cloud::Bigquery.new(
89
+ project: project_id,
90
+ retries: 3
91
+ )
142
92
 
143
- options ||= {}
144
- polling_interval ||= 60
93
+ job = bigquery.query_job statement
145
94
 
146
- api_client = _get_api_client()
147
- auth_client = _get_auth_client(p12_key, key_pass, email)
148
-
149
- return _bq_load(filename,
150
- project_id,
151
- dataset_id,
152
- table_id,
153
- auth_client,
154
- api_client,
155
- schema,
156
- options,
157
- polling_interval)
158
- end
95
+ job.wait_until_done!
159
96
 
97
+ if job.failed?
98
+ raise BigQueryException, "statement execution failed: #{job.errors}"
99
+ else
100
+ return job.statistics
101
+ end
102
+ end
160
103
  end
161
104
  end
162
105
  end
@@ -1,4 +1,4 @@
1
1
  class VERSION
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  PROJECT_NAME = "patriot-gcp"
4
4
  end
metadata CHANGED
@@ -1,46 +1,61 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patriot-gcp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hitoshi Tsuda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-09 00:00:00.000000000 Z
11
+ date: 2018-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: patriot-workflow-scheduler
14
+ name: google-cloud-bigquery
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ~>
18
18
  - !ruby/object:Gem::Version
19
- version: '0.7'
19
+ version: '1.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
- version: '0.7'
27
- description: plugins for Patriot Worlflow Scheduler, which deal with GCP such as BigQuery.
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: patriot-workflow-scheduler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.8.7
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.7
41
+ description: plugins for Patriot Workflow Scheduler, which deal with GCP such as BigQuery.
28
42
  email:
29
43
  - tsuda_hitoshi@cyberagent.co.jp
30
44
  executables: []
31
45
  extensions: []
32
46
  extra_rdoc_files: []
33
47
  files:
34
- - init.rb
35
- - lib/patriot_gcp.rb
36
- - lib/patriot_gcp/command.rb
48
+ - lib/patriot_gcp/command/bq.rb
37
49
  - lib/patriot_gcp/command/load_to_bigquery.rb
38
- - lib/patriot_gcp/ext.rb
50
+ - lib/patriot_gcp/command.rb
39
51
  - lib/patriot_gcp/ext/bigquery.rb
52
+ - lib/patriot_gcp/ext.rb
40
53
  - lib/patriot_gcp/version.rb
54
+ - lib/patriot_gcp.rb
55
+ - init.rb
41
56
  homepage: https://github.com/CyberAgent/patriot-workflow-scheduler/tree/master/plugins/patriot-gcp
42
57
  licenses:
43
- - Apache License, Version 2.0
58
+ - Apache-2.0
44
59
  metadata: {}
45
60
  post_install_message:
46
61
  rdoc_options: []
@@ -48,19 +63,18 @@ require_paths:
48
63
  - lib
49
64
  required_ruby_version: !ruby/object:Gem::Requirement
50
65
  requirements:
51
- - - ! '>='
66
+ - - '>='
52
67
  - !ruby/object:Gem::Version
53
68
  version: '0'
54
69
  required_rubygems_version: !ruby/object:Gem::Requirement
55
70
  requirements:
56
- - - ! '>='
71
+ - - '>='
57
72
  - !ruby/object:Gem::Version
58
73
  version: '0'
59
74
  requirements: []
60
75
  rubyforge_project: patriot-gcp
61
- rubygems_version: 2.4.7
76
+ rubygems_version: 2.0.14.1
62
77
  signing_key:
63
78
  specification_version: 4
64
79
  summary: GCP plugin for Patriot Workflow Scheduler
65
80
  test_files: []
66
- has_rdoc: